0.4.6: blacklist

This commit is contained in:
Sam Denty 2022-12-26 16:29:09 +00:00
parent 9d1374c342
commit 2db3c15845
No known key found for this signature in database
GPG key ID: 7B4EAF7B9E291B79
3 changed files with 38 additions and 38 deletions

2
Cargo.lock generated
View file

@ -1638,7 +1638,7 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]] [[package]]
name = "site_icons" name = "site_icons"
version = "0.4.5" version = "0.4.6"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"clap", "clap",

View file

@ -1,6 +1,6 @@
[package] [package]
name = "site_icons" name = "site_icons"
version = "0.4.5" version = "0.4.6"
authors = ["Sam Denty <sam@samdenty.com>"] authors = ["Sam Denty <sam@samdenty.com>"]
edition = "2018" edition = "2018"
license = "GPL-3.0" license = "GPL-3.0"

View file

@ -118,6 +118,14 @@ impl Icons {
}; };
} }
/// Reports whether `url` is rejected by the configured blacklist predicate.
///
/// Returns `false` when no blacklist predicate has been set on `self`.
pub fn is_blacklisted(&self, url: &Url) -> bool {
    match &self.blacklist {
        // A predicate is configured: defer the decision to it.
        Some(predicate) => predicate(url),
        // No predicate configured: nothing is blacklisted.
        None => false,
    }
}
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> { pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
let res = CLIENT let res = CLIENT
.get(url) .get(url)
@ -128,10 +136,8 @@ impl Icons {
let url = res.url().clone(); let url = res.url().clone();
if let Some(is_blacklisted) = &self.blacklist { if self.is_blacklisted(&url) {
if is_blacklisted(&url) { return Ok(());
return Ok(());
}
} }
let mut body = res.bytes_stream(); let mut body = res.bytes_stream();
@ -194,6 +200,7 @@ impl Icons {
)) ))
.enumerate() .enumerate()
.filter_map(|(i, elem_ref)| { .filter_map(|(i, elem_ref)| {
let elem = elem_ref.value();
let ancestors = elem_ref let ancestors = elem_ref
.ancestors() .ancestors()
.map(ElementRef::wrap) .map(ElementRef::wrap)
@ -230,15 +237,12 @@ impl Icons {
} }
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| { let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
ancestors ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
.iter() ancestor
.chain(iter::once(&elem_ref.value())) .attr(attr_name)
.any(|ancestor| { .map(|attr| is_match(&attr.to_lowercase()))
ancestor .unwrap_or(false)
.attr(attr_name) })
.map(|attr| is_match(&attr.to_lowercase()))
.unwrap_or(false)
})
}; };
let mentions_logo = |attr_name| { let mentions_logo = |attr_name| {
@ -272,15 +276,27 @@ impl Icons {
} }
} }
Some((elem_ref, weight)) let href = if elem.name() == "svg" {
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
} else {
elem.attr("src").and_then(|href| url.join(&href).ok())
};
if let Some(href) = &href {
if self.is_blacklisted(href) {
return None;
}
}
href.map(|href| (href, elem_ref, weight))
}) })
.collect(); .collect();
logos.sort_by(|(_, a_weight), (_, b_weight)| b_weight.cmp(a_weight)); logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));
// prefer <img> over svg // prefer <img> over svg
let mut prev_weight = None; let mut prev_weight = None;
for (i, (logo, weight)) in logos.iter().enumerate() { for (href, elem_ref, weight) in &logos {
if let Some(prev_weight) = prev_weight { if let Some(prev_weight) = prev_weight {
if weight != prev_weight { if weight != prev_weight {
break; break;
@ -288,31 +304,15 @@ impl Icons {
} }
prev_weight = Some(weight); prev_weight = Some(weight);
if logo.value().name() == "img" { if elem_ref.value().name() == "img" {
let (logo, weight) = logos.remove(i); self.add_icon(href.clone(), IconKind::SiteLogo, None);
logos.insert(0, (logo, weight + 1));
break; break;
} }
} }
for (elem_ref, _) in logos { let (href, _, _) = logos.into_iter().next().unwrap();
let elem = elem_ref.value();
if elem.name() == "svg" { self.add_icon(href, IconKind::SiteLogo, None);
let data_uri = Url::parse(&encode_svg(&elem_ref.html())).unwrap();
self.add_icon(data_uri, IconKind::SiteLogo, None);
break;
}
if let Some(href) = elem_ref
.value()
.attr("src")
.and_then(|href| url.join(&href).ok())
{
self.add_icon(href, IconKind::SiteLogo, None);
break;
};
}
} }
for elem_ref in document.select(selector!("link[rel='manifest']")) { for elem_ref in document.select(selector!("link[rel='manifest']")) {