0.4.6: blacklist

This commit is contained in:
Sam Denty 2022-12-26 16:29:09 +00:00
parent 9d1374c342
commit 2db3c15845
No known key found for this signature in database
GPG key ID: 7B4EAF7B9E291B79
3 changed files with 38 additions and 38 deletions

2
Cargo.lock generated
View file

@ -1638,7 +1638,7 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "site_icons"
version = "0.4.5"
version = "0.4.6"
dependencies = [
"byteorder",
"clap",

View file

@ -1,6 +1,6 @@
[package]
name = "site_icons"
version = "0.4.5"
version = "0.4.6"
authors = ["Sam Denty <sam@samdenty.com>"]
edition = "2018"
license = "GPL-3.0"

View file

@ -118,6 +118,14 @@ impl Icons {
};
}
/// Reports whether `url` is rejected by the configured blacklist predicate.
///
/// Returns the predicate's verdict for `url` when `self.blacklist` is set,
/// and `false` when no blacklist has been configured.
pub fn is_blacklisted(&self, url: &Url) -> bool {
    self
        .blacklist
        .as_ref()
        // With no predicate configured, nothing is considered blacklisted.
        .map_or(false, |predicate| predicate(url))
}
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
let res = CLIENT
.get(url)
@ -128,11 +136,9 @@ impl Icons {
let url = res.url().clone();
if let Some(is_blacklisted) = &self.blacklist {
if is_blacklisted(&url) {
if self.is_blacklisted(&url) {
return Ok(());
}
}
let mut body = res.bytes_stream();
@ -194,6 +200,7 @@ impl Icons {
))
.enumerate()
.filter_map(|(i, elem_ref)| {
let elem = elem_ref.value();
let ancestors = elem_ref
.ancestors()
.map(ElementRef::wrap)
@ -230,10 +237,7 @@ impl Icons {
}
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
ancestors
.iter()
.chain(iter::once(&elem_ref.value()))
.any(|ancestor| {
ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
ancestor
.attr(attr_name)
.map(|attr| is_match(&attr.to_lowercase()))
@ -272,15 +276,27 @@ impl Icons {
}
}
Some((elem_ref, weight))
let href = if elem.name() == "svg" {
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
} else {
elem.attr("src").and_then(|href| url.join(&href).ok())
};
if let Some(href) = &href {
if self.is_blacklisted(href) {
return None;
}
}
href.map(|href| (href, elem_ref, weight))
})
.collect();
logos.sort_by(|(_, a_weight), (_, b_weight)| b_weight.cmp(a_weight));
logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));
// prefer <img> over svg
let mut prev_weight = None;
for (i, (logo, weight)) in logos.iter().enumerate() {
for (href, elem_ref, weight) in &logos {
if let Some(prev_weight) = prev_weight {
if weight != prev_weight {
break;
@ -288,31 +304,15 @@ impl Icons {
}
prev_weight = Some(weight);
if logo.value().name() == "img" {
let (logo, weight) = logos.remove(i);
logos.insert(0, (logo, weight + 1));
if elem_ref.value().name() == "img" {
self.add_icon(href.clone(), IconKind::SiteLogo, None);
break;
}
}
for (elem_ref, _) in logos {
let elem = elem_ref.value();
let (href, _, _) = logos.into_iter().next().unwrap();
if elem.name() == "svg" {
let data_uri = Url::parse(&encode_svg(&elem_ref.html())).unwrap();
self.add_icon(data_uri, IconKind::SiteLogo, None);
break;
}
if let Some(href) = elem_ref
.value()
.attr("src")
.and_then(|href| url.join(&href).ok())
{
self.add_icon(href, IconKind::SiteLogo, None);
break;
};
}
}
for elem_ref in document.select(selector!("link[rel='manifest']")) {