0.4.6: blacklist
This commit is contained in:
parent
9d1374c342
commit
2db3c15845
3 changed files with 38 additions and 38 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1638,7 +1638,7 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "site_icons"
|
name = "site_icons"
|
||||||
version = "0.4.5"
|
version = "0.4.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"clap",
|
"clap",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "site_icons"
|
name = "site_icons"
|
||||||
version = "0.4.5"
|
version = "0.4.6"
|
||||||
authors = ["Sam Denty <sam@samdenty.com>"]
|
authors = ["Sam Denty <sam@samdenty.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0"
|
license = "GPL-3.0"
|
||||||
|
|
58
src/icons.rs
58
src/icons.rs
|
@ -118,6 +118,14 @@ impl Icons {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether `url` is rejected by the configured blacklist.
///
/// When `self.blacklist` is `Some`, the stored predicate is invoked with
/// `url` and its answer is returned unchanged. When no blacklist is set,
/// nothing is considered blacklisted and the result is `false`.
pub fn is_blacklisted(&self, url: &Url) -> bool {
    // NOTE(review): assumes `blacklist` holds a callable taking `&Url` and
    // returning `bool`; confirm against the field declaration.
    self.blacklist
        .as_ref()
        .map_or(false, |predicate| predicate(url))
}
|
||||||
|
|
||||||
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
|
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
|
||||||
let res = CLIENT
|
let res = CLIENT
|
||||||
.get(url)
|
.get(url)
|
||||||
|
@ -128,11 +136,9 @@ impl Icons {
|
||||||
|
|
||||||
let url = res.url().clone();
|
let url = res.url().clone();
|
||||||
|
|
||||||
if let Some(is_blacklisted) = &self.blacklist {
|
if self.is_blacklisted(&url) {
|
||||||
if is_blacklisted(&url) {
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
let mut body = res.bytes_stream();
|
let mut body = res.bytes_stream();
|
||||||
|
|
||||||
|
@ -194,6 +200,7 @@ impl Icons {
|
||||||
))
|
))
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.filter_map(|(i, elem_ref)| {
|
.filter_map(|(i, elem_ref)| {
|
||||||
|
let elem = elem_ref.value();
|
||||||
let ancestors = elem_ref
|
let ancestors = elem_ref
|
||||||
.ancestors()
|
.ancestors()
|
||||||
.map(ElementRef::wrap)
|
.map(ElementRef::wrap)
|
||||||
|
@ -230,10 +237,7 @@ impl Icons {
|
||||||
}
|
}
|
||||||
|
|
||||||
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
|
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
|
||||||
ancestors
|
ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
|
||||||
.iter()
|
|
||||||
.chain(iter::once(&elem_ref.value()))
|
|
||||||
.any(|ancestor| {
|
|
||||||
ancestor
|
ancestor
|
||||||
.attr(attr_name)
|
.attr(attr_name)
|
||||||
.map(|attr| is_match(&attr.to_lowercase()))
|
.map(|attr| is_match(&attr.to_lowercase()))
|
||||||
|
@ -272,15 +276,27 @@ impl Icons {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Some((elem_ref, weight))
|
let href = if elem.name() == "svg" {
|
||||||
|
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
|
||||||
|
} else {
|
||||||
|
elem.attr("src").and_then(|href| url.join(&href).ok())
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(href) = &href {
|
||||||
|
if self.is_blacklisted(href) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
href.map(|href| (href, elem_ref, weight))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
logos.sort_by(|(_, a_weight), (_, b_weight)| b_weight.cmp(a_weight));
|
logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));
|
||||||
|
|
||||||
// prefer <img> over svg
|
// prefer <img> over svg
|
||||||
let mut prev_weight = None;
|
let mut prev_weight = None;
|
||||||
for (i, (logo, weight)) in logos.iter().enumerate() {
|
for (href, elem_ref, weight) in &logos {
|
||||||
if let Some(prev_weight) = prev_weight {
|
if let Some(prev_weight) = prev_weight {
|
||||||
if weight != prev_weight {
|
if weight != prev_weight {
|
||||||
break;
|
break;
|
||||||
|
@ -288,31 +304,15 @@ impl Icons {
|
||||||
}
|
}
|
||||||
prev_weight = Some(weight);
|
prev_weight = Some(weight);
|
||||||
|
|
||||||
if logo.value().name() == "img" {
|
if elem_ref.value().name() == "img" {
|
||||||
let (logo, weight) = logos.remove(i);
|
self.add_icon(href.clone(), IconKind::SiteLogo, None);
|
||||||
logos.insert(0, (logo, weight + 1));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (elem_ref, _) in logos {
|
let (href, _, _) = logos.into_iter().next().unwrap();
|
||||||
let elem = elem_ref.value();
|
|
||||||
|
|
||||||
if elem.name() == "svg" {
|
|
||||||
let data_uri = Url::parse(&encode_svg(&elem_ref.html())).unwrap();
|
|
||||||
self.add_icon(data_uri, IconKind::SiteLogo, None);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(href) = elem_ref
|
|
||||||
.value()
|
|
||||||
.attr("src")
|
|
||||||
.and_then(|href| url.join(&href).ok())
|
|
||||||
{
|
|
||||||
self.add_icon(href, IconKind::SiteLogo, None);
|
self.add_icon(href, IconKind::SiteLogo, None);
|
||||||
break;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for elem_ref in document.select(selector!("link[rel='manifest']")) {
|
for elem_ref in document.select(selector!("link[rel='manifest']")) {
|
||||||
|
|
Reference in a new issue