0.4.5: give highest priority to elements whose alt attribute contains the site name
This commit is contained in:
parent
302b8dea63
commit
9d1374c342
3 changed files with 72 additions and 9 deletions
37
Cargo.lock
generated
37
Cargo.lock
generated
|
@ -1638,7 +1638,7 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "site_icons"
|
name = "site_icons"
|
||||||
version = "0.4.4"
|
version = "0.4.5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"clap",
|
"clap",
|
||||||
|
@ -1658,6 +1658,7 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_with",
|
"serde_with",
|
||||||
|
"tldextract",
|
||||||
"tokio",
|
"tokio",
|
||||||
"url",
|
"url",
|
||||||
"vec1",
|
"vec1",
|
||||||
|
@ -1783,6 +1784,26 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror"
|
||||||
|
version = "1.0.38"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror-impl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror-impl"
|
||||||
|
version = "1.0.38"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.3.17"
|
version = "0.3.17"
|
||||||
|
@ -1825,6 +1846,20 @@ version = "0.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tldextract"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec03259a0567ad58eed30812bc3e5eda8030f154abc70317ab57b14f00699ca4"
|
||||||
|
dependencies = [
|
||||||
|
"idna 0.2.3",
|
||||||
|
"log",
|
||||||
|
"regex",
|
||||||
|
"serde_json",
|
||||||
|
"thiserror",
|
||||||
|
"url",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.22.0"
|
version = "1.22.0"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "site_icons"
|
name = "site_icons"
|
||||||
version = "0.4.4"
|
version = "0.4.5"
|
||||||
authors = ["Sam Denty <sam@samdenty.com>"]
|
authors = ["Sam Denty <sam@samdenty.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0"
|
license = "GPL-3.0"
|
||||||
|
@ -34,6 +34,7 @@ mime = { package = "mime_4", version = "0.4.0-a.0" }
|
||||||
serde = { version = "1.0", features = ["derive", "rc"] }
|
serde = { version = "1.0", features = ["derive", "rc"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
futures = "0.3.25"
|
futures = "0.3.25"
|
||||||
|
tldextract = "0.6.0"
|
||||||
|
|
||||||
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
[target.'cfg(target_arch = "wasm32")'.dependencies]
|
||||||
reqwest = { package = "reqwest-wasm", version = "0.11.15", features = [
|
reqwest = { package = "reqwest-wasm", version = "0.11.15", features = [
|
||||||
|
|
33
src/icons.rs
33
src/icons.rs
|
@ -10,8 +10,10 @@ use reqwest::{header::*, IntoUrl};
|
||||||
use scraper::{ElementRef, Html};
|
use scraper::{ElementRef, Html};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use std::iter;
|
||||||
use std::task::Poll;
|
use std::task::Poll;
|
||||||
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
|
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
|
||||||
|
use tldextract::TldOption;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
pub struct Icons {
|
pub struct Icons {
|
||||||
|
@ -227,14 +229,25 @@ impl Icons {
|
||||||
weight += 1;
|
weight += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mentions_logo = |attr_name| {
|
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
|
||||||
ancestors.iter().any(|ancestor| {
|
ancestors
|
||||||
|
.iter()
|
||||||
|
.chain(iter::once(&elem_ref.value()))
|
||||||
|
.any(|ancestor| {
|
||||||
ancestor
|
ancestor
|
||||||
.attr(attr_name)
|
.attr(attr_name)
|
||||||
.map(|attr| regex!("logo([^s]|$)").is_match(&attr.to_lowercase()))
|
.map(|attr| is_match(&attr.to_lowercase()))
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mentions_logo = |attr_name| {
|
||||||
|
mentions(
|
||||||
|
attr_name,
|
||||||
|
Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
if mentions_logo("class") || mentions_logo("id") {
|
if mentions_logo("class") || mentions_logo("id") {
|
||||||
weight += 3;
|
weight += 3;
|
||||||
}
|
}
|
||||||
|
@ -245,6 +258,20 @@ impl Icons {
|
||||||
weight += 1;
|
weight += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(site_name) = url
|
||||||
|
.domain()
|
||||||
|
.and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain)
|
||||||
|
{
|
||||||
|
// If the alt attribute (on this element or an ancestor) contains any '-'-separated segment of the site name, add a large weight bonus (+10).
|
||||||
|
if site_name
|
||||||
|
.to_lowercase()
|
||||||
|
.split('-')
|
||||||
|
.any(|segment| mentions("alt", Box::new(move |attr| attr.contains(segment))))
|
||||||
|
{
|
||||||
|
weight += 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Some((elem_ref, weight))
|
Some((elem_ref, weight))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
Reference in a new issue