0.4.5: highest priority for alt containing site_name

This commit is contained in:
Sam Denty 2022-12-26 16:01:35 +00:00
parent 302b8dea63
commit 9d1374c342
No known key found for this signature in database
GPG key ID: 7B4EAF7B9E291B79
3 changed files with 72 additions and 9 deletions

37
Cargo.lock generated
View file

@ -1638,7 +1638,7 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "site_icons"
version = "0.4.4"
version = "0.4.5"
dependencies = [
"byteorder",
"clap",
@ -1658,6 +1658,7 @@ dependencies = [
"serde",
"serde_json",
"serde_with",
"tldextract",
"tokio",
"url",
"vec1",
@ -1783,6 +1784,26 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
[[package]]
name = "thiserror"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "time"
version = "0.3.17"
@ -1825,6 +1846,20 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tldextract"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec03259a0567ad58eed30812bc3e5eda8030f154abc70317ab57b14f00699ca4"
dependencies = [
"idna 0.2.3",
"log",
"regex",
"serde_json",
"thiserror",
"url",
]
[[package]]
name = "tokio"
version = "1.22.0"

View file

@ -1,6 +1,6 @@
[package]
name = "site_icons"
version = "0.4.4"
version = "0.4.5"
authors = ["Sam Denty <sam@samdenty.com>"]
edition = "2018"
license = "GPL-3.0"
@ -34,6 +34,7 @@ mime = { package = "mime_4", version = "0.4.0-a.0" }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
futures = "0.3.25"
tldextract = "0.6.0"
[target.'cfg(target_arch = "wasm32")'.dependencies]
reqwest = { package = "reqwest-wasm", version = "0.11.15", features = [

View file

@ -10,8 +10,10 @@ use reqwest::{header::*, IntoUrl};
use scraper::{ElementRef, Html};
use serde::Deserialize;
use std::convert::TryInto;
use std::iter;
use std::task::Poll;
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
use tldextract::TldOption;
use url::Url;
pub struct Icons {
@ -227,14 +229,25 @@ impl Icons {
weight += 1;
}
let mentions_logo = |attr_name| {
ancestors.iter().any(|ancestor| {
ancestor
.attr(attr_name)
.map(|attr| regex!("logo([^s]|$)").is_match(&attr.to_lowercase()))
.unwrap_or(false)
})
// Reports whether `attr_name`, read from any entry of `ancestors` or from the
// element itself, holds a value accepted by `is_match`. The value is lowercased
// before the predicate sees it; nodes without the attribute never match.
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
    ancestors
        .iter()
        .chain(iter::once(&elem_ref.value()))
        .any(|node| {
            node.attr(attr_name)
                .map_or(false, |value| is_match(&value.to_lowercase()))
        })
};
// Shorthand over `mentions`: true when the (lowercased) `attr_name` value on any
// inspected node contains "logo" followed by a character other than `s`, or by
// the end of the string.
let mentions_logo = |attr_name| {
    mentions(attr_name, Box::new(|value| regex!("logo([^s]|$)").is_match(value)))
};
if mentions_logo("class") || mentions_logo("id") {
weight += 3;
}
@ -245,6 +258,20 @@ impl Icons {
weight += 1;
}
// Give a large bonus when an `alt` attribute mentions the site's own name.
//
// The name is the `domain` component that tldextract reports for the URL's host.
// An extraction failure is treated as "no site name" instead of panicking, since
// the host comes from the page being scraped.
//
// NOTE(review): the extractor is rebuilt on every pass through this code; if that
// shows up in profiles, build it once outside the per-element closure.
if let Some(site_name) = url.domain().and_then(|domain| {
    TldOption::default()
        .build()
        .extract(domain)
        .ok()
        .and_then(|extracted| extracted.domain)
}) {
    // if the alt contains the site_name then highest priority
    if site_name
        .to_lowercase()
        .split('-')
        // Names containing "--" (e.g. punycode `xn--...`) yield empty segments,
        // and `contains("")` is always true, so skip them to avoid boosting
        // every element that merely has an `alt` attribute.
        .filter(|segment| !segment.is_empty())
        .any(|segment| mentions("alt", Box::new(move |attr| attr.contains(segment))))
    {
        weight += 10;
    }
}
Some((elem_ref, weight))
})
.collect();