2021-01-30 19:12:13 +00:00
|
|
|
use crate::{utils::encode_svg, Icon, IconInfo, IconKind, CLIENT};
|
2021-01-29 12:23:15 +00:00
|
|
|
use future::join_all;
|
|
|
|
use futures::StreamExt;
|
|
|
|
use futures::{prelude::*, task::noop_waker};
|
|
|
|
use html5ever::{
|
|
|
|
driver,
|
|
|
|
tendril::{Tendril, TendrilSink},
|
|
|
|
};
|
|
|
|
use reqwest::{header::*, IntoUrl};
|
2021-01-29 16:26:04 +00:00
|
|
|
use scraper::{ElementRef, Html};
|
2021-01-29 12:23:15 +00:00
|
|
|
use serde::Deserialize;
|
2022-10-01 22:45:09 +01:00
|
|
|
use std::convert::TryInto;
|
2021-01-29 12:23:15 +00:00
|
|
|
use std::task::Poll;
|
|
|
|
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
|
|
|
|
use url::Url;
|
|
|
|
|
|
|
|
pub struct Icons {
|
|
|
|
entries: Vec<Icon>,
|
|
|
|
pending_entries: HashMap<
|
|
|
|
Url,
|
|
|
|
(
|
|
|
|
IconKind,
|
2022-10-01 22:45:09 +01:00
|
|
|
HashMap<String, String>,
|
2022-07-24 21:14:34 +01:00
|
|
|
Pin<Box<dyn Future<Output = Result<IconInfo, Box<dyn Error>>>>>,
|
2021-01-29 12:23:15 +00:00
|
|
|
),
|
|
|
|
>,
|
|
|
|
}
|
|
|
|
|
|
|
|
fn add_icon_entry(
|
|
|
|
entries: &mut Vec<Icon>,
|
|
|
|
url: Url,
|
2022-10-01 22:45:09 +01:00
|
|
|
headers: HashMap<String, String>,
|
2021-01-29 12:23:15 +00:00
|
|
|
kind: IconKind,
|
2022-07-24 21:14:34 +01:00
|
|
|
info: Result<IconInfo, Box<dyn Error>>,
|
2021-01-29 12:23:15 +00:00
|
|
|
) {
|
|
|
|
match info {
|
2022-10-01 22:45:09 +01:00
|
|
|
Ok(info) => entries.push(Icon {
|
|
|
|
url,
|
|
|
|
headers,
|
|
|
|
kind,
|
|
|
|
info,
|
|
|
|
}),
|
2021-01-29 16:26:04 +00:00
|
|
|
Err(_) => warn_err!(info, "failed to parse icon"),
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Icons {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Icons {
|
|
|
|
entries: Vec::new(),
|
|
|
|
pending_entries: HashMap::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Add an icon URL and start fetching it
|
2021-01-29 16:26:04 +00:00
|
|
|
pub fn add_icon(&mut self, url: Url, kind: IconKind, sizes: Option<String>) {
|
2022-10-01 22:45:09 +01:00
|
|
|
self.add_icon_with_headers(url, HashMap::new(), kind, sizes)
|
2022-10-01 21:57:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Add an icon URL and start fetching it,
|
|
|
|
/// along with the specified headers
|
|
|
|
pub fn add_icon_with_headers(
|
|
|
|
&mut self,
|
|
|
|
url: Url,
|
2022-10-01 22:45:09 +01:00
|
|
|
headers: HashMap<String, String>,
|
2022-10-01 21:57:23 +01:00
|
|
|
kind: IconKind,
|
|
|
|
sizes: Option<String>,
|
|
|
|
) {
|
2021-01-29 12:23:15 +00:00
|
|
|
// check to see if it already exists
|
|
|
|
let mut entries = self.entries.iter_mut();
|
|
|
|
if let Some(existing_kind) = self
|
|
|
|
.pending_entries
|
|
|
|
.get_mut(&url)
|
2022-10-01 22:45:09 +01:00
|
|
|
.map(|(kind, _, _)| kind)
|
2022-05-06 16:40:29 +03:00
|
|
|
.or_else(|| {
|
|
|
|
entries.find_map(|icon| {
|
|
|
|
if icon.url.eq(&url) {
|
|
|
|
Some(&mut icon.kind)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
2021-01-29 12:23:15 +00:00
|
|
|
{
|
|
|
|
// if the kind is more important, replace it
|
|
|
|
if &kind > existing_kind {
|
|
|
|
*existing_kind = kind;
|
|
|
|
}
|
2021-01-29 16:26:04 +00:00
|
|
|
return;
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
|
2022-10-01 22:45:09 +01:00
|
|
|
let mut info = Box::pin(IconInfo::load(
|
|
|
|
url.clone(),
|
|
|
|
(&headers).try_into().unwrap(),
|
|
|
|
sizes,
|
|
|
|
));
|
2021-01-29 12:23:15 +00:00
|
|
|
|
|
|
|
// Start fetching the icon
|
|
|
|
let noop_waker = noop_waker();
|
|
|
|
let cx = &mut Context::from_waker(&noop_waker);
|
|
|
|
match info.poll_unpin(cx) {
|
2022-10-01 22:45:09 +01:00
|
|
|
Poll::Ready(info) => add_icon_entry(&mut self.entries, url, headers, kind, info),
|
2021-01-29 12:23:15 +00:00
|
|
|
Poll::Pending => {
|
2022-10-01 22:45:09 +01:00
|
|
|
self.pending_entries.insert(url, (kind, headers, info));
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
|
|
|
|
let res = CLIENT.get(url).header(ACCEPT, "text/html").send().await?;
|
|
|
|
let url = res.url().clone();
|
|
|
|
let mut body = res.bytes_stream();
|
|
|
|
|
|
|
|
let mut parser = driver::parse_document(Html::new_document(), Default::default());
|
|
|
|
while let Some(data) = body.next().await {
|
2021-01-29 16:26:04 +00:00
|
|
|
if let Ok(data) = Tendril::try_from_byte_slice(&data?) {
|
|
|
|
parser.process(data)
|
|
|
|
}
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
let document = parser.finish();
|
|
|
|
|
|
|
|
{
|
|
|
|
let mut found_favicon = false;
|
|
|
|
|
2021-01-29 16:26:04 +00:00
|
|
|
for elem_ref in document.select(selector!(
|
2021-01-29 12:23:15 +00:00
|
|
|
"link[rel='icon']",
|
|
|
|
"link[rel='shortcut icon']",
|
|
|
|
"link[rel='apple-touch-icon']",
|
|
|
|
"link[rel='apple-touch-icon-precomposed']"
|
|
|
|
)) {
|
2021-01-29 16:26:04 +00:00
|
|
|
let elem = elem_ref.value();
|
2021-01-29 12:23:15 +00:00
|
|
|
if let Some(href) = elem.attr("href").and_then(|href| url.join(&href).ok()) {
|
2021-01-29 16:26:04 +00:00
|
|
|
self.add_icon(
|
|
|
|
href,
|
|
|
|
IconKind::SiteFavicon,
|
|
|
|
elem.attr("sizes").map(|sizes| sizes.into()),
|
|
|
|
);
|
|
|
|
|
|
|
|
found_favicon = true;
|
2021-01-29 12:23:15 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for default favicon.ico
|
|
|
|
if !found_favicon {
|
2021-01-29 16:26:04 +00:00
|
|
|
self.add_icon(
|
|
|
|
url.join("/favicon.ico").unwrap(),
|
|
|
|
IconKind::SiteFavicon,
|
|
|
|
None,
|
|
|
|
);
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-29 16:26:04 +00:00
|
|
|
{
|
|
|
|
let mut logos: Vec<_> = document
|
|
|
|
.select(selector!(
|
|
|
|
"header img, header svg",
|
|
|
|
"img[src*=logo]",
|
|
|
|
"img[alt*=logo], svg[alt*=logo]",
|
|
|
|
"img[class*=logo], svg[class*=logo]",
|
|
|
|
))
|
|
|
|
.map(|elem_ref| {
|
|
|
|
let elem = elem_ref.value();
|
|
|
|
let mut weight = 0;
|
|
|
|
|
|
|
|
// if in the header
|
|
|
|
if elem_ref
|
|
|
|
.ancestors()
|
|
|
|
.map(ElementRef::wrap)
|
|
|
|
.flatten()
|
|
|
|
.any(|element| element.value().name() == "header")
|
|
|
|
{
|
|
|
|
weight += 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
let mentions_logo = |attr_name| {
|
|
|
|
elem
|
|
|
|
.attr(attr_name)
|
2021-02-02 16:50:31 +00:00
|
|
|
.map(|attr| regex!("logo([^s]|$)").is_match(&attr.to_lowercase()))
|
2021-01-29 16:26:04 +00:00
|
|
|
.unwrap_or(false)
|
|
|
|
};
|
|
|
|
if mentions_logo("class") || mentions_logo("id") {
|
|
|
|
weight += 3;
|
|
|
|
}
|
|
|
|
if mentions_logo("alt") {
|
|
|
|
weight += 2;
|
|
|
|
}
|
|
|
|
if mentions_logo("src") {
|
|
|
|
weight += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
(elem_ref, weight)
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
logos.sort_by(|(_, a_weight), (_, b_weight)| b_weight.cmp(a_weight));
|
|
|
|
|
|
|
|
// prefer <img> over svg
|
|
|
|
let mut prev_weight = None;
|
|
|
|
for (i, (logo, weight)) in logos.iter().enumerate() {
|
|
|
|
if let Some(prev_weight) = prev_weight {
|
|
|
|
if weight != prev_weight {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
prev_weight = Some(weight);
|
|
|
|
|
|
|
|
if logo.value().name() == "img" {
|
|
|
|
let (logo, weight) = logos.remove(i);
|
|
|
|
logos.insert(0, (logo, weight + 1));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (elem_ref, _) in logos {
|
|
|
|
let elem = elem_ref.value();
|
|
|
|
|
|
|
|
if elem.name() == "svg" {
|
|
|
|
let data_uri = Url::parse(&encode_svg(&elem_ref.html())).unwrap();
|
|
|
|
self.add_icon(data_uri, IconKind::SiteLogo, None);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(href) = elem_ref
|
|
|
|
.value()
|
|
|
|
.attr("src")
|
|
|
|
.and_then(|href| url.join(&href).ok())
|
|
|
|
{
|
|
|
|
self.add_icon(href, IconKind::SiteLogo, None);
|
2021-01-29 12:23:15 +00:00
|
|
|
break;
|
|
|
|
};
|
2021-01-29 16:26:04 +00:00
|
|
|
}
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
|
2021-01-29 16:26:04 +00:00
|
|
|
for elem_ref in document.select(selector!("link[rel='manifest']")) {
|
|
|
|
if let Some(href) = elem_ref
|
2021-01-29 12:23:15 +00:00
|
|
|
.value()
|
|
|
|
.attr("href")
|
|
|
|
.and_then(|href| url.join(&href).ok())
|
|
|
|
{
|
2021-01-29 16:26:04 +00:00
|
|
|
warn_err!(self.load_manifest(href).await, "failed to fetch manifest");
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn load_manifest(&mut self, manifest_url: Url) -> Result<(), Box<dyn Error>> {
|
|
|
|
#[derive(Deserialize)]
|
|
|
|
struct ManifestIcon {
|
|
|
|
src: String,
|
|
|
|
sizes: Option<String>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Deserialize)]
|
|
|
|
struct Manifest {
|
|
|
|
icons: Option<Vec<ManifestIcon>>,
|
|
|
|
}
|
|
|
|
|
|
|
|
let manifest: Manifest = CLIENT
|
|
|
|
.get(manifest_url.as_str())
|
|
|
|
.send()
|
|
|
|
.await?
|
|
|
|
.json()
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
if let Some(icons) = manifest.icons {
|
|
|
|
for icon in icons {
|
|
|
|
if let Ok(src) = manifest_url.join(&icon.src) {
|
|
|
|
let _ = self.add_icon(src, IconKind::AppIcon, icon.sizes);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-01-29 16:26:04 +00:00
|
|
|
/// Fetch all the icons. Ordered from highest to lowest resolution
|
2021-01-29 12:23:15 +00:00
|
|
|
///
|
|
|
|
/// ```
|
|
|
|
/// # async fn run() {
|
|
|
|
/// let icons = Icons::new();
|
|
|
|
/// icons.load_website("https://github.com").await?;
|
|
|
|
///
|
|
|
|
/// let entries = icons.entries().await;
|
|
|
|
/// for icon in entries {
|
|
|
|
/// println("{:?}", icon)
|
|
|
|
/// }
|
|
|
|
/// ```
|
|
|
|
pub async fn entries(mut self) -> Vec<Icon> {
|
|
|
|
let (urls, infos): (Vec<_>, Vec<_>) = self
|
|
|
|
.pending_entries
|
|
|
|
.into_iter()
|
2022-10-01 22:45:09 +01:00
|
|
|
.map(|(url, (kind, headers, info))| ((url, headers, kind), info))
|
2021-01-29 12:23:15 +00:00
|
|
|
.unzip();
|
|
|
|
|
|
|
|
let mut urls = urls.into_iter();
|
|
|
|
|
|
|
|
for info in join_all(infos).await {
|
2022-10-01 22:45:09 +01:00
|
|
|
let (url, headers, kind) = urls.next().unwrap();
|
|
|
|
add_icon_entry(&mut self.entries, url, headers, kind, info);
|
2021-01-29 12:23:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
self.entries.sort();
|
|
|
|
|
|
|
|
self.entries
|
|
|
|
}
|
|
|
|
}
|