From a1c38dbc9abe30ba3a269c4a48a5cce351b04a61 Mon Sep 17 00:00:00 2001 From: Markus Kohlhase Date: Fri, 15 Sep 2023 15:25:39 +0200 Subject: [PATCH] Use default rust formatting --- .rustfmt.toml | 3 - src/bin/site-icons.rs | 50 ++-- src/html_parser/head.rs | 184 ++++++------- src/html_parser/site_logo.rs | 271 ++++++++++---------- src/icon/icon_info.rs | 425 ++++++++++++++++--------------- src/icon/icon_size/gif.rs | 22 +- src/icon/icon_size/ico.rs | 86 +++---- src/icon/icon_size/icon_sizes.rs | 120 ++++----- src/icon/icon_size/jpeg.rs | 90 +++---- src/icon/icon_size/mod.rs | 104 ++++---- src/icon/icon_size/png.rs | 18 +- src/icon/icon_size/svg.rs | 100 ++++---- src/icon/mod.rs | 157 ++++++------ src/icons.rs | 358 +++++++++++++------------- src/lib.rs | 10 +- src/manifest.rs | 52 ++-- src/utils/background_poll.rs | 62 ++--- src/utils/macros.rs | 8 +- src/utils/mod.rs | 6 +- src/utils/svg_encoder.rs | 80 +++--- 20 files changed, 1104 insertions(+), 1102 deletions(-) delete mode 100644 .rustfmt.toml diff --git a/.rustfmt.toml b/.rustfmt.toml deleted file mode 100644 index b68828e..0000000 --- a/.rustfmt.toml +++ /dev/null @@ -1,3 +0,0 @@ -indent_style = "Block" -reorder_imports = true -tab_spaces = 2 diff --git a/src/bin/site-icons.rs b/src/bin/site-icons.rs index 2d373c5..5af6a3a 100644 --- a/src/bin/site-icons.rs +++ b/src/bin/site-icons.rs @@ -6,37 +6,37 @@ use std::error::Error; #[derive(Parser)] struct Opts { - url: String, + url: String, - #[clap(long)] - fast: bool, - #[clap(long)] - json: bool, - #[clap(long)] - /// Print out errors that occurred for skipped items - debug: bool, + #[clap(long)] + fast: bool, + #[clap(long)] + json: bool, + #[clap(long)] + /// Print out errors that occurred for skipped items + debug: bool, } #[tokio::main] async fn main() -> Result<(), Box> { - let mut icons = SiteIcons::new(); - let opts: Opts = Opts::parse(); + let mut icons = SiteIcons::new(); + let opts: Opts = Opts::parse(); - if opts.debug { - let mut builder = Builder::new(); - builder.filter_level(LevelFilter::Info); - builder.init(); - } - - let entries = icons.load_website(opts.url, opts.fast).await?; - - if opts.json { - println!("{}", serde_json::to_string_pretty(&entries)?) - } else { - for icon in entries { - println!("{} {} {}", icon.url, icon.kind, icon.info); + if opts.debug { + let mut builder = Builder::new(); + builder.filter_level(LevelFilter::Info); + builder.init(); } - } - Ok(()) + let entries = icons.load_website(opts.url, opts.fast).await?; + + if opts.json { + println!("{}", serde_json::to_string_pretty(&entries)?) + } else { + for icon in entries { + println!("{} {} {}", icon.url, icon.kind, icon.info); + } + } + + Ok(()) } diff --git a/src/html_parser/head.rs b/src/html_parser/head.rs index 075b938..65b0aab 100644 --- a/src/html_parser/head.rs +++ b/src/html_parser/head.rs @@ -8,9 +8,9 @@ use futures::Stream; use futures::StreamExt; use lol_html::{element, errors::RewritingError, HtmlRewriter, Settings}; use std::{ - cell::RefCell, - error::Error, - fmt::{self, Display}, + cell::RefCell, + error::Error, + fmt::{self, Display}, }; use url::Url; @@ -18,104 +18,106 @@ use url::Url; struct EndOfHead {} impl Display for EndOfHead { - fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { - Ok(()) - } + fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { + Ok(()) + } } impl Error for EndOfHead {} pub async fn parse_head( - url: &Url, - mut body: impl Stream, String>> + Unpin, + url: &Url, + mut body: impl Stream, String>> + Unpin, ) -> Result, Box> { - let mut icons = Vec::new(); - let new_icons = RefCell::new(Vec::new()); + let mut icons = Vec::new(); + let new_icons = RefCell::new(Vec::new()); - { - let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - element!("head", |head| { - head.on_end_tag(|_| Err(Box::new(EndOfHead {})))?; - Ok(()) - }), - element!("link[rel~='manifest']", |manifest| { - if let Some(href) = manifest - .get_attribute("href") - .and_then(|href| url.join(&href).ok()) - { - new_icons.borrow_mut().push( - async { SiteIcons::load_manifest(href).await.unwrap_or(Vec::new()) } - .boxed_local() - .shared(), - ) + { + let mut rewriter = HtmlRewriter::new( + Settings { + element_content_handlers: vec![ + element!("head", |head| { + head.on_end_tag(|_| Err(Box::new(EndOfHead {})))?; + Ok(()) + }), + element!("link[rel~='manifest']", |manifest| { + if let Some(href) = manifest + .get_attribute("href") + .and_then(|href| url.join(&href).ok()) + { + new_icons.borrow_mut().push( + async { + SiteIcons::load_manifest(href).await.unwrap_or(Vec::new()) + } + .boxed_local() + .shared(), + ) + } + + Ok(()) + }), + element!( + join_with!( + ",", + "link[rel~='icon']", + "link[rel~='apple-touch-icon']", + "link[rel~='apple-touch-icon-precomposed']" + ), + |link| { + let rel = link.get_attribute("rel").unwrap(); + + if let Some(href) = link + .get_attribute("href") + .and_then(|href| url.join(&href).ok()) + { + let kind = if rel.contains("apple-touch-icon") { + IconKind::AppIcon + } else { + IconKind::SiteFavicon + }; + + let sizes = link.get_attribute("sizes"); + + new_icons.borrow_mut().push( + async { + Icon::load(href, kind, sizes) + .await + .map(|icon| vec![icon]) + .unwrap_or(Vec::new()) + } + .boxed_local() + .shared(), + ) + }; + + Ok(()) + } + ), + ], + ..Settings::default() + }, + |_: &[u8]| {}, + ); + + while let Some(data) = poll_in_background(body.next(), join_all(icons.clone())).await { + let result = rewriter.write(&data?); + + icons.extend(new_icons.borrow_mut().drain(..)); + + match result { + Err(RewritingError::ContentHandlerError(result)) => { + match result.downcast::() { + Ok(_) => break, + Err(err) => return Err(err), + }; + } + + result => result?, } - - Ok(()) - }), - element!( - join_with!( - ",", - "link[rel~='icon']", - "link[rel~='apple-touch-icon']", - "link[rel~='apple-touch-icon-precomposed']" - ), - |link| { - let rel = link.get_attribute("rel").unwrap(); - - if let Some(href) = link - .get_attribute("href") - .and_then(|href| url.join(&href).ok()) - { - let kind = if rel.contains("apple-touch-icon") { - IconKind::AppIcon - } else { - IconKind::SiteFavicon - }; - - let sizes = link.get_attribute("sizes"); - - new_icons.borrow_mut().push( - async { - Icon::load(href, kind, sizes) - .await - .map(|icon| vec![icon]) - .unwrap_or(Vec::new()) - } - .boxed_local() - .shared(), - ) - }; - - Ok(()) - } - ), - ], - ..Settings::default() - }, - |_: &[u8]| {}, - ); - - while let Some(data) = poll_in_background(body.next(), join_all(icons.clone())).await { - let result = rewriter.write(&data?); - - icons.extend(new_icons.borrow_mut().drain(..)); - - match result { - Err(RewritingError::ContentHandlerError(result)) => { - match result.downcast::() { - Ok(_) => break, - Err(err) => return Err(err), - }; } - - result => result?, - } } - } - let icons = join_all(icons).await.into_iter().flatten().collect(); + let icons = join_all(icons).await.into_iter().flatten().collect(); - Ok(icons) + Ok(icons) } diff --git a/src/html_parser/site_logo.rs b/src/html_parser/site_logo.rs index 297c8aa..8cab121 100644 --- a/src/html_parser/site_logo.rs +++ b/src/html_parser/site_logo.rs @@ -1,8 +1,8 @@ use crate::{utils::encode_svg, Icon, IconKind}; use futures::{Stream, StreamExt}; use html5ever::{ - driver, - tendril::{Tendril, TendrilSink}, + driver, + tendril::{Tendril, TendrilSink}, }; use scraper::{ElementRef, Html}; use std::error::Error; @@ -11,147 +11,146 @@ use tldextract::TldOption; use url::Url; pub async fn parse_site_logo( - url: &Url, - mut body: impl Stream, String>> + Unpin, - is_blacklisted: impl Fn(&Url) -> bool, + url: &Url, + mut body: impl Stream, String>> + Unpin, + is_blacklisted: impl Fn(&Url) -> bool, ) -> Result> { - let mut parser = driver::parse_document(Html::new_document(), Default::default()); - while let Some(data) = body.next().await { - if let Ok(data) = Tendril::try_from_byte_slice(&data?) { - parser.process(data) - } - } - - let document = parser.finish(); - - let mut logos: Vec<_> = document - .select(selector!( - "a[href='/'] img, a[href='/'] svg", - "header img, header svg", - "img[src*=logo]", - "img[alt*=logo], svg[alt*=logo]", - "*[class*=logo] img, *[class*=logo] svg", - "*[id*=logo] img, *[id*=logo] svg", - "img[class*=logo], svg[class*=logo]", - "img[id*=logo], svg[id*=logo]", - )) - .enumerate() - .filter_map(|(i, elem_ref)| { - let elem = elem_ref.value(); - let ancestors = elem_ref - .ancestors() - .map(ElementRef::wrap) - .flatten() - .map(|elem_ref| elem_ref.value()) - .collect::>(); - - let skip_classnames = regex!("menu|search"); - let should_skip = ancestors.iter().any(|ancestor| { - ancestor - .attr("class") - .map(|attr| skip_classnames.is_match(&attr.to_lowercase())) - .or_else(|| { - ancestor - .attr("id") - .map(|attr| skip_classnames.is_match(&attr.to_lowercase())) - }) - .unwrap_or(false) - }); - - if should_skip { - return None; - } - - let mut weight = 0; - - // if in the header - if ancestors.iter().any(|element| element.name() == "header") { - weight += 2; - } - - if i == 0 { - weight += 1; - } - - let mentions = |attr_name, is_match: Box bool>| { - ancestors.iter().chain(iter::once(&elem)).any(|ancestor| { - ancestor - .attr(attr_name) - .map(|attr| is_match(&attr.to_lowercase())) - .unwrap_or(false) - }) - }; - - if mentions("href", Box::new(|attr| attr == "/")) { - weight += 5; - }; - - let mentions_logo = |attr_name| { - mentions( - attr_name, - Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)), - ) - }; - - if mentions_logo("class") || mentions_logo("id") { - weight += 3; - } - if mentions_logo("alt") { - weight += 2; - } - if mentions_logo("src") { - weight += 1; - } - - if let Some(site_name) = url - .domain() - .and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain) - { - // if the alt contains the site_name then highest priority - if site_name - .to_lowercase() - .split('-') - .any(|segment| mentions("alt", Box::new(move |attr| attr.contains(segment)))) - { - weight += 10; + let mut parser = driver::parse_document(Html::new_document(), Default::default()); + while let Some(data) = body.next().await { + if let Ok(data) = Tendril::try_from_byte_slice(&data?) { + parser.process(data) } - } + } - let href = if elem.name() == "svg" { - Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap()) - } else { - elem.attr("src").and_then(|href| url.join(&href).ok()) - }; + let document = parser.finish(); - if let Some(href) = &href { - if is_blacklisted(href) { - return None; + let mut logos: Vec<_> = + document + .select(selector!( + "a[href='/'] img, a[href='/'] svg", + "header img, header svg", + "img[src*=logo]", + "img[alt*=logo], svg[alt*=logo]", + "*[class*=logo] img, *[class*=logo] svg", + "*[id*=logo] img, *[id*=logo] svg", + "img[class*=logo], svg[class*=logo]", + "img[id*=logo], svg[id*=logo]", + )) + .enumerate() + .filter_map(|(i, elem_ref)| { + let elem = elem_ref.value(); + let ancestors = elem_ref + .ancestors() + .map(ElementRef::wrap) + .flatten() + .map(|elem_ref| elem_ref.value()) + .collect::>(); + + let skip_classnames = regex!("menu|search"); + let should_skip = ancestors.iter().any(|ancestor| { + ancestor + .attr("class") + .map(|attr| skip_classnames.is_match(&attr.to_lowercase())) + .or_else(|| { + ancestor + .attr("id") + .map(|attr| skip_classnames.is_match(&attr.to_lowercase())) + }) + .unwrap_or(false) + }); + + if should_skip { + return None; + } + + let mut weight = 0; + + // if in the header + if ancestors.iter().any(|element| element.name() == "header") { + weight += 2; + } + + if i == 0 { + weight += 1; + } + + let mentions = |attr_name, is_match: Box bool>| { + ancestors.iter().chain(iter::once(&elem)).any(|ancestor| { + ancestor + .attr(attr_name) + .map(|attr| is_match(&attr.to_lowercase())) + .unwrap_or(false) + }) + }; + + if mentions("href", Box::new(|attr| attr == "/")) { + weight += 5; + }; + + let mentions_logo = |attr_name| { + mentions( + attr_name, + Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)), + ) + }; + + if mentions_logo("class") || mentions_logo("id") { + weight += 3; + } + if mentions_logo("alt") { + weight += 2; + } + if mentions_logo("src") { + weight += 1; + } + + if let Some(site_name) = url + .domain() + .and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain) + { + // if the alt contains the site_name then highest priority + if site_name.to_lowercase().split('-').any(|segment| { + mentions("alt", Box::new(move |attr| attr.contains(segment))) + }) { + weight += 10; + } + } + + let href = if elem.name() == "svg" { + Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap()) + } else { + elem.attr("src").and_then(|href| url.join(&href).ok()) + }; + + if let Some(href) = &href { + if is_blacklisted(href) { + return None; + } + } + + href.map(|href| (href, elem_ref, weight)) + }) + .collect(); + + logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight)); + + // prefer over svg + let mut prev_weight = None; + for (href, elem_ref, weight) in &logos { + if let Some(prev_weight) = prev_weight { + if weight != prev_weight { + break; + } } - } + prev_weight = Some(weight); - href.map(|href| (href, elem_ref, weight)) - }) - .collect(); - - logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight)); - - // prefer over svg - let mut prev_weight = None; - for (href, elem_ref, weight) in &logos { - if let Some(prev_weight) = prev_weight { - if weight != prev_weight { - break; - } + if elem_ref.value().name() == "img" { + return Icon::load(href.clone(), IconKind::SiteLogo, None).await; + } } - prev_weight = Some(weight); - if elem_ref.value().name() == "img" { - return Icon::load(href.clone(), IconKind::SiteLogo, None).await; + match logos.into_iter().next() { + Some((href, _, _)) => Icon::load(href.clone(), IconKind::SiteLogo, None).await, + None => Err("No site logo found".into()), } - } - - match logos.into_iter().next() { - Some((href, _, _)) => Icon::load(href.clone(), IconKind::SiteLogo, None).await, - None => Err("No site logo found".into()), - } } diff --git a/src/icon/icon_info.rs b/src/icon/icon_info.rs index 50ab597..a407838 100644 --- a/src/icon/icon_info.rs +++ b/src/icon/icon_info.rs @@ -6,269 +6,272 @@ use mime::MediaType; use reqwest::{header::*, Url}; use serde::{Deserialize, Serialize}; use std::{ - cmp::Ordering, - convert::TryFrom, - error::Error, - fmt::{self, Display}, - io, + cmp::Ordering, + convert::TryFrom, + error::Error, + fmt::{self, Display}, + io, }; enum IconKind { - SVG, - PNG, - JPEG, - ICO, - GIF, + SVG, + PNG, + JPEG, + ICO, + GIF, } #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] #[serde(tag = "type")] #[serde(rename_all = "lowercase")] pub enum IconInfo { - PNG { size: IconSize }, - JPEG { size: IconSize }, - ICO { sizes: IconSizes }, - GIF { size: IconSize }, - SVG { size: Option }, + PNG { size: IconSize }, + JPEG { size: IconSize }, + ICO { sizes: IconSizes }, + GIF { size: IconSize }, + SVG { size: Option }, } impl IconInfo { - async fn decode( - reader: &mut R, - kind: Option, - ) -> Result> { - let mut header = [0; 2]; - reader.read_exact(&mut header).await?; + async fn decode( + reader: &mut R, + kind: Option, + ) -> Result> { + let mut header = [0; 2]; + reader.read_exact(&mut header).await?; - match (kind, &header) { - (Some(IconKind::SVG), bytes) => { - let size = get_svg_size(bytes, reader).await?; - Ok(IconInfo::SVG { size }) - } - (_, &[0x60, byte_two]) => { - let size = get_svg_size(&[0x60, byte_two], reader).await?; - Ok(IconInfo::SVG { size }) - } - (Some(IconKind::PNG), _) | (_, b"\x89P") => { - let size = get_png_size(reader).await?; - Ok(IconInfo::PNG { size }) - } - (Some(IconKind::ICO), _) | (_, &[0x00, 0x00]) => { - let sizes = get_ico_sizes(reader).await?; - Ok(IconInfo::ICO { sizes }) - } - (Some(IconKind::JPEG), _) | (_, &[0xFF, 0xD8]) => { - let size = get_jpeg_size(reader).await?; - Ok(IconInfo::JPEG { size }) - } - (Some(IconKind::GIF), _) | (_, b"GI") => { - let size = get_gif_size(reader).await?; - Ok(IconInfo::GIF { size }) - } - _ => Err(format!("unknown icon type ({:?})", header).into()), + match (kind, &header) { + (Some(IconKind::SVG), bytes) => { + let size = get_svg_size(bytes, reader).await?; + Ok(IconInfo::SVG { size }) + } + (_, &[0x60, byte_two]) => { + let size = get_svg_size(&[0x60, byte_two], reader).await?; + Ok(IconInfo::SVG { size }) + } + (Some(IconKind::PNG), _) | (_, b"\x89P") => { + let size = get_png_size(reader).await?; + Ok(IconInfo::PNG { size }) + } + (Some(IconKind::ICO), _) | (_, &[0x00, 0x00]) => { + let sizes = get_ico_sizes(reader).await?; + Ok(IconInfo::ICO { sizes }) + } + (Some(IconKind::JPEG), _) | (_, &[0xFF, 0xD8]) => { + let size = get_jpeg_size(reader).await?; + Ok(IconInfo::JPEG { size }) + } + (Some(IconKind::GIF), _) | (_, b"GI") => { + let size = get_gif_size(reader).await?; + Ok(IconInfo::GIF { size }) + } + _ => Err(format!("unknown icon type ({:?})", header).into()), + } } - } - pub async fn load( - url: Url, - headers: HeaderMap, - sizes: Option, - ) -> Result> { - let sizes = sizes.as_ref().and_then(|s| IconSizes::try_from(s).ok()); + pub async fn load( + url: Url, + headers: HeaderMap, + sizes: Option, + ) -> Result> { + let sizes = sizes.as_ref().and_then(|s| IconSizes::try_from(s).ok()); - let (mime, mut body): (_, Box) = match url.scheme() { - "data" => { - let url = url.to_string(); - let url = DataUrl::process(&url).map_err(|_| "failed to parse data uri")?; + let (mime, mut body): (_, Box) = match url.scheme() { + "data" => { + let url = url.to_string(); + let url = DataUrl::process(&url).map_err(|_| "failed to parse data uri")?; - let mime = url.mime_type().to_string().parse::()?; + let mime = url.mime_type().to_string().parse::()?; - let body = Cursor::new( - url - .decode_to_vec() - .map_err(|_| "failed to decode data uri body")? - .0, - ); + let body = Cursor::new( + url.decode_to_vec() + .map_err(|_| "failed to decode data uri body")? + .0, + ); - (mime, Box::new(body)) - } + (mime, Box::new(body)) + } - _ => { - let res = CLIENT - .get(url) - .headers(headers) - .send() - .await? - .error_for_status()?; + _ => { + let res = CLIENT + .get(url) + .headers(headers) + .send() + .await? + .error_for_status()?; - if !res.status().is_success() { - return Err("failed to fetch".into()); + if !res.status().is_success() { + return Err("failed to fetch".into()); + }; + + let mime = res + .headers() + .get(CONTENT_TYPE) + .ok_or("no content type")? + .to_str()? + .parse::()?; + + let body = res + .bytes_stream() + .map(|result| { + result.map_err(|error| { + io::Error::new(io::ErrorKind::Other, error.to_string()) + }) + }) + .into_async_read(); + + (mime, Box::new(body)) + } }; - let mime = res - .headers() - .get(CONTENT_TYPE) - .ok_or("no content type")? - .to_str()? - .parse::()?; + let kind = match (mime.type_(), mime.subtype()) { + (mime::IMAGE, mime::PNG) => { + if let Some(sizes) = sizes { + return Ok(IconInfo::PNG { + size: *sizes.largest(), + }); + } + Some(IconKind::PNG) + } - let body = res - .bytes_stream() - .map(|result| { - result.map_err(|error| io::Error::new(io::ErrorKind::Other, error.to_string())) - }) - .into_async_read(); + (mime::IMAGE, mime::JPEG) => { + if let Some(sizes) = sizes { + return Ok(IconInfo::JPEG { + size: *sizes.largest(), + }); + } + Some(IconKind::JPEG) + } - (mime, Box::new(body)) - } - }; + (mime::IMAGE, "x-icon") | (mime::IMAGE, "vnd.microsoft.icon") => { + if let Some(sizes) = sizes { + return Ok(IconInfo::ICO { sizes }); + } - let kind = match (mime.type_(), mime.subtype()) { - (mime::IMAGE, mime::PNG) => { - if let Some(sizes) = sizes { - return Ok(IconInfo::PNG { - size: *sizes.largest(), - }); - } - Some(IconKind::PNG) - } + Some(IconKind::ICO) + } - (mime::IMAGE, mime::JPEG) => { - if let Some(sizes) = sizes { - return Ok(IconInfo::JPEG { - size: *sizes.largest(), - }); - } - Some(IconKind::JPEG) - } + (mime::IMAGE, mime::GIF) => { + if let Some(sizes) = sizes { + return Ok(IconInfo::GIF { + size: *sizes.largest(), + }); + } - (mime::IMAGE, "x-icon") | (mime::IMAGE, "vnd.microsoft.icon") => { - if let Some(sizes) = sizes { - return Ok(IconInfo::ICO { sizes }); - } + Some(IconKind::GIF) + } - Some(IconKind::ICO) - } + (mime::IMAGE, mime::SVG) | (mime::TEXT, mime::PLAIN) => { + if let Some(sizes) = sizes { + return Ok(IconInfo::SVG { + size: Some(*sizes.largest()), + }); + } - (mime::IMAGE, mime::GIF) => { - if let Some(sizes) = sizes { - return Ok(IconInfo::GIF { - size: *sizes.largest(), - }); - } + Some(IconKind::SVG) + } - Some(IconKind::GIF) - } + _ => None, + }; - (mime::IMAGE, mime::SVG) | (mime::TEXT, mime::PLAIN) => { - if let Some(sizes) = sizes { - return Ok(IconInfo::SVG { - size: Some(*sizes.largest()), - }); - } - - Some(IconKind::SVG) - } - - _ => None, - }; - - IconInfo::decode(&mut body, kind).await - } - - pub fn size(&self) -> Option<&IconSize> { - match self { - IconInfo::ICO { sizes } => Some(sizes.largest()), - IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => Some(size), - IconInfo::SVG { size } => size.as_ref(), + IconInfo::decode(&mut body, kind).await } - } - pub fn sizes(&self) -> Option { - match self { - IconInfo::ICO { sizes } => Some((*sizes).clone()), - IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => { - Some((*size).into()) - } - IconInfo::SVG { size } => size.map(|size| size.into()), + pub fn size(&self) -> Option<&IconSize> { + match self { + IconInfo::ICO { sizes } => Some(sizes.largest()), + IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => Some(size), + IconInfo::SVG { size } => size.as_ref(), + } } - } - pub fn mime_type(&self) -> &'static str { - match self { - IconInfo::PNG { .. } => "image/png", - IconInfo::JPEG { .. } => "image/jpeg", - IconInfo::ICO { .. } => "image/x-icon", - IconInfo::GIF { .. } => "image/gif", - IconInfo::SVG { .. } => "image/svg+xml", + pub fn sizes(&self) -> Option { + match self { + IconInfo::ICO { sizes } => Some((*sizes).clone()), + IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => { + Some((*size).into()) + } + IconInfo::SVG { size } => size.map(|size| size.into()), + } + } + + pub fn mime_type(&self) -> &'static str { + match self { + IconInfo::PNG { .. } => "image/png", + IconInfo::JPEG { .. } => "image/jpeg", + IconInfo::ICO { .. } => "image/x-icon", + IconInfo::GIF { .. } => "image/gif", + IconInfo::SVG { .. } => "image/svg+xml", + } } - } } impl Display for IconInfo { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match self { - IconInfo::PNG { size } => write!(f, "png {}", size), - IconInfo::JPEG { size } => write!(f, "jpeg {}", size), - IconInfo::GIF { size } => write!(f, "gif {}", size), - IconInfo::ICO { sizes } => write!(f, "ico {}", sizes), - IconInfo::SVG { size } => { - write!( - f, - "svg{}", - if let Some(size) = size { - format!(" {}", size) - } else { - "".to_string() - } - ) - } + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match self { + IconInfo::PNG { size } => write!(f, "png {}", size), + IconInfo::JPEG { size } => write!(f, "jpeg {}", size), + IconInfo::GIF { size } => write!(f, "gif {}", size), + IconInfo::ICO { sizes } => write!(f, "ico {}", sizes), + IconInfo::SVG { size } => { + write!( + f, + "svg{}", + if let Some(size) = size { + format!(" {}", size) + } else { + "".to_string() + } + ) + } + } } - } } impl Ord for IconInfo { - fn cmp(&self, other: &Self) -> Ordering { - match (self, other) { - (IconInfo::SVG { size }, IconInfo::SVG { size: other_size }) => match (size, other_size) { - (Some(_), None) => Ordering::Less, - (None, Some(_)) => Ordering::Greater, - (Some(size), Some(other_size)) => size.cmp(other_size), - (None, None) => Ordering::Equal, - }, - (IconInfo::SVG { .. }, _) => Ordering::Less, - (_, IconInfo::SVG { .. }) => Ordering::Greater, + fn cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (IconInfo::SVG { size }, IconInfo::SVG { size: other_size }) => { + match (size, other_size) { + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (Some(size), Some(other_size)) => size.cmp(other_size), + (None, None) => Ordering::Equal, + } + } + (IconInfo::SVG { .. }, _) => Ordering::Less, + (_, IconInfo::SVG { .. }) => Ordering::Greater, - _ => { - let size = self.size().unwrap(); - let other_size = other.size().unwrap(); + _ => { + let size = self.size().unwrap(); + let other_size = other.size().unwrap(); - size.cmp(other_size).then_with(|| match (self, other) { - (IconInfo::PNG { .. }, IconInfo::PNG { .. }) => Ordering::Equal, - (IconInfo::PNG { .. }, _) => Ordering::Less, - (_, IconInfo::PNG { .. }) => Ordering::Greater, + size.cmp(other_size).then_with(|| match (self, other) { + (IconInfo::PNG { .. }, IconInfo::PNG { .. }) => Ordering::Equal, + (IconInfo::PNG { .. }, _) => Ordering::Less, + (_, IconInfo::PNG { .. }) => Ordering::Greater, - (IconInfo::GIF { .. }, IconInfo::GIF { .. }) => Ordering::Equal, - (IconInfo::GIF { .. }, _) => Ordering::Less, - (_, IconInfo::GIF { .. }) => Ordering::Greater, + (IconInfo::GIF { .. }, IconInfo::GIF { .. }) => Ordering::Equal, + (IconInfo::GIF { .. }, _) => Ordering::Less, + (_, IconInfo::GIF { .. }) => Ordering::Greater, - (IconInfo::JPEG { .. }, IconInfo::JPEG { .. }) => Ordering::Equal, - (IconInfo::JPEG { .. }, _) => Ordering::Less, - (_, IconInfo::JPEG { .. }) => Ordering::Greater, + (IconInfo::JPEG { .. }, IconInfo::JPEG { .. }) => Ordering::Equal, + (IconInfo::JPEG { .. }, _) => Ordering::Less, + (_, IconInfo::JPEG { .. }) => Ordering::Greater, - (IconInfo::ICO { .. }, IconInfo::ICO { .. }) => Ordering::Equal, - (IconInfo::ICO { .. }, _) => Ordering::Less, - (_, IconInfo::ICO { .. }) => Ordering::Greater, + (IconInfo::ICO { .. }, IconInfo::ICO { .. }) => Ordering::Equal, + (IconInfo::ICO { .. }, _) => Ordering::Less, + (_, IconInfo::ICO { .. }) => Ordering::Greater, - _ => unreachable!(), - }) - } + _ => unreachable!(), + }) + } + } } - } } impl PartialOrd for IconInfo { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } diff --git a/src/icon/icon_size/gif.rs b/src/icon/icon_size/gif.rs index e75a60f..83991e8 100644 --- a/src/icon/icon_size/gif.rs +++ b/src/icon/icon_size/gif.rs @@ -2,23 +2,23 @@ use super::IconSize; use byteorder::{LittleEndian, ReadBytesExt}; use futures::prelude::*; use std::{ - error::Error, - io::{Cursor, Seek, SeekFrom}, + error::Error, + io::{Cursor, Seek, SeekFrom}, }; pub async fn get_gif_size( - reader: &mut R, + reader: &mut R, ) -> Result> { - let mut header = [0; 8]; - reader.read_exact(&mut header).await?; - let header = &mut Cursor::new(header); + let mut header = [0; 8]; + reader.read_exact(&mut header).await?; + let header = &mut Cursor::new(header); - assert_slice_eq!(header, 0, b"F8", "bad header"); + assert_slice_eq!(header, 0, b"F8", "bad header"); - header.seek(SeekFrom::Start(4))?; + header.seek(SeekFrom::Start(4))?; - let width = header.read_u16::()? as u32; - let height = header.read_u16::()? as u32; + let width = header.read_u16::()? as u32; + let height = header.read_u16::()? as u32; - Ok(IconSize::new(width, height)) + Ok(IconSize::new(width, height)) } diff --git a/src/icon/icon_size/ico.rs b/src/icon/icon_size/ico.rs index a86a0de..7f155b7 100644 --- a/src/icon/icon_size/ico.rs +++ b/src/icon/icon_size/ico.rs @@ -2,58 +2,58 @@ use super::{png::get_png_size, IconSize, IconSizes}; use byteorder::{LittleEndian, ReadBytesExt as _}; use futures::prelude::*; use std::{ - convert::TryInto, - error::Error, - io::{Cursor, Seek, SeekFrom}, + convert::TryInto, + error::Error, + io::{Cursor, Seek, SeekFrom}, }; const ICO_TYPE: u16 = 1; const INDEX_SIZE: u16 = 16; pub async fn get_ico_sizes( - reader: &mut R, + reader: &mut R, ) -> Result> { - let mut offset = 4; - let mut header = [0; 4]; - reader.read_exact(&mut header).await?; - let mut header = Cursor::new(header); + let mut offset = 4; + let mut header = [0; 4]; + reader.read_exact(&mut header).await?; + let mut header = Cursor::new(header); - let icon_type = header.read_u16::()?; + let icon_type = header.read_u16::()?; - if icon_type != ICO_TYPE { - return Err("bad header".into()); - } - - let icon_count = header.read_u16::()?; - - let mut data = vec![0; (icon_count * INDEX_SIZE) as usize]; - reader.read_exact(&mut data).await?; - offset += data.len(); - let mut data = Cursor::new(data); - - let mut sizes = Vec::new(); - for i in 0..icon_count { - data.seek(SeekFrom::Start((INDEX_SIZE * i) as _))?; - - let width = data.read_u8()?; - let height = data.read_u8()?; - - if width == 0 && height == 0 { - data.seek(SeekFrom::Current(10))?; - let image_offset = data.read_u32::()?; - - let mut data = vec![0; image_offset as usize - offset]; - reader.read_exact(&mut data).await?; - offset += data.len(); - - let size = get_png_size(reader).await; - if let Ok(size) = size { - sizes.push(size); - } - } else { - sizes.push(IconSize::new(width as _, height as _)) + if icon_type != ICO_TYPE { + return Err("bad header".into()); } - } - Ok(sizes.try_into()?) + let icon_count = header.read_u16::()?; + + let mut data = vec![0; (icon_count * INDEX_SIZE) as usize]; + reader.read_exact(&mut data).await?; + offset += data.len(); + let mut data = Cursor::new(data); + + let mut sizes = Vec::new(); + for i in 0..icon_count { + data.seek(SeekFrom::Start((INDEX_SIZE * i) as _))?; + + let width = data.read_u8()?; + let height = data.read_u8()?; + + if width == 0 && height == 0 { + data.seek(SeekFrom::Current(10))?; + let image_offset = data.read_u32::()?; + + let mut data = vec![0; image_offset as usize - offset]; + reader.read_exact(&mut data).await?; + offset += data.len(); + + let size = get_png_size(reader).await; + if let Ok(size) = size { + sizes.push(size); + } + } else { + sizes.push(IconSize::new(width as _, height as _)) + } + } + + Ok(sizes.try_into()?) } diff --git a/src/icon/icon_size/icon_sizes.rs b/src/icon/icon_size/icon_sizes.rs index b5fae53..b74e2d4 100644 --- a/src/icon/icon_size/icon_sizes.rs +++ b/src/icon/icon_size/icon_sizes.rs @@ -3,11 +3,11 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::{ - cmp::Ordering, - convert::{TryFrom, TryInto}, - error::Error, - fmt::{self, Display}, - ops::Deref, + cmp::Ordering, + convert::{TryFrom, TryInto}, + error::Error, + fmt::{self, Display}, + ops::Deref, }; use vec1::{vec1, Vec1}; @@ -16,99 +16,99 @@ use vec1::{vec1, Vec1}; pub struct IconSizes(Vec1); impl Display for IconSizes { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(&self.0.iter().join(" ")) - } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.0.iter().join(" ")) + } } impl IconSizes { - pub fn add_size(&mut self, size: IconSize) { - match self.0.binary_search(&size) { - Ok(_) => {} - Err(pos) => self.0.insert(pos, size), + pub fn add_size(&mut self, size: IconSize) { + match self.0.binary_search(&size) { + Ok(_) => {} + Err(pos) => self.0.insert(pos, size), + } } - } - pub fn largest(&self) -> &IconSize { - self.0.first() - } + pub fn largest(&self) -> &IconSize { + self.0.first() + } } impl TryFrom<&str> for IconSizes { - type Error = Box; + type Error = Box; - fn try_from(sizes_str: &str) -> Result { - let size_strs = sizes_str.split(" "); + fn try_from(sizes_str: &str) -> Result { + let size_strs = sizes_str.split(" "); - let mut sizes = Vec::new(); - for size in size_strs { - if let Ok(size) = serde_json::from_value(Value::String(size.to_string())) { - sizes.push(size); - } + let mut sizes = Vec::new(); + for size in size_strs { + if let Ok(size) = serde_json::from_value(Value::String(size.to_string())) { + sizes.push(size); + } + } + + Ok(sizes.try_into()?) } - - Ok(sizes.try_into()?) - } } impl TryFrom<&String> for IconSizes { - type Error = Box; + type Error = Box; - fn try_from(sizes_str: &String) -> Result { - IconSizes::try_from(sizes_str.as_str()) - } + fn try_from(sizes_str: &String) -> Result { + IconSizes::try_from(sizes_str.as_str()) + } } impl TryFrom for IconSizes { - type Error = Box; + type Error = Box; - fn try_from(sizes_str: String) -> Result { - IconSizes::try_from(sizes_str.as_str()) - } + fn try_from(sizes_str: String) -> Result { + IconSizes::try_from(sizes_str.as_str()) + } } impl Deref for IconSizes { - type Target = Vec1; - fn deref(&self) -> &Vec1 { - &self.0 - } + type Target = Vec1; + fn deref(&self) -> &Vec1 { + &self.0 + } } impl IntoIterator for IconSizes { - type Item = IconSize; - type IntoIter = std::vec::IntoIter; + type Item = IconSize; + type IntoIter = std::vec::IntoIter; - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } } impl Ord for IconSizes { - fn cmp(&self, other: &Self) -> Ordering { - self.largest().cmp(&other.largest()) - } + fn cmp(&self, other: &Self) -> Ordering { + self.largest().cmp(&other.largest()) + } } impl PartialOrd for IconSizes { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } impl TryFrom> for IconSizes { - type Error = String; + type Error = String; - fn try_from(mut vec: Vec) -> Result { - vec.sort(); + fn try_from(mut vec: Vec) -> Result { + vec.sort(); - Ok(IconSizes( - vec.try_into().map_err(|_| "must contain a size")?, - )) - } + Ok(IconSizes( + vec.try_into().map_err(|_| "must contain a size")?, + )) + } } impl From for IconSizes { - fn from(size: IconSize) -> Self { - IconSizes(vec1![size]) - } + fn from(size: IconSize) -> Self { + IconSizes(vec1![size]) + } } diff --git a/src/icon/icon_size/jpeg.rs b/src/icon/icon_size/jpeg.rs index 8415a19..5b768fb 100644 --- a/src/icon/icon_size/jpeg.rs +++ b/src/icon/icon_size/jpeg.rs @@ -5,58 +5,58 @@ use futures::{AsyncRead, AsyncReadExt as _}; use super::IconSize; async fn read_u16_be(reader: &mut R) -> Result> { - let mut buf = [0u8; 2]; - reader.read_exact(&mut buf).await?; - Ok(u16::from_be_bytes(buf)) + let mut buf = [0u8; 2]; + reader.read_exact(&mut buf).await?; + Ok(u16::from_be_bytes(buf)) } pub async fn get_jpeg_size( - reader: &mut R, + reader: &mut R, ) -> Result> { - let mut marker = [0; 2]; - let mut depth = 0i32; + let mut marker = [0; 2]; + let mut depth = 0i32; - loop { - // Read current marker (FF XX) - reader.read_exact(&mut marker).await?; + loop { + // Read current marker (FF XX) + reader.read_exact(&mut marker).await?; - if marker[0] != 0xFF { - // Did not read a marker. Assume image is corrupt. - return Err("invalid jpeg".into()); + if marker[0] != 0xFF { + // Did not read a marker. Assume image is corrupt. + return Err("invalid jpeg".into()); + } + + let page = marker[1]; + + // Check for valid SOFn markers. C4, C8, and CC aren't dimension markers. + if (page >= 0xC0 && page <= 0xC3) + || (page >= 0xC5 && page <= 0xC7) + || (page >= 0xC9 && page <= 0xCB) + || (page >= 0xCD && page <= 0xCF) + { + // Only get outside image size + if depth == 0 { + // Correct marker, go forward 3 bytes so we're at height offset + reader.read_exact(&mut [0; 3]).await?; + break; + } + } else if page == 0xD8 { + depth += 1; + } else if page == 0xD9 { + depth -= 1; + if depth < 0 { + return Err("invalid jpeg".into()); + } + } + + // Read the marker length and skip over it entirely + let page_size = read_u16_be(reader).await? as i64; + reader + .read_exact(&mut vec![0; (page_size - 2) as usize]) + .await?; } - let page = marker[1]; + let height = read_u16_be(reader).await?; + let width = read_u16_be(reader).await?; - // Check for valid SOFn markers. C4, C8, and CC aren't dimension markers. - if (page >= 0xC0 && page <= 0xC3) - || (page >= 0xC5 && page <= 0xC7) - || (page >= 0xC9 && page <= 0xCB) - || (page >= 0xCD && page <= 0xCF) - { - // Only get outside image size - if depth == 0 { - // Correct marker, go forward 3 bytes so we're at height offset - reader.read_exact(&mut [0; 3]).await?; - break; - } - } else if page == 0xD8 { - depth += 1; - } else if page == 0xD9 { - depth -= 1; - if depth < 0 { - return Err("invalid jpeg".into()); - } - } - - // Read the marker length and skip over it entirely - let page_size = read_u16_be(reader).await? as i64; - reader - .read_exact(&mut vec![0; (page_size - 2) as usize]) - .await?; - } - - let height = read_u16_be(reader).await?; - let width = read_u16_be(reader).await?; - - Ok(IconSize::new(width as _, height as _)) + Ok(IconSize::new(width as _, height as _)) } diff --git a/src/icon/icon_size/mod.rs b/src/icon/icon_size/mod.rs index b5c3618..fe8aed5 100644 --- a/src/icon/icon_size/mod.rs +++ b/src/icon/icon_size/mod.rs @@ -14,87 +14,87 @@ pub use svg::*; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use std::{ - cmp::{self, Ordering}, - error::Error, - fmt::{self, Display}, - io::{Read, Seek, SeekFrom}, + cmp::{self, Ordering}, + error::Error, + fmt::{self, Display}, + io::{Read, Seek, SeekFrom}, }; #[serde_as] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct IconSize { - pub width: u32, - pub height: u32, + pub width: u32, + pub height: u32, } impl Display for IconSize { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}x{}", self.width, self.height) - } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}x{}", self.width, self.height) + } } impl IconSize { - pub fn new(width: u32, height: u32) -> Self { - Self { width, height } - } + pub fn new(width: u32, height: u32) -> Self { + Self { width, height } + } - pub fn max_rect(&self) -> u32 { - cmp::max(self.width, self.height) - } + pub fn max_rect(&self) -> u32 { + cmp::max(self.width, self.height) + } } impl Ord for IconSize { - fn cmp(&self, other: &Self) -> Ordering { - other.max_rect().cmp(&self.max_rect()) - } + fn cmp(&self, other: &Self) -> Ordering { + other.max_rect().cmp(&self.max_rect()) + } } impl PartialOrd for IconSize { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } impl Serialize for IconSize { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.collect_str(self) - } + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.collect_str(self) + } } impl<'de> Deserialize<'de> for IconSize { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let value: String = Deserialize::deserialize(deserializer)?; + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let value: String = Deserialize::deserialize(deserializer)?; - let mut split = value.split("x"); - let width = split - .next() - .ok_or(de::Error::custom("expected width"))? - .parse() - .map_err(de::Error::custom)?; + let mut split = value.split("x"); + let width = split + .next() + .ok_or(de::Error::custom("expected width"))? + .parse() + .map_err(de::Error::custom)?; - let height = split - .next() - .ok_or(de::Error::custom("expected height"))? - .parse() - .map_err(de::Error::custom)?; + let height = split + .next() + .ok_or(de::Error::custom("expected height"))? + .parse() + .map_err(de::Error::custom)?; - Ok(IconSize::new(width, height)) - } + Ok(IconSize::new(width, height)) + } } fn slice_eq( - cur: &mut T, - offset: u64, - slice: &[u8], + cur: &mut T, + offset: u64, + slice: &[u8], ) -> Result> { - cur.seek(SeekFrom::Start(offset))?; - let mut buffer = vec![0; slice.len()]; - cur.read_exact(&mut buffer)?; - Ok(buffer == slice) + cur.seek(SeekFrom::Start(offset))?; + let mut buffer = vec![0; slice.len()]; + cur.read_exact(&mut buffer)?; + Ok(buffer == slice) } diff --git a/src/icon/icon_size/png.rs b/src/icon/icon_size/png.rs index 65c3c0e..7796c5e 100644 --- a/src/icon/icon_size/png.rs +++ b/src/icon/icon_size/png.rs @@ -4,17 +4,17 @@ use futures::prelude::*; use std::{error::Error, io::Cursor}; pub async fn get_png_size( - reader: &mut R, + reader: &mut R, ) -> Result> { - let mut header = [0; 22]; - reader.read_exact(&mut header).await?; - let header = &mut Cursor::new(header); + let mut header = [0; 22]; + reader.read_exact(&mut header).await?; + let header = &mut Cursor::new(header); - assert_slice_eq!(header, 0, b"NG\r\n\x1a\n", "bad header"); - assert_slice_eq!(header, 10, b"IHDR", "bad header"); + assert_slice_eq!(header, 0, b"NG\r\n\x1a\n", "bad header"); + assert_slice_eq!(header, 10, b"IHDR", "bad header"); - let width = header.read_u32::()?; - let height = header.read_u32::()?; + let width = header.read_u32::()?; + let height = header.read_u32::()?; - Ok(IconSize::new(width, height)) + Ok(IconSize::new(width, height)) } diff --git a/src/icon/icon_size/svg.rs b/src/icon/icon_size/svg.rs index 38f0f8b..912397e 100644 --- a/src/icon/icon_size/svg.rs +++ b/src/icon/icon_size/svg.rs @@ -4,65 +4,67 @@ use lol_html::{element, HtmlRewriter, Settings}; use std::{cell::RefCell, error::Error}; fn parse_size(size: S) -> Option { - size - .to_string() - .parse::() - .ok() - .map(|size| size.round() as u32) + size.to_string() + .parse::() + .ok() + .map(|size| size.round() as u32) } pub async fn get_svg_size( - first_bytes: &[u8; 2], - reader: &mut R, + first_bytes: &[u8; 2], + reader: &mut R, ) -> Result, Box> { - let size = RefCell::new(None); + let size = RefCell::new(None); - let mut rewriter = HtmlRewriter::new( - Settings { - element_content_handlers: vec![ - // Rewrite insecure hyperlinks - element!("svg", |el| { - let viewbox = el.get_attribute("viewbox"); + let mut rewriter = HtmlRewriter::new( + Settings { + element_content_handlers: vec![ + // Rewrite insecure hyperlinks + element!("svg", |el| { + let viewbox = el.get_attribute("viewbox"); - let width = el.get_attribute("width").and_then(parse_size); - let height = el.get_attribute("height").and_then(parse_size); + let width = el.get_attribute("width").and_then(parse_size); + let height = el.get_attribute("height").and_then(parse_size); - *size.borrow_mut() = Some(if let (Some(width), Some(height)) = (width, height) { - Some(IconSize::new(width, height)) - } else if let Some(viewbox) = viewbox { - regex!(r"^-?\d+\s+-?\d+\s+(\d+\.?[\d]?)\s+(\d+\.?[\d]?)") - .captures(&viewbox) - .map(|captures| { - let width = parse_size(captures.get(1).unwrap().as_str()).unwrap(); - let height = parse_size(captures.get(2).unwrap().as_str()).unwrap(); - IconSize::new(width, height) - }) - } else { - None - }); + *size.borrow_mut() = + Some(if let (Some(width), Some(height)) = (width, height) { + Some(IconSize::new(width, height)) + } else if let Some(viewbox) = viewbox { + regex!(r"^-?\d+\s+-?\d+\s+(\d+\.?[\d]?)\s+(\d+\.?[\d]?)") + .captures(&viewbox) + .map(|captures| { + let width = + parse_size(captures.get(1).unwrap().as_str()).unwrap(); + let height = + parse_size(captures.get(2).unwrap().as_str()).unwrap(); + IconSize::new(width, height) + }) + } else { + None + }); - Ok(()) - }), - ], - ..Settings::default() - }, - |_: &[u8]| {}, - ); + Ok(()) + }), + ], + ..Settings::default() + }, + |_: &[u8]| {}, + ); - rewriter.write(first_bytes)?; + rewriter.write(first_bytes)?; - let mut buffer = [0; 100]; + let mut buffer = [0; 100]; - loop { - let n = reader.read(&mut buffer).await?; - if n == 0 { - return Err("invalid svg".into()); + loop { + let n = reader.read(&mut buffer).await?; + if n == 0 { + return Err("invalid svg".into()); + } + + rewriter.write(&buffer[..n])?; + + if let Some(size) = *size.borrow() { + return Ok(size); + } } - - rewriter.write(&buffer[..n])?; - - if let Some(size) = *size.borrow() { - return Ok(size); - } - } } diff --git a/src/icon/mod.rs b/src/icon/mod.rs index 109e59f..ab7168b 100644 --- a/src/icon/mod.rs +++ b/src/icon/mod.rs @@ -8,116 +8,115 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use serde_with::{DeserializeFromStr, SerializeDisplay}; use std::{ - cmp::Ordering, - collections::HashMap, - convert::TryInto, - error::Error, - fmt::{self, Display}, - hash::{Hash, Hasher}, - str::FromStr, + cmp::Ordering, + collections::HashMap, + convert::TryInto, + error::Error, + fmt::{self, Display}, + hash::{Hash, Hasher}, + str::FromStr, }; use url::Url; #[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, SerializeDisplay, DeserializeFromStr)] pub enum IconKind { - AppIcon, - SiteFavicon, - SiteLogo, + AppIcon, + SiteFavicon, + SiteLogo, } impl Display for IconKind { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - f.write_str(match self { - IconKind::SiteLogo => "site_logo", - IconKind::AppIcon => "app_icon", - IconKind::SiteFavicon => "site_favicon", - }) - } + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + f.write_str(match self { + IconKind::SiteLogo => "site_logo", + IconKind::AppIcon => "app_icon", + IconKind::SiteFavicon => "site_favicon", + }) + } } impl FromStr for IconKind { - type Err = String; + type Err = String; - fn from_str(kind: &str) -> Result { - match kind { - "site_logo" => Ok(IconKind::SiteLogo), - "app_icon" => Ok(IconKind::AppIcon), - "site_favicon" => Ok(IconKind::SiteFavicon), - _ => Err("unknown icon kind!".into()), + fn from_str(kind: &str) -> Result { + match kind { + "site_logo" => Ok(IconKind::SiteLogo), + "app_icon" => Ok(IconKind::AppIcon), + "site_favicon" => Ok(IconKind::SiteFavicon), + _ => Err("unknown icon kind!".into()), + } } - } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct Icon { - pub url: Url, - pub headers: HashMap, - pub kind: IconKind, - #[serde(flatten)] - pub info: IconInfo, + pub url: Url, + pub headers: HashMap, + pub kind: IconKind, + #[serde(flatten)] + pub info: IconInfo, } impl Hash for Icon { - fn hash(&self, state: &mut H) { - ( - &self.url, - self - .headers - .iter() - .sorted_by_key(|(key, _)| *key) - .collect::>(), - ) - .hash(state); - } + fn hash(&self, state: &mut H) { + ( + &self.url, + self.headers + .iter() + .sorted_by_key(|(key, _)| *key) + .collect::>(), + ) + .hash(state); + } } impl Icon { - pub fn new(url: Url, kind: IconKind, info: IconInfo) -> Self { - Icon::new_with_headers(url, HashMap::new(), kind, info) - } - - pub fn new_with_headers( - url: Url, - headers: HashMap, - kind: IconKind, - info: IconInfo, - ) -> Self { - Self { - url, - headers, - kind, - info, + pub fn new(url: Url, kind: IconKind, info: IconInfo) -> Self { + Icon::new_with_headers(url, HashMap::new(), kind, info) } - } - pub async fn load( - url: Url, - kind: IconKind, - sizes: Option, - ) -> Result> { - Icon::load_with_headers(url, HashMap::new(), kind, sizes).await - } + pub fn new_with_headers( + url: Url, + headers: HashMap, + kind: IconKind, + info: IconInfo, + ) -> Self { + Self { + url, + headers, + kind, + info, + } + } - pub async fn load_with_headers( - url: Url, - headers: HashMap, - kind: IconKind, - sizes: Option, - ) -> Result> { - let info = IconInfo::load(url.clone(), (&headers).try_into().unwrap(), sizes).await?; + pub async fn load( + url: Url, + kind: IconKind, + sizes: Option, + ) -> Result> { + Icon::load_with_headers(url, HashMap::new(), kind, sizes).await + } - Ok(Icon::new_with_headers(url, headers, kind, info)) - } + pub async fn load_with_headers( + url: Url, + headers: HashMap, + kind: IconKind, + sizes: Option, + ) -> Result> { + let info = IconInfo::load(url.clone(), (&headers).try_into().unwrap(), sizes).await?; + + Ok(Icon::new_with_headers(url, headers, kind, info)) + } } impl Ord for Icon { - fn cmp(&self, other: &Self) -> Ordering { - self.info.cmp(&other.info) - } + fn cmp(&self, other: &Self) -> Ordering { + self.info.cmp(&other.info) + } } impl PartialOrd for Icon { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } diff --git a/src/icons.rs b/src/icons.rs index 4e839c1..6842ae8 100644 --- a/src/icons.rs +++ b/src/icons.rs @@ -11,198 +11,200 @@ use url::Url; use vec1::Vec1; pub struct SiteIcons { - blacklist: Option bool>>, + blacklist: Option bool>>, } #[derive(Debug, Clone)] enum LoadedKind { - DefaultManifest(Option>), - HeadTags(Option>), - DefaultFavicon(Option), - SiteLogo(Option), + DefaultManifest(Option>), + HeadTags(Option>), + DefaultFavicon(Option), + SiteLogo(Option), } impl SiteIcons { - pub fn new() -> Self { - SiteIcons { blacklist: None } - } - - pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self { - SiteIcons { - blacklist: Some(Box::new(blacklist)), + pub fn new() -> Self { + SiteIcons { blacklist: None } } - } - pub fn is_blacklisted(&self, url: &Url) -> bool { - if let Some(is_blacklisted) = &self.blacklist { - is_blacklisted(url) - } else { - false - } - } - - pub async fn load_website( - &mut self, - url: U, - best_matches_only: bool, - ) -> Result, Box> { - let url = url.into_url()?; - - let manifest_urls = vec![ - push_url(&url, "manifest.json"), - push_url(&url, "manifest.webmanifest"), - url.join("/manifest.json")?, - url.join("/manifest.webmanifest")?, - ] - .into_iter() - .unique(); - - let favicon_urls = vec![ - push_url(&url, "favicon.svg"), - url.join("/favicon.svg")?, - push_url(&url, "favicon.ico"), - url.join("/favicon.ico")?, - ] - .into_iter() - .unique(); - - let html_response = async { - let res = CLIENT - .get(url.clone()) - .header(ACCEPT, "text/html") - .send() - .await - .ok()? - .error_for_status() - .ok()?; - - let url = res.url().clone(); - - if self.is_blacklisted(&url) { - None - } else { - let body = res.bytes_stream().map(|res| { - res - .map(|bytes| bytes.to_vec()) - .map_err(|err| err.to_string()) - }); - - let mut publisher = Publisher::new(128); - let subscriber = publisher.subscribe(); - - Some(( - url, - async move { StreamPublisher::new(&mut publisher, body).await }.shared(), - subscriber, - )) - } - } - .shared(); - - let mut futures = vec![ - async { - let html_response = html_response.clone().await; - - LoadedKind::HeadTags(match html_response { - Some((url, _, body)) => html_parser::parse_head(&url, body) - .await - .ok() - .and_then(|icons| icons.try_into().ok()), - None => None, - }) - } - .boxed_local(), - async { - let html_response = html_response.clone().await; - - LoadedKind::SiteLogo(match html_response { - Some((url, complete, body)) => { - let (icons, _) = join!( - html_parser::parse_site_logo(&url, body, |url| self.is_blacklisted(url)), - complete - ); - - icons.ok() - } - None => None, - }) - } - .boxed_local(), - async { - let manifests = join_all(manifest_urls.map(|url| SiteIcons::load_manifest(url))).await; - - LoadedKind::DefaultManifest( - manifests - .into_iter() - .find_map(|manifest| manifest.ok().and_then(|icons| icons.try_into().ok())), - ) - } - .boxed_local(), - async { - let favicons = - join_all(favicon_urls.map(|url| Icon::load(url.clone(), IconKind::SiteFavicon, None))) - .await; - - LoadedKind::DefaultFavicon(favicons.into_iter().find_map(|favicon| favicon.ok())) - } - .boxed_local(), - ]; - - let mut icons: Vec = Vec::new(); - let mut found_best_match = false; - let mut previous_loads = Vec::new(); - - while !futures.is_empty() { - let (loaded, index, _) = select_all(&mut futures).await; - futures.remove(index); - - match loaded.clone() { - LoadedKind::DefaultManifest(manifest_icons) => { - if let Some(manifest_icons) = manifest_icons { - icons.extend(manifest_icons); - found_best_match = true; - } + pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self { + SiteIcons { + blacklist: Some(Box::new(blacklist)), } - LoadedKind::DefaultFavicon(favicon) => { - if let Some(favicon) = favicon { - icons.push(favicon); + } - if previous_loads - .iter() - .any(|kind| matches!(kind, LoadedKind::HeadTags(_))) - { - found_best_match = true; + pub fn is_blacklisted(&self, url: &Url) -> bool { + if let Some(is_blacklisted) = &self.blacklist { + is_blacklisted(url) + } else { + false + } + } + + pub async fn load_website( + &mut self, + url: U, + best_matches_only: bool, + ) -> Result, Box> { + let url = url.into_url()?; + + let manifest_urls = vec![ + push_url(&url, "manifest.json"), + push_url(&url, "manifest.webmanifest"), + url.join("/manifest.json")?, + url.join("/manifest.webmanifest")?, + ] + .into_iter() + .unique(); + + let favicon_urls = vec![ + push_url(&url, "favicon.svg"), + url.join("/favicon.svg")?, + push_url(&url, "favicon.ico"), + url.join("/favicon.ico")?, + ] + .into_iter() + .unique(); + + let html_response = async { + let res = CLIENT + .get(url.clone()) + .header(ACCEPT, "text/html") + .send() + .await + .ok()? + .error_for_status() + .ok()?; + + let url = res.url().clone(); + + if self.is_blacklisted(&url) { + None + } else { + let body = res.bytes_stream().map(|res| { + res.map(|bytes| bytes.to_vec()) + .map_err(|err| err.to_string()) + }); + + let mut publisher = Publisher::new(128); + let subscriber = publisher.subscribe(); + + Some(( + url, + async move { StreamPublisher::new(&mut publisher, body).await }.shared(), + subscriber, + )) } - } } - LoadedKind::HeadTags(head_icons) => { - if let Some(head_icons) = head_icons { - icons.extend(head_icons); - found_best_match = true; - } else if previous_loads - .iter() - .any(|kind| matches!(kind, LoadedKind::DefaultFavicon(Some(_)))) - { - found_best_match = true; - } + .shared(); + + let mut futures = vec![ + async { + let html_response = html_response.clone().await; + + LoadedKind::HeadTags(match html_response { + Some((url, _, body)) => html_parser::parse_head(&url, body) + .await + .ok() + .and_then(|icons| icons.try_into().ok()), + None => None, + }) + } + .boxed_local(), + async { + let html_response = html_response.clone().await; + + LoadedKind::SiteLogo(match html_response { + Some((url, complete, body)) => { + let (icons, _) = join!( + html_parser::parse_site_logo(&url, body, |url| self + .is_blacklisted(url)), + complete + ); + + icons.ok() + } + None => None, + }) + } + .boxed_local(), + async { + let manifests = + join_all(manifest_urls.map(|url| SiteIcons::load_manifest(url))).await; + + LoadedKind::DefaultManifest( + manifests + .into_iter() + .find_map(|manifest| manifest.ok().and_then(|icons| icons.try_into().ok())), + ) + } + .boxed_local(), + async { + let favicons = join_all( + favicon_urls.map(|url| Icon::load(url.clone(), IconKind::SiteFavicon, None)), + ) + .await; + + LoadedKind::DefaultFavicon(favicons.into_iter().find_map(|favicon| favicon.ok())) + } + .boxed_local(), + ]; + + let mut icons: Vec = Vec::new(); + let mut found_best_match = false; + let mut previous_loads = Vec::new(); + + while !futures.is_empty() { + let (loaded, index, _) = select_all(&mut futures).await; + futures.remove(index); + + match loaded.clone() { + LoadedKind::DefaultManifest(manifest_icons) => { + if let Some(manifest_icons) = manifest_icons { + icons.extend(manifest_icons); + found_best_match = true; + } + } + LoadedKind::DefaultFavicon(favicon) => { + if let Some(favicon) = favicon { + icons.push(favicon); + + if previous_loads + .iter() + .any(|kind| matches!(kind, LoadedKind::HeadTags(_))) + { + found_best_match = true; + } + } + } + LoadedKind::HeadTags(head_icons) => { + if let Some(head_icons) = head_icons { + icons.extend(head_icons); + found_best_match = true; + } else if previous_loads + .iter() + .any(|kind| matches!(kind, LoadedKind::DefaultFavicon(Some(_)))) + { + found_best_match = true; + } + } + LoadedKind::SiteLogo(logo) => { + if let Some(logo) = logo { + icons.push(logo); + } + } + } + + previous_loads.push(loaded); + + icons.sort(); + icons = icons.into_iter().unique().collect(); + + if best_matches_only && found_best_match { + break; + } } - LoadedKind::SiteLogo(logo) => { - if let Some(logo) = logo { - icons.push(logo); - } - } - } - previous_loads.push(loaded); - - icons.sort(); - icons = icons.into_iter().unique().collect(); - - if best_matches_only && found_best_match { - break; - } + Ok(icons) } - - Ok(icons) - } } diff --git a/src/lib.rs b/src/lib.rs index c8592ca..0ea7151 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,12 +35,12 @@ pub use icons::*; use once_cell::sync::Lazy; use reqwest::{ - header::{HeaderMap, HeaderValue, USER_AGENT}, - Client, + header::{HeaderMap, HeaderValue, USER_AGENT}, + Client, }; static CLIENT: Lazy = Lazy::new(|| { - let mut headers = HeaderMap::new(); - headers.insert(USER_AGENT, HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36").unwrap()); - Client::builder().default_headers(headers).build().unwrap() + let mut headers = HeaderMap::new(); + headers.insert(USER_AGENT, HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36").unwrap()); + Client::builder().default_headers(headers).build().unwrap() }); diff --git a/src/manifest.rs b/src/manifest.rs index 6ac517d..7d43602 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -8,49 +8,47 @@ use url::Url; #[derive(Debug, Deserialize)] struct ManifestIcon { - src: String, - sizes: Option, + src: String, + sizes: Option, } #[derive(Debug, Deserialize)] struct Manifest { - icons: Vec, + icons: Vec, } impl SiteIcons { - pub async fn load_manifest(url: U) -> Result, Box> { - let url = url.into_url()?; + pub async fn load_manifest(url: U) -> Result, Box> { + let url = url.into_url()?; - Ok(load_manifest_cached(url).await?) - } + Ok(load_manifest_cached(url).await?) + } } #[cached(sync_writes = true)] async fn load_manifest_cached(url: Url) -> Result, String> { - let url = &url; + let url = &url; - let manifest: Manifest = CLIENT - .get(url.clone()) - .send() - .await - .map_err(|e| format!("{}: {:?}", url, e))? - .error_for_status() - .map_err(|e| format!("{}: {:?}", url, e))? - .json() - .await - .map_err(|e| format!("{}: {:?}", url, e))?; + let manifest: Manifest = CLIENT + .get(url.clone()) + .send() + .await + .map_err(|e| format!("{}: {:?}", url, e))? + .error_for_status() + .map_err(|e| format!("{}: {:?}", url, e))? + .json() + .await + .map_err(|e| format!("{}: {:?}", url, e))?; - Ok( - join_all(manifest.icons.into_iter().map(|icon| async move { - if let Ok(src) = url.join(&icon.src) { - Icon::load(src, IconKind::AppIcon, icon.sizes).await.ok() - } else { - None - } + Ok(join_all(manifest.icons.into_iter().map(|icon| async move { + if let Ok(src) = url.join(&icon.src) { + Icon::load(src, IconKind::AppIcon, icon.sizes).await.ok() + } else { + None + } })) .await .into_iter() .filter_map(|icon| icon) - .collect(), - ) + .collect()) } diff --git a/src/utils/background_poll.rs b/src/utils/background_poll.rs index bcc4ec1..f900e54 100644 --- a/src/utils/background_poll.rs +++ b/src/utils/background_poll.rs @@ -1,43 +1,43 @@ use std::{ - pin::Pin, - task::{Context, Poll}, + pin::Pin, + task::{Context, Poll}, }; use futures::Future; pub async fn poll_in_background(future: F, background_future: B) -> FO where - F: Future + Unpin, - B: Future + Unpin, -{ - struct BackgroundPoller { - future: F, - background_future: B, - } - - impl Future for BackgroundPoller - where F: Future + Unpin, B: Future + Unpin, - { - type Output = FO; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let this = self.get_mut(); - - let result = Pin::new(&mut this.future).poll(cx); - - if result.is_pending() { - let _ = Pin::new(&mut this.background_future).poll(cx); - } - - result +{ + struct BackgroundPoller { + future: F, + background_future: B, } - } - BackgroundPoller { - future, - background_future, - } - .await + impl Future for BackgroundPoller + where + F: Future + Unpin, + B: Future + Unpin, + { + type Output = FO; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.get_mut(); + + let result = Pin::new(&mut this.future).poll(cx); + + if result.is_pending() { + let _ = Pin::new(&mut this.background_future).poll(cx); + } + + result + } + } + + BackgroundPoller { + future, + background_future, + } + .await } diff --git a/src/utils/macros.rs b/src/utils/macros.rs index 68a4342..9a17895 100644 --- a/src/utils/macros.rs +++ b/src/utils/macros.rs @@ -12,10 +12,10 @@ macro_rules! join_with { } macro_rules! regex { - ($re:literal $(,)?) => {{ - static RE: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new(); - RE.get_or_init(|| regex::Regex::new($re).unwrap()) - }}; + ($re:literal $(,)?) => {{ + static RE: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new(); + RE.get_or_init(|| regex::Regex::new($re).unwrap()) + }}; } macro_rules! assert_slice_eq { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index fa00d3f..b15fa86 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -10,7 +10,7 @@ pub use svg_encoder::*; use url::Url; pub fn push_url(url: &Url, segment: &str) -> Url { - let mut url = url.clone(); - url.path_segments_mut().unwrap().push(segment); - url + let mut url = url.clone(); + url.path_segments_mut().unwrap().push(segment); + url } diff --git a/src/utils/svg_encoder.rs b/src/utils/svg_encoder.rs index 583af85..d940da7 100644 --- a/src/utils/svg_encoder.rs +++ b/src/utils/svg_encoder.rs @@ -2,52 +2,52 @@ use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use std::borrow::Cow; const DATA_URI: &AsciiSet = &CONTROLS - .add(b'\r') - .add(b'\n') - .add(b'%') - .add(b'#') - .add(b'(') - .add(b')') - .add(b'<') - .add(b'>') - .add(b'?') - .add(b'[') - .add(b'\\') - .add(b']') - .add(b'^') - .add(b'`') - .add(b'{') - .add(b'|') - .add(b'}'); + .add(b'\r') + .add(b'\n') + .add(b'%') + .add(b'#') + .add(b'(') + .add(b')') + .add(b'<') + .add(b'>') + .add(b'?') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'^') + .add(b'`') + .add(b'{') + .add(b'|') + .add(b'}'); pub fn encode_svg(svg: &str) -> String { - // add namespace - let encoded = if !svg.contains("http://www.w3.org/2000/svg") { - regex!("]+fill='?(none)'?").captures(&encoded) { - let index = captures.get(1).unwrap(); - let mut result = String::new(); - for (i, c) in encoded.chars().enumerate() { - if i < index.start() || i >= index.end() { - result.push(c); - } + // remove a fill=none attribute + if let Some(captures) = regex!("^[^>]+fill='?(none)'?").captures(&encoded) { + let index = captures.get(1).unwrap(); + let mut result = String::new(); + for (i, c) in encoded.chars().enumerate() { + if i < index.start() || i >= index.end() { + result.push(c); + } + } + encoded = Cow::from(result); } - encoded = Cow::from(result); - } - // remove whitespace - let encoded = regex!(r">\s{1,}<").replace_all(&encoded, "><"); - let encoded = regex!(r"\s{2,}").replace_all(&encoded, " "); + // remove whitespace + let encoded = regex!(r">\s{1,}<").replace_all(&encoded, "><"); + let encoded = regex!(r"\s{2,}").replace_all(&encoded, " "); - let encoded = utf8_percent_encode(&encoded, DATA_URI); + let encoded = utf8_percent_encode(&encoded, DATA_URI); - format!("data:image/svg+xml,{}", encoded) + format!("data:image/svg+xml,{}", encoded) }