Use default rust formatting

This commit is contained in:
Markus Kohlhase 2023-09-15 15:25:39 +02:00
parent ed163b9a42
commit a1c38dbc9a
20 changed files with 1104 additions and 1102 deletions

View file

@ -1,3 +0,0 @@
indent_style = "Block"
reorder_imports = true
tab_spaces = 2

View file

@ -6,37 +6,37 @@ use std::error::Error;
// Command-line options, parsed with clap's derive API.
#[derive(Parser)]
struct Opts {
    // Site URL handed to `SiteIcons::load_website`.
    url: String,
    // Forwarded as the second argument of `load_website`.
    #[clap(long)]
    fast: bool,
    // Print the result as pretty-printed JSON instead of one line per icon.
    #[clap(long)]
    json: bool,
    #[clap(long)]
    /// Print out errors that occurred for skipped items
    debug: bool,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let mut icons = SiteIcons::new();
let opts: Opts = Opts::parse();
let mut icons = SiteIcons::new();
let opts: Opts = Opts::parse();
if opts.debug {
let mut builder = Builder::new();
builder.filter_level(LevelFilter::Info);
builder.init();
}
let entries = icons.load_website(opts.url, opts.fast).await?;
if opts.json {
println!("{}", serde_json::to_string_pretty(&entries)?)
} else {
for icon in entries {
println!("{} {} {}", icon.url, icon.kind, icon.info);
if opts.debug {
let mut builder = Builder::new();
builder.filter_level(LevelFilter::Info);
builder.init();
}
}
Ok(())
let entries = icons.load_website(opts.url, opts.fast).await?;
if opts.json {
println!("{}", serde_json::to_string_pretty(&entries)?)
} else {
for icon in entries {
println!("{} {} {}", icon.url, icon.kind, icon.info);
}
}
Ok(())
}

View file

@ -8,9 +8,9 @@ use futures::Stream;
use futures::StreamExt;
use lol_html::{element, errors::RewritingError, HtmlRewriter, Settings};
use std::{
cell::RefCell,
error::Error,
fmt::{self, Display},
cell::RefCell,
error::Error,
fmt::{self, Display},
};
use url::Url;
@ -18,104 +18,106 @@ use url::Url;
struct EndOfHead {}
impl Display for EndOfHead {
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
Ok(())
}
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
Ok(())
}
}
impl Error for EndOfHead {}
pub async fn parse_head(
url: &Url,
mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
url: &Url,
mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
) -> Result<Vec<Icon>, Box<dyn Error>> {
let mut icons = Vec::new();
let new_icons = RefCell::new(Vec::new());
let mut icons = Vec::new();
let new_icons = RefCell::new(Vec::new());
{
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
element!("head", |head| {
head.on_end_tag(|_| Err(Box::new(EndOfHead {})))?;
Ok(())
}),
element!("link[rel~='manifest']", |manifest| {
if let Some(href) = manifest
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
new_icons.borrow_mut().push(
async { SiteIcons::load_manifest(href).await.unwrap_or(Vec::new()) }
.boxed_local()
.shared(),
)
{
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
element!("head", |head| {
head.on_end_tag(|_| Err(Box::new(EndOfHead {})))?;
Ok(())
}),
element!("link[rel~='manifest']", |manifest| {
if let Some(href) = manifest
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
new_icons.borrow_mut().push(
async {
SiteIcons::load_manifest(href).await.unwrap_or(Vec::new())
}
.boxed_local()
.shared(),
)
}
Ok(())
}),
element!(
join_with!(
",",
"link[rel~='icon']",
"link[rel~='apple-touch-icon']",
"link[rel~='apple-touch-icon-precomposed']"
),
|link| {
let rel = link.get_attribute("rel").unwrap();
if let Some(href) = link
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
let kind = if rel.contains("apple-touch-icon") {
IconKind::AppIcon
} else {
IconKind::SiteFavicon
};
let sizes = link.get_attribute("sizes");
new_icons.borrow_mut().push(
async {
Icon::load(href, kind, sizes)
.await
.map(|icon| vec![icon])
.unwrap_or(Vec::new())
}
.boxed_local()
.shared(),
)
};
Ok(())
}
),
],
..Settings::default()
},
|_: &[u8]| {},
);
while let Some(data) = poll_in_background(body.next(), join_all(icons.clone())).await {
let result = rewriter.write(&data?);
icons.extend(new_icons.borrow_mut().drain(..));
match result {
Err(RewritingError::ContentHandlerError(result)) => {
match result.downcast::<EndOfHead>() {
Ok(_) => break,
Err(err) => return Err(err),
};
}
result => result?,
}
Ok(())
}),
element!(
join_with!(
",",
"link[rel~='icon']",
"link[rel~='apple-touch-icon']",
"link[rel~='apple-touch-icon-precomposed']"
),
|link| {
let rel = link.get_attribute("rel").unwrap();
if let Some(href) = link
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
let kind = if rel.contains("apple-touch-icon") {
IconKind::AppIcon
} else {
IconKind::SiteFavicon
};
let sizes = link.get_attribute("sizes");
new_icons.borrow_mut().push(
async {
Icon::load(href, kind, sizes)
.await
.map(|icon| vec![icon])
.unwrap_or(Vec::new())
}
.boxed_local()
.shared(),
)
};
Ok(())
}
),
],
..Settings::default()
},
|_: &[u8]| {},
);
while let Some(data) = poll_in_background(body.next(), join_all(icons.clone())).await {
let result = rewriter.write(&data?);
icons.extend(new_icons.borrow_mut().drain(..));
match result {
Err(RewritingError::ContentHandlerError(result)) => {
match result.downcast::<EndOfHead>() {
Ok(_) => break,
Err(err) => return Err(err),
};
}
result => result?,
}
}
}
let icons = join_all(icons).await.into_iter().flatten().collect();
let icons = join_all(icons).await.into_iter().flatten().collect();
Ok(icons)
Ok(icons)
}

View file

@ -1,8 +1,8 @@
use crate::{utils::encode_svg, Icon, IconKind};
use futures::{Stream, StreamExt};
use html5ever::{
driver,
tendril::{Tendril, TendrilSink},
driver,
tendril::{Tendril, TendrilSink},
};
use scraper::{ElementRef, Html};
use std::error::Error;
@ -11,147 +11,146 @@ use tldextract::TldOption;
use url::Url;
pub async fn parse_site_logo(
url: &Url,
mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
is_blacklisted: impl Fn(&Url) -> bool,
url: &Url,
mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
is_blacklisted: impl Fn(&Url) -> bool,
) -> Result<Icon, Box<dyn Error>> {
let mut parser = driver::parse_document(Html::new_document(), Default::default());
while let Some(data) = body.next().await {
if let Ok(data) = Tendril::try_from_byte_slice(&data?) {
parser.process(data)
}
}
let document = parser.finish();
let mut logos: Vec<_> = document
.select(selector!(
"a[href='/'] img, a[href='/'] svg",
"header img, header svg",
"img[src*=logo]",
"img[alt*=logo], svg[alt*=logo]",
"*[class*=logo] img, *[class*=logo] svg",
"*[id*=logo] img, *[id*=logo] svg",
"img[class*=logo], svg[class*=logo]",
"img[id*=logo], svg[id*=logo]",
))
.enumerate()
.filter_map(|(i, elem_ref)| {
let elem = elem_ref.value();
let ancestors = elem_ref
.ancestors()
.map(ElementRef::wrap)
.flatten()
.map(|elem_ref| elem_ref.value())
.collect::<Vec<_>>();
let skip_classnames = regex!("menu|search");
let should_skip = ancestors.iter().any(|ancestor| {
ancestor
.attr("class")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
.or_else(|| {
ancestor
.attr("id")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
})
.unwrap_or(false)
});
if should_skip {
return None;
}
let mut weight = 0;
// if in the header
if ancestors.iter().any(|element| element.name() == "header") {
weight += 2;
}
if i == 0 {
weight += 1;
}
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
ancestor
.attr(attr_name)
.map(|attr| is_match(&attr.to_lowercase()))
.unwrap_or(false)
})
};
if mentions("href", Box::new(|attr| attr == "/")) {
weight += 5;
};
let mentions_logo = |attr_name| {
mentions(
attr_name,
Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)),
)
};
if mentions_logo("class") || mentions_logo("id") {
weight += 3;
}
if mentions_logo("alt") {
weight += 2;
}
if mentions_logo("src") {
weight += 1;
}
if let Some(site_name) = url
.domain()
.and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain)
{
// if the alt contains the site_name then highest priority
if site_name
.to_lowercase()
.split('-')
.any(|segment| mentions("alt", Box::new(move |attr| attr.contains(segment))))
{
weight += 10;
let mut parser = driver::parse_document(Html::new_document(), Default::default());
while let Some(data) = body.next().await {
if let Ok(data) = Tendril::try_from_byte_slice(&data?) {
parser.process(data)
}
}
}
let href = if elem.name() == "svg" {
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
} else {
elem.attr("src").and_then(|href| url.join(&href).ok())
};
let document = parser.finish();
if let Some(href) = &href {
if is_blacklisted(href) {
return None;
let mut logos: Vec<_> =
document
.select(selector!(
"a[href='/'] img, a[href='/'] svg",
"header img, header svg",
"img[src*=logo]",
"img[alt*=logo], svg[alt*=logo]",
"*[class*=logo] img, *[class*=logo] svg",
"*[id*=logo] img, *[id*=logo] svg",
"img[class*=logo], svg[class*=logo]",
"img[id*=logo], svg[id*=logo]",
))
.enumerate()
.filter_map(|(i, elem_ref)| {
let elem = elem_ref.value();
let ancestors = elem_ref
.ancestors()
.map(ElementRef::wrap)
.flatten()
.map(|elem_ref| elem_ref.value())
.collect::<Vec<_>>();
let skip_classnames = regex!("menu|search");
let should_skip = ancestors.iter().any(|ancestor| {
ancestor
.attr("class")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
.or_else(|| {
ancestor
.attr("id")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
})
.unwrap_or(false)
});
if should_skip {
return None;
}
let mut weight = 0;
// if in the header
if ancestors.iter().any(|element| element.name() == "header") {
weight += 2;
}
if i == 0 {
weight += 1;
}
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
ancestor
.attr(attr_name)
.map(|attr| is_match(&attr.to_lowercase()))
.unwrap_or(false)
})
};
if mentions("href", Box::new(|attr| attr == "/")) {
weight += 5;
};
let mentions_logo = |attr_name| {
mentions(
attr_name,
Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)),
)
};
if mentions_logo("class") || mentions_logo("id") {
weight += 3;
}
if mentions_logo("alt") {
weight += 2;
}
if mentions_logo("src") {
weight += 1;
}
if let Some(site_name) = url
.domain()
.and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain)
{
// if the alt contains the site_name then highest priority
if site_name.to_lowercase().split('-').any(|segment| {
mentions("alt", Box::new(move |attr| attr.contains(segment)))
}) {
weight += 10;
}
}
let href = if elem.name() == "svg" {
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
} else {
elem.attr("src").and_then(|href| url.join(&href).ok())
};
if let Some(href) = &href {
if is_blacklisted(href) {
return None;
}
}
href.map(|href| (href, elem_ref, weight))
})
.collect();
logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));
// prefer <img> over svg
let mut prev_weight = None;
for (href, elem_ref, weight) in &logos {
if let Some(prev_weight) = prev_weight {
if weight != prev_weight {
break;
}
}
}
prev_weight = Some(weight);
href.map(|href| (href, elem_ref, weight))
})
.collect();
logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));
// prefer <img> over svg
let mut prev_weight = None;
for (href, elem_ref, weight) in &logos {
if let Some(prev_weight) = prev_weight {
if weight != prev_weight {
break;
}
if elem_ref.value().name() == "img" {
return Icon::load(href.clone(), IconKind::SiteLogo, None).await;
}
}
prev_weight = Some(weight);
if elem_ref.value().name() == "img" {
return Icon::load(href.clone(), IconKind::SiteLogo, None).await;
match logos.into_iter().next() {
Some((href, _, _)) => Icon::load(href.clone(), IconKind::SiteLogo, None).await,
None => Err("No site logo found".into()),
}
}
match logos.into_iter().next() {
Some((href, _, _)) => Icon::load(href.clone(), IconKind::SiteLogo, None).await,
None => Err("No site logo found".into()),
}
}

View file

@ -6,269 +6,272 @@ use mime::MediaType;
use reqwest::{header::*, Url};
use serde::{Deserialize, Serialize};
use std::{
cmp::Ordering,
convert::TryFrom,
error::Error,
fmt::{self, Display},
io,
cmp::Ordering,
convert::TryFrom,
error::Error,
fmt::{self, Display},
io,
};
// Image format hint derived from the MIME type in `IconInfo::load`; it selects
// the size reader used by `IconInfo::decode`.
enum IconKind {
    SVG,
    PNG,
    JPEG,
    ICO,
    GIF,
}
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[serde(tag = "type")]
#[serde(rename_all = "lowercase")]
pub enum IconInfo {
PNG { size: IconSize },
JPEG { size: IconSize },
ICO { sizes: IconSizes },
GIF { size: IconSize },
SVG { size: Option<IconSize> },
PNG { size: IconSize },
JPEG { size: IconSize },
ICO { sizes: IconSizes },
GIF { size: IconSize },
SVG { size: Option<IconSize> },
}
impl IconInfo {
async fn decode<R: AsyncRead + Unpin>(
reader: &mut R,
kind: Option<IconKind>,
) -> Result<IconInfo, Box<dyn Error>> {
let mut header = [0; 2];
reader.read_exact(&mut header).await?;
async fn decode<R: AsyncRead + Unpin>(
reader: &mut R,
kind: Option<IconKind>,
) -> Result<IconInfo, Box<dyn Error>> {
let mut header = [0; 2];
reader.read_exact(&mut header).await?;
match (kind, &header) {
(Some(IconKind::SVG), bytes) => {
let size = get_svg_size(bytes, reader).await?;
Ok(IconInfo::SVG { size })
}
(_, &[0x60, byte_two]) => {
let size = get_svg_size(&[0x60, byte_two], reader).await?;
Ok(IconInfo::SVG { size })
}
(Some(IconKind::PNG), _) | (_, b"\x89P") => {
let size = get_png_size(reader).await?;
Ok(IconInfo::PNG { size })
}
(Some(IconKind::ICO), _) | (_, &[0x00, 0x00]) => {
let sizes = get_ico_sizes(reader).await?;
Ok(IconInfo::ICO { sizes })
}
(Some(IconKind::JPEG), _) | (_, &[0xFF, 0xD8]) => {
let size = get_jpeg_size(reader).await?;
Ok(IconInfo::JPEG { size })
}
(Some(IconKind::GIF), _) | (_, b"GI") => {
let size = get_gif_size(reader).await?;
Ok(IconInfo::GIF { size })
}
_ => Err(format!("unknown icon type ({:?})", header).into()),
match (kind, &header) {
(Some(IconKind::SVG), bytes) => {
let size = get_svg_size(bytes, reader).await?;
Ok(IconInfo::SVG { size })
}
(_, &[0x60, byte_two]) => {
let size = get_svg_size(&[0x60, byte_two], reader).await?;
Ok(IconInfo::SVG { size })
}
(Some(IconKind::PNG), _) | (_, b"\x89P") => {
let size = get_png_size(reader).await?;
Ok(IconInfo::PNG { size })
}
(Some(IconKind::ICO), _) | (_, &[0x00, 0x00]) => {
let sizes = get_ico_sizes(reader).await?;
Ok(IconInfo::ICO { sizes })
}
(Some(IconKind::JPEG), _) | (_, &[0xFF, 0xD8]) => {
let size = get_jpeg_size(reader).await?;
Ok(IconInfo::JPEG { size })
}
(Some(IconKind::GIF), _) | (_, b"GI") => {
let size = get_gif_size(reader).await?;
Ok(IconInfo::GIF { size })
}
_ => Err(format!("unknown icon type ({:?})", header).into()),
}
}
}
pub async fn load(
url: Url,
headers: HeaderMap,
sizes: Option<String>,
) -> Result<IconInfo, Box<dyn Error>> {
let sizes = sizes.as_ref().and_then(|s| IconSizes::try_from(s).ok());
pub async fn load(
url: Url,
headers: HeaderMap,
sizes: Option<String>,
) -> Result<IconInfo, Box<dyn Error>> {
let sizes = sizes.as_ref().and_then(|s| IconSizes::try_from(s).ok());
let (mime, mut body): (_, Box<dyn AsyncRead + Unpin>) = match url.scheme() {
"data" => {
let url = url.to_string();
let url = DataUrl::process(&url).map_err(|_| "failed to parse data uri")?;
let (mime, mut body): (_, Box<dyn AsyncRead + Unpin>) = match url.scheme() {
"data" => {
let url = url.to_string();
let url = DataUrl::process(&url).map_err(|_| "failed to parse data uri")?;
let mime = url.mime_type().to_string().parse::<MediaType>()?;
let mime = url.mime_type().to_string().parse::<MediaType>()?;
let body = Cursor::new(
url
.decode_to_vec()
.map_err(|_| "failed to decode data uri body")?
.0,
);
let body = Cursor::new(
url.decode_to_vec()
.map_err(|_| "failed to decode data uri body")?
.0,
);
(mime, Box::new(body))
}
(mime, Box::new(body))
}
_ => {
let res = CLIENT
.get(url)
.headers(headers)
.send()
.await?
.error_for_status()?;
_ => {
let res = CLIENT
.get(url)
.headers(headers)
.send()
.await?
.error_for_status()?;
if !res.status().is_success() {
return Err("failed to fetch".into());
if !res.status().is_success() {
return Err("failed to fetch".into());
};
let mime = res
.headers()
.get(CONTENT_TYPE)
.ok_or("no content type")?
.to_str()?
.parse::<MediaType>()?;
let body = res
.bytes_stream()
.map(|result| {
result.map_err(|error| {
io::Error::new(io::ErrorKind::Other, error.to_string())
})
})
.into_async_read();
(mime, Box::new(body))
}
};
let mime = res
.headers()
.get(CONTENT_TYPE)
.ok_or("no content type")?
.to_str()?
.parse::<MediaType>()?;
let kind = match (mime.type_(), mime.subtype()) {
(mime::IMAGE, mime::PNG) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::PNG {
size: *sizes.largest(),
});
}
Some(IconKind::PNG)
}
let body = res
.bytes_stream()
.map(|result| {
result.map_err(|error| io::Error::new(io::ErrorKind::Other, error.to_string()))
})
.into_async_read();
(mime::IMAGE, mime::JPEG) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::JPEG {
size: *sizes.largest(),
});
}
Some(IconKind::JPEG)
}
(mime, Box::new(body))
}
};
(mime::IMAGE, "x-icon") | (mime::IMAGE, "vnd.microsoft.icon") => {
if let Some(sizes) = sizes {
return Ok(IconInfo::ICO { sizes });
}
let kind = match (mime.type_(), mime.subtype()) {
(mime::IMAGE, mime::PNG) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::PNG {
size: *sizes.largest(),
});
}
Some(IconKind::PNG)
}
Some(IconKind::ICO)
}
(mime::IMAGE, mime::JPEG) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::JPEG {
size: *sizes.largest(),
});
}
Some(IconKind::JPEG)
}
(mime::IMAGE, mime::GIF) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::GIF {
size: *sizes.largest(),
});
}
(mime::IMAGE, "x-icon") | (mime::IMAGE, "vnd.microsoft.icon") => {
if let Some(sizes) = sizes {
return Ok(IconInfo::ICO { sizes });
}
Some(IconKind::GIF)
}
Some(IconKind::ICO)
}
(mime::IMAGE, mime::SVG) | (mime::TEXT, mime::PLAIN) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::SVG {
size: Some(*sizes.largest()),
});
}
(mime::IMAGE, mime::GIF) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::GIF {
size: *sizes.largest(),
});
}
Some(IconKind::SVG)
}
Some(IconKind::GIF)
}
_ => None,
};
(mime::IMAGE, mime::SVG) | (mime::TEXT, mime::PLAIN) => {
if let Some(sizes) = sizes {
return Ok(IconInfo::SVG {
size: Some(*sizes.largest()),
});
}
Some(IconKind::SVG)
}
_ => None,
};
IconInfo::decode(&mut body, kind).await
}
pub fn size(&self) -> Option<&IconSize> {
match self {
IconInfo::ICO { sizes } => Some(sizes.largest()),
IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => Some(size),
IconInfo::SVG { size } => size.as_ref(),
IconInfo::decode(&mut body, kind).await
}
}
pub fn sizes(&self) -> Option<IconSizes> {
match self {
IconInfo::ICO { sizes } => Some((*sizes).clone()),
IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => {
Some((*size).into())
}
IconInfo::SVG { size } => size.map(|size| size.into()),
pub fn size(&self) -> Option<&IconSize> {
match self {
IconInfo::ICO { sizes } => Some(sizes.largest()),
IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => Some(size),
IconInfo::SVG { size } => size.as_ref(),
}
}
}
pub fn mime_type(&self) -> &'static str {
match self {
IconInfo::PNG { .. } => "image/png",
IconInfo::JPEG { .. } => "image/jpeg",
IconInfo::ICO { .. } => "image/x-icon",
IconInfo::GIF { .. } => "image/gif",
IconInfo::SVG { .. } => "image/svg+xml",
pub fn sizes(&self) -> Option<IconSizes> {
match self {
IconInfo::ICO { sizes } => Some((*sizes).clone()),
IconInfo::PNG { size } | IconInfo::JPEG { size } | IconInfo::GIF { size } => {
Some((*size).into())
}
IconInfo::SVG { size } => size.map(|size| size.into()),
}
}
pub fn mime_type(&self) -> &'static str {
match self {
IconInfo::PNG { .. } => "image/png",
IconInfo::JPEG { .. } => "image/jpeg",
IconInfo::ICO { .. } => "image/x-icon",
IconInfo::GIF { .. } => "image/gif",
IconInfo::SVG { .. } => "image/svg+xml",
}
}
}
}
impl Display for IconInfo {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match self {
IconInfo::PNG { size } => write!(f, "png {}", size),
IconInfo::JPEG { size } => write!(f, "jpeg {}", size),
IconInfo::GIF { size } => write!(f, "gif {}", size),
IconInfo::ICO { sizes } => write!(f, "ico {}", sizes),
IconInfo::SVG { size } => {
write!(
f,
"svg{}",
if let Some(size) = size {
format!(" {}", size)
} else {
"".to_string()
}
)
}
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match self {
IconInfo::PNG { size } => write!(f, "png {}", size),
IconInfo::JPEG { size } => write!(f, "jpeg {}", size),
IconInfo::GIF { size } => write!(f, "gif {}", size),
IconInfo::ICO { sizes } => write!(f, "ico {}", sizes),
IconInfo::SVG { size } => {
write!(
f,
"svg{}",
if let Some(size) = size {
format!(" {}", size)
} else {
"".to_string()
}
)
}
}
}
}
}
impl Ord for IconInfo {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(IconInfo::SVG { size }, IconInfo::SVG { size: other_size }) => match (size, other_size) {
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(Some(size), Some(other_size)) => size.cmp(other_size),
(None, None) => Ordering::Equal,
},
(IconInfo::SVG { .. }, _) => Ordering::Less,
(_, IconInfo::SVG { .. }) => Ordering::Greater,
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(IconInfo::SVG { size }, IconInfo::SVG { size: other_size }) => {
match (size, other_size) {
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(Some(size), Some(other_size)) => size.cmp(other_size),
(None, None) => Ordering::Equal,
}
}
(IconInfo::SVG { .. }, _) => Ordering::Less,
(_, IconInfo::SVG { .. }) => Ordering::Greater,
_ => {
let size = self.size().unwrap();
let other_size = other.size().unwrap();
_ => {
let size = self.size().unwrap();
let other_size = other.size().unwrap();
size.cmp(other_size).then_with(|| match (self, other) {
(IconInfo::PNG { .. }, IconInfo::PNG { .. }) => Ordering::Equal,
(IconInfo::PNG { .. }, _) => Ordering::Less,
(_, IconInfo::PNG { .. }) => Ordering::Greater,
size.cmp(other_size).then_with(|| match (self, other) {
(IconInfo::PNG { .. }, IconInfo::PNG { .. }) => Ordering::Equal,
(IconInfo::PNG { .. }, _) => Ordering::Less,
(_, IconInfo::PNG { .. }) => Ordering::Greater,
(IconInfo::GIF { .. }, IconInfo::GIF { .. }) => Ordering::Equal,
(IconInfo::GIF { .. }, _) => Ordering::Less,
(_, IconInfo::GIF { .. }) => Ordering::Greater,
(IconInfo::GIF { .. }, IconInfo::GIF { .. }) => Ordering::Equal,
(IconInfo::GIF { .. }, _) => Ordering::Less,
(_, IconInfo::GIF { .. }) => Ordering::Greater,
(IconInfo::JPEG { .. }, IconInfo::JPEG { .. }) => Ordering::Equal,
(IconInfo::JPEG { .. }, _) => Ordering::Less,
(_, IconInfo::JPEG { .. }) => Ordering::Greater,
(IconInfo::JPEG { .. }, IconInfo::JPEG { .. }) => Ordering::Equal,
(IconInfo::JPEG { .. }, _) => Ordering::Less,
(_, IconInfo::JPEG { .. }) => Ordering::Greater,
(IconInfo::ICO { .. }, IconInfo::ICO { .. }) => Ordering::Equal,
(IconInfo::ICO { .. }, _) => Ordering::Less,
(_, IconInfo::ICO { .. }) => Ordering::Greater,
(IconInfo::ICO { .. }, IconInfo::ICO { .. }) => Ordering::Equal,
(IconInfo::ICO { .. }, _) => Ordering::Less,
(_, IconInfo::ICO { .. }) => Ordering::Greater,
_ => unreachable!(),
})
}
_ => unreachable!(),
})
}
}
}
}
}
impl PartialOrd for IconInfo {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

View file

@ -2,23 +2,23 @@ use super::IconSize;
use byteorder::{LittleEndian, ReadBytesExt};
use futures::prelude::*;
use std::{
error::Error,
io::{Cursor, Seek, SeekFrom},
error::Error,
io::{Cursor, Seek, SeekFrom},
};
/// Reads the dimensions of a GIF from `reader`.
///
/// Consumes 8 bytes, checks through `assert_slice_eq!` that they start with
/// `F8`, and decodes two little-endian `u16` values at byte offset 4 as width
/// and height.
// NOTE(review): `IconInfo::decode` has already consumed the first two bytes
// ("GI") when it calls this, so the buffer presumably starts at the third
// signature byte — confirm for other callers.
pub async fn get_gif_size<R: AsyncRead + Unpin>(
    reader: &mut R,
) -> Result<IconSize, Box<dyn Error>> {
    let mut header = [0; 8];
    reader.read_exact(&mut header).await?;
    let header = &mut Cursor::new(header);

    assert_slice_eq!(header, 0, b"F8", "bad header");

    header.seek(SeekFrom::Start(4))?;

    let width = header.read_u16::<LittleEndian>()? as u32;
    let height = header.read_u16::<LittleEndian>()? as u32;

    Ok(IconSize::new(width, height))
}

View file

@ -2,58 +2,58 @@ use super::{png::get_png_size, IconSize, IconSizes};
use byteorder::{LittleEndian, ReadBytesExt as _};
use futures::prelude::*;
use std::{
convert::TryInto,
error::Error,
io::{Cursor, Seek, SeekFrom},
convert::TryInto,
error::Error,
io::{Cursor, Seek, SeekFrom},
};
// Value the first u16 read by `get_ico_sizes` must have; anything else is
// rejected as "bad header".
const ICO_TYPE: u16 = 1;
// Byte stride between consecutive entries of the icon index that follows the
// header.
const INDEX_SIZE: u16 = 16;
pub async fn get_ico_sizes<R: AsyncRead + Unpin>(
reader: &mut R,
reader: &mut R,
) -> Result<IconSizes, Box<dyn Error>> {
let mut offset = 4;
let mut header = [0; 4];
reader.read_exact(&mut header).await?;
let mut header = Cursor::new(header);
let mut offset = 4;
let mut header = [0; 4];
reader.read_exact(&mut header).await?;
let mut header = Cursor::new(header);
let icon_type = header.read_u16::<LittleEndian>()?;
let icon_type = header.read_u16::<LittleEndian>()?;
if icon_type != ICO_TYPE {
return Err("bad header".into());
}
let icon_count = header.read_u16::<LittleEndian>()?;
let mut data = vec![0; (icon_count * INDEX_SIZE) as usize];
reader.read_exact(&mut data).await?;
offset += data.len();
let mut data = Cursor::new(data);
let mut sizes = Vec::new();
for i in 0..icon_count {
data.seek(SeekFrom::Start((INDEX_SIZE * i) as _))?;
let width = data.read_u8()?;
let height = data.read_u8()?;
if width == 0 && height == 0 {
data.seek(SeekFrom::Current(10))?;
let image_offset = data.read_u32::<LittleEndian>()?;
let mut data = vec![0; image_offset as usize - offset];
reader.read_exact(&mut data).await?;
offset += data.len();
let size = get_png_size(reader).await;
if let Ok(size) = size {
sizes.push(size);
}
} else {
sizes.push(IconSize::new(width as _, height as _))
if icon_type != ICO_TYPE {
return Err("bad header".into());
}
}
Ok(sizes.try_into()?)
let icon_count = header.read_u16::<LittleEndian>()?;
let mut data = vec![0; (icon_count * INDEX_SIZE) as usize];
reader.read_exact(&mut data).await?;
offset += data.len();
let mut data = Cursor::new(data);
let mut sizes = Vec::new();
for i in 0..icon_count {
data.seek(SeekFrom::Start((INDEX_SIZE * i) as _))?;
let width = data.read_u8()?;
let height = data.read_u8()?;
if width == 0 && height == 0 {
data.seek(SeekFrom::Current(10))?;
let image_offset = data.read_u32::<LittleEndian>()?;
let mut data = vec![0; image_offset as usize - offset];
reader.read_exact(&mut data).await?;
offset += data.len();
let size = get_png_size(reader).await;
if let Ok(size) = size {
sizes.push(size);
}
} else {
sizes.push(IconSize::new(width as _, height as _))
}
}
Ok(sizes.try_into()?)
}

View file

@ -3,11 +3,11 @@ use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::{
cmp::Ordering,
convert::{TryFrom, TryInto},
error::Error,
fmt::{self, Display},
ops::Deref,
cmp::Ordering,
convert::{TryFrom, TryInto},
error::Error,
fmt::{self, Display},
ops::Deref,
};
use vec1::{vec1, Vec1};
@ -16,99 +16,99 @@ use vec1::{vec1, Vec1};
// Collection of icon sizes backed by a `Vec1`; the `TryFrom<Vec<IconSize>>`
// constructor sorts it and `add_size` keeps it sorted.
pub struct IconSizes(Vec1<IconSize>);
impl Display for IconSizes {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(&self.0.iter().join(" "))
}
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(&self.0.iter().join(" "))
}
}
impl IconSizes {
pub fn add_size(&mut self, size: IconSize) {
match self.0.binary_search(&size) {
Ok(_) => {}
Err(pos) => self.0.insert(pos, size),
pub fn add_size(&mut self, size: IconSize) {
match self.0.binary_search(&size) {
Ok(_) => {}
Err(pos) => self.0.insert(pos, size),
}
}
}
pub fn largest(&self) -> &IconSize {
self.0.first()
}
pub fn largest(&self) -> &IconSize {
self.0.first()
}
}
impl TryFrom<&str> for IconSizes {
type Error = Box<dyn Error>;
type Error = Box<dyn Error>;
fn try_from(sizes_str: &str) -> Result<Self, Self::Error> {
let size_strs = sizes_str.split(" ");
fn try_from(sizes_str: &str) -> Result<Self, Self::Error> {
let size_strs = sizes_str.split(" ");
let mut sizes = Vec::new();
for size in size_strs {
if let Ok(size) = serde_json::from_value(Value::String(size.to_string())) {
sizes.push(size);
}
let mut sizes = Vec::new();
for size in size_strs {
if let Ok(size) = serde_json::from_value(Value::String(size.to_string())) {
sizes.push(size);
}
}
Ok(sizes.try_into()?)
}
Ok(sizes.try_into()?)
}
}
impl TryFrom<&String> for IconSizes {
type Error = Box<dyn Error>;
type Error = Box<dyn Error>;
fn try_from(sizes_str: &String) -> Result<Self, Self::Error> {
IconSizes::try_from(sizes_str.as_str())
}
fn try_from(sizes_str: &String) -> Result<Self, Self::Error> {
IconSizes::try_from(sizes_str.as_str())
}
}
impl TryFrom<String> for IconSizes {
type Error = Box<dyn Error>;
type Error = Box<dyn Error>;
fn try_from(sizes_str: String) -> Result<Self, Self::Error> {
IconSizes::try_from(sizes_str.as_str())
}
fn try_from(sizes_str: String) -> Result<Self, Self::Error> {
IconSizes::try_from(sizes_str.as_str())
}
}
impl Deref for IconSizes {
type Target = Vec1<IconSize>;
fn deref(&self) -> &Vec1<IconSize> {
&self.0
}
type Target = Vec1<IconSize>;
fn deref(&self) -> &Vec1<IconSize> {
&self.0
}
}
impl IntoIterator for IconSizes {
type Item = IconSize;
type IntoIter = std::vec::IntoIter<Self::Item>;
type Item = IconSize;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl Ord for IconSizes {
fn cmp(&self, other: &Self) -> Ordering {
self.largest().cmp(&other.largest())
}
fn cmp(&self, other: &Self) -> Ordering {
self.largest().cmp(&other.largest())
}
}
impl PartialOrd for IconSizes {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl TryFrom<Vec<IconSize>> for IconSizes {
type Error = String;
type Error = String;
fn try_from(mut vec: Vec<IconSize>) -> Result<Self, Self::Error> {
vec.sort();
fn try_from(mut vec: Vec<IconSize>) -> Result<Self, Self::Error> {
vec.sort();
Ok(IconSizes(
vec.try_into().map_err(|_| "must contain a size")?,
))
}
Ok(IconSizes(
vec.try_into().map_err(|_| "must contain a size")?,
))
}
}
impl From<IconSize> for IconSizes {
fn from(size: IconSize) -> Self {
IconSizes(vec1![size])
}
fn from(size: IconSize) -> Self {
IconSizes(vec1![size])
}
}

View file

@ -5,58 +5,58 @@ use futures::{AsyncRead, AsyncReadExt as _};
use super::IconSize;
async fn read_u16_be<R: AsyncRead + Unpin>(reader: &mut R) -> Result<u16, Box<dyn Error>> {
let mut buf = [0u8; 2];
reader.read_exact(&mut buf).await?;
Ok(u16::from_be_bytes(buf))
let mut buf = [0u8; 2];
reader.read_exact(&mut buf).await?;
Ok(u16::from_be_bytes(buf))
}
pub async fn get_jpeg_size<R: AsyncRead + Unpin>(
reader: &mut R,
reader: &mut R,
) -> Result<IconSize, Box<dyn Error>> {
let mut marker = [0; 2];
let mut depth = 0i32;
let mut marker = [0; 2];
let mut depth = 0i32;
loop {
// Read current marker (FF XX)
reader.read_exact(&mut marker).await?;
loop {
// Read current marker (FF XX)
reader.read_exact(&mut marker).await?;
if marker[0] != 0xFF {
// Did not read a marker. Assume image is corrupt.
return Err("invalid jpeg".into());
if marker[0] != 0xFF {
// Did not read a marker. Assume image is corrupt.
return Err("invalid jpeg".into());
}
let page = marker[1];
// Check for valid SOFn markers. C4, C8, and CC aren't dimension markers.
if (page >= 0xC0 && page <= 0xC3)
|| (page >= 0xC5 && page <= 0xC7)
|| (page >= 0xC9 && page <= 0xCB)
|| (page >= 0xCD && page <= 0xCF)
{
// Only get outside image size
if depth == 0 {
// Correct marker, go forward 3 bytes so we're at height offset
reader.read_exact(&mut [0; 3]).await?;
break;
}
} else if page == 0xD8 {
depth += 1;
} else if page == 0xD9 {
depth -= 1;
if depth < 0 {
return Err("invalid jpeg".into());
}
}
// Read the marker length and skip over it entirely
let page_size = read_u16_be(reader).await? as i64;
reader
.read_exact(&mut vec![0; (page_size - 2) as usize])
.await?;
}
let page = marker[1];
let height = read_u16_be(reader).await?;
let width = read_u16_be(reader).await?;
// Check for valid SOFn markers. C4, C8, and CC aren't dimension markers.
if (page >= 0xC0 && page <= 0xC3)
|| (page >= 0xC5 && page <= 0xC7)
|| (page >= 0xC9 && page <= 0xCB)
|| (page >= 0xCD && page <= 0xCF)
{
// Only get outside image size
if depth == 0 {
// Correct marker, go forward 3 bytes so we're at height offset
reader.read_exact(&mut [0; 3]).await?;
break;
}
} else if page == 0xD8 {
depth += 1;
} else if page == 0xD9 {
depth -= 1;
if depth < 0 {
return Err("invalid jpeg".into());
}
}
// Read the marker length and skip over it entirely
let page_size = read_u16_be(reader).await? as i64;
reader
.read_exact(&mut vec![0; (page_size - 2) as usize])
.await?;
}
let height = read_u16_be(reader).await?;
let width = read_u16_be(reader).await?;
Ok(IconSize::new(width as _, height as _))
Ok(IconSize::new(width as _, height as _))
}

View file

@ -14,87 +14,87 @@ pub use svg::*;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use std::{
cmp::{self, Ordering},
error::Error,
fmt::{self, Display},
io::{Read, Seek, SeekFrom},
cmp::{self, Ordering},
error::Error,
fmt::{self, Display},
io::{Read, Seek, SeekFrom},
};
#[serde_as]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IconSize {
pub width: u32,
pub height: u32,
pub width: u32,
pub height: u32,
}
impl Display for IconSize {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}x{}", self.width, self.height)
}
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}x{}", self.width, self.height)
}
}
impl IconSize {
pub fn new(width: u32, height: u32) -> Self {
Self { width, height }
}
pub fn new(width: u32, height: u32) -> Self {
Self { width, height }
}
pub fn max_rect(&self) -> u32 {
cmp::max(self.width, self.height)
}
pub fn max_rect(&self) -> u32 {
cmp::max(self.width, self.height)
}
}
impl Ord for IconSize {
fn cmp(&self, other: &Self) -> Ordering {
other.max_rect().cmp(&self.max_rect())
}
fn cmp(&self, other: &Self) -> Ordering {
other.max_rect().cmp(&self.max_rect())
}
}
impl PartialOrd for IconSize {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Serialize for IconSize {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(self)
}
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(self)
}
}
impl<'de> Deserialize<'de> for IconSize {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
let mut split = value.split("x");
let width = split
.next()
.ok_or(de::Error::custom("expected width"))?
.parse()
.map_err(de::Error::custom)?;
let mut split = value.split("x");
let width = split
.next()
.ok_or(de::Error::custom("expected width"))?
.parse()
.map_err(de::Error::custom)?;
let height = split
.next()
.ok_or(de::Error::custom("expected height"))?
.parse()
.map_err(de::Error::custom)?;
let height = split
.next()
.ok_or(de::Error::custom("expected height"))?
.parse()
.map_err(de::Error::custom)?;
Ok(IconSize::new(width, height))
}
Ok(IconSize::new(width, height))
}
}
fn slice_eq<T: Read + Seek + Unpin>(
cur: &mut T,
offset: u64,
slice: &[u8],
cur: &mut T,
offset: u64,
slice: &[u8],
) -> Result<bool, Box<dyn Error>> {
cur.seek(SeekFrom::Start(offset))?;
let mut buffer = vec![0; slice.len()];
cur.read_exact(&mut buffer)?;
Ok(buffer == slice)
cur.seek(SeekFrom::Start(offset))?;
let mut buffer = vec![0; slice.len()];
cur.read_exact(&mut buffer)?;
Ok(buffer == slice)
}

View file

@ -4,17 +4,17 @@ use futures::prelude::*;
use std::{error::Error, io::Cursor};
pub async fn get_png_size<R: AsyncRead + Unpin>(
reader: &mut R,
reader: &mut R,
) -> Result<IconSize, Box<dyn Error>> {
let mut header = [0; 22];
reader.read_exact(&mut header).await?;
let header = &mut Cursor::new(header);
let mut header = [0; 22];
reader.read_exact(&mut header).await?;
let header = &mut Cursor::new(header);
assert_slice_eq!(header, 0, b"NG\r\n\x1a\n", "bad header");
assert_slice_eq!(header, 10, b"IHDR", "bad header");
assert_slice_eq!(header, 0, b"NG\r\n\x1a\n", "bad header");
assert_slice_eq!(header, 10, b"IHDR", "bad header");
let width = header.read_u32::<BigEndian>()?;
let height = header.read_u32::<BigEndian>()?;
let width = header.read_u32::<BigEndian>()?;
let height = header.read_u32::<BigEndian>()?;
Ok(IconSize::new(width, height))
Ok(IconSize::new(width, height))
}

View file

@ -4,65 +4,67 @@ use lol_html::{element, HtmlRewriter, Settings};
use std::{cell::RefCell, error::Error};
fn parse_size<S: ToString>(size: S) -> Option<u32> {
size
.to_string()
.parse::<f64>()
.ok()
.map(|size| size.round() as u32)
size.to_string()
.parse::<f64>()
.ok()
.map(|size| size.round() as u32)
}
pub async fn get_svg_size<R: AsyncRead + Unpin>(
first_bytes: &[u8; 2],
reader: &mut R,
first_bytes: &[u8; 2],
reader: &mut R,
) -> Result<Option<IconSize>, Box<dyn Error>> {
let size = RefCell::new(None);
let size = RefCell::new(None);
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
// Rewrite insecure hyperlinks
element!("svg", |el| {
let viewbox = el.get_attribute("viewbox");
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
// Rewrite insecure hyperlinks
element!("svg", |el| {
let viewbox = el.get_attribute("viewbox");
let width = el.get_attribute("width").and_then(parse_size);
let height = el.get_attribute("height").and_then(parse_size);
let width = el.get_attribute("width").and_then(parse_size);
let height = el.get_attribute("height").and_then(parse_size);
*size.borrow_mut() = Some(if let (Some(width), Some(height)) = (width, height) {
Some(IconSize::new(width, height))
} else if let Some(viewbox) = viewbox {
regex!(r"^-?\d+\s+-?\d+\s+(\d+\.?[\d]?)\s+(\d+\.?[\d]?)")
.captures(&viewbox)
.map(|captures| {
let width = parse_size(captures.get(1).unwrap().as_str()).unwrap();
let height = parse_size(captures.get(2).unwrap().as_str()).unwrap();
IconSize::new(width, height)
})
} else {
None
});
*size.borrow_mut() =
Some(if let (Some(width), Some(height)) = (width, height) {
Some(IconSize::new(width, height))
} else if let Some(viewbox) = viewbox {
regex!(r"^-?\d+\s+-?\d+\s+(\d+\.?[\d]?)\s+(\d+\.?[\d]?)")
.captures(&viewbox)
.map(|captures| {
let width =
parse_size(captures.get(1).unwrap().as_str()).unwrap();
let height =
parse_size(captures.get(2).unwrap().as_str()).unwrap();
IconSize::new(width, height)
})
} else {
None
});
Ok(())
}),
],
..Settings::default()
},
|_: &[u8]| {},
);
Ok(())
}),
],
..Settings::default()
},
|_: &[u8]| {},
);
rewriter.write(first_bytes)?;
rewriter.write(first_bytes)?;
let mut buffer = [0; 100];
let mut buffer = [0; 100];
loop {
let n = reader.read(&mut buffer).await?;
if n == 0 {
return Err("invalid svg".into());
loop {
let n = reader.read(&mut buffer).await?;
if n == 0 {
return Err("invalid svg".into());
}
rewriter.write(&buffer[..n])?;
if let Some(size) = *size.borrow() {
return Ok(size);
}
}
rewriter.write(&buffer[..n])?;
if let Some(size) = *size.borrow() {
return Ok(size);
}
}
}

View file

@ -8,116 +8,115 @@ use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::{
cmp::Ordering,
collections::HashMap,
convert::TryInto,
error::Error,
fmt::{self, Display},
hash::{Hash, Hasher},
str::FromStr,
cmp::Ordering,
collections::HashMap,
convert::TryInto,
error::Error,
fmt::{self, Display},
hash::{Hash, Hasher},
str::FromStr,
};
use url::Url;
#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, SerializeDisplay, DeserializeFromStr)]
pub enum IconKind {
AppIcon,
SiteFavicon,
SiteLogo,
AppIcon,
SiteFavicon,
SiteLogo,
}
impl Display for IconKind {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
f.write_str(match self {
IconKind::SiteLogo => "site_logo",
IconKind::AppIcon => "app_icon",
IconKind::SiteFavicon => "site_favicon",
})
}
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
f.write_str(match self {
IconKind::SiteLogo => "site_logo",
IconKind::AppIcon => "app_icon",
IconKind::SiteFavicon => "site_favicon",
})
}
}
impl FromStr for IconKind {
type Err = String;
type Err = String;
fn from_str(kind: &str) -> Result<Self, Self::Err> {
match kind {
"site_logo" => Ok(IconKind::SiteLogo),
"app_icon" => Ok(IconKind::AppIcon),
"site_favicon" => Ok(IconKind::SiteFavicon),
_ => Err("unknown icon kind!".into()),
fn from_str(kind: &str) -> Result<Self, Self::Err> {
match kind {
"site_logo" => Ok(IconKind::SiteLogo),
"app_icon" => Ok(IconKind::AppIcon),
"site_favicon" => Ok(IconKind::SiteFavicon),
_ => Err("unknown icon kind!".into()),
}
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Icon {
pub url: Url,
pub headers: HashMap<String, String>,
pub kind: IconKind,
#[serde(flatten)]
pub info: IconInfo,
pub url: Url,
pub headers: HashMap<String, String>,
pub kind: IconKind,
#[serde(flatten)]
pub info: IconInfo,
}
impl Hash for Icon {
fn hash<H: Hasher>(&self, state: &mut H) {
(
&self.url,
self
.headers
.iter()
.sorted_by_key(|(key, _)| *key)
.collect::<Vec<_>>(),
)
.hash(state);
}
fn hash<H: Hasher>(&self, state: &mut H) {
(
&self.url,
self.headers
.iter()
.sorted_by_key(|(key, _)| *key)
.collect::<Vec<_>>(),
)
.hash(state);
}
}
impl Icon {
pub fn new(url: Url, kind: IconKind, info: IconInfo) -> Self {
Icon::new_with_headers(url, HashMap::new(), kind, info)
}
pub fn new_with_headers(
url: Url,
headers: HashMap<String, String>,
kind: IconKind,
info: IconInfo,
) -> Self {
Self {
url,
headers,
kind,
info,
pub fn new(url: Url, kind: IconKind, info: IconInfo) -> Self {
Icon::new_with_headers(url, HashMap::new(), kind, info)
}
}
pub async fn load(
url: Url,
kind: IconKind,
sizes: Option<String>,
) -> Result<Self, Box<dyn Error>> {
Icon::load_with_headers(url, HashMap::new(), kind, sizes).await
}
pub fn new_with_headers(
url: Url,
headers: HashMap<String, String>,
kind: IconKind,
info: IconInfo,
) -> Self {
Self {
url,
headers,
kind,
info,
}
}
pub async fn load_with_headers(
url: Url,
headers: HashMap<String, String>,
kind: IconKind,
sizes: Option<String>,
) -> Result<Self, Box<dyn Error>> {
let info = IconInfo::load(url.clone(), (&headers).try_into().unwrap(), sizes).await?;
pub async fn load(
url: Url,
kind: IconKind,
sizes: Option<String>,
) -> Result<Self, Box<dyn Error>> {
Icon::load_with_headers(url, HashMap::new(), kind, sizes).await
}
Ok(Icon::new_with_headers(url, headers, kind, info))
}
pub async fn load_with_headers(
url: Url,
headers: HashMap<String, String>,
kind: IconKind,
sizes: Option<String>,
) -> Result<Self, Box<dyn Error>> {
let info = IconInfo::load(url.clone(), (&headers).try_into().unwrap(), sizes).await?;
Ok(Icon::new_with_headers(url, headers, kind, info))
}
}
impl Ord for Icon {
fn cmp(&self, other: &Self) -> Ordering {
self.info.cmp(&other.info)
}
fn cmp(&self, other: &Self) -> Ordering {
self.info.cmp(&other.info)
}
}
impl PartialOrd for Icon {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

View file

@ -11,198 +11,200 @@ use url::Url;
use vec1::Vec1;
pub struct SiteIcons {
blacklist: Option<Box<dyn Fn(&Url) -> bool>>,
blacklist: Option<Box<dyn Fn(&Url) -> bool>>,
}
#[derive(Debug, Clone)]
enum LoadedKind {
DefaultManifest(Option<Vec1<Icon>>),
HeadTags(Option<Vec1<Icon>>),
DefaultFavicon(Option<Icon>),
SiteLogo(Option<Icon>),
DefaultManifest(Option<Vec1<Icon>>),
HeadTags(Option<Vec1<Icon>>),
DefaultFavicon(Option<Icon>),
SiteLogo(Option<Icon>),
}
impl SiteIcons {
pub fn new() -> Self {
SiteIcons { blacklist: None }
}
pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self {
SiteIcons {
blacklist: Some(Box::new(blacklist)),
pub fn new() -> Self {
SiteIcons { blacklist: None }
}
}
pub fn is_blacklisted(&self, url: &Url) -> bool {
if let Some(is_blacklisted) = &self.blacklist {
is_blacklisted(url)
} else {
false
}
}
pub async fn load_website<U: IntoUrl>(
&mut self,
url: U,
best_matches_only: bool,
) -> Result<Vec<Icon>, Box<dyn Error>> {
let url = url.into_url()?;
let manifest_urls = vec![
push_url(&url, "manifest.json"),
push_url(&url, "manifest.webmanifest"),
url.join("/manifest.json")?,
url.join("/manifest.webmanifest")?,
]
.into_iter()
.unique();
let favicon_urls = vec![
push_url(&url, "favicon.svg"),
url.join("/favicon.svg")?,
push_url(&url, "favicon.ico"),
url.join("/favicon.ico")?,
]
.into_iter()
.unique();
let html_response = async {
let res = CLIENT
.get(url.clone())
.header(ACCEPT, "text/html")
.send()
.await
.ok()?
.error_for_status()
.ok()?;
let url = res.url().clone();
if self.is_blacklisted(&url) {
None
} else {
let body = res.bytes_stream().map(|res| {
res
.map(|bytes| bytes.to_vec())
.map_err(|err| err.to_string())
});
let mut publisher = Publisher::new(128);
let subscriber = publisher.subscribe();
Some((
url,
async move { StreamPublisher::new(&mut publisher, body).await }.shared(),
subscriber,
))
}
}
.shared();
let mut futures = vec![
async {
let html_response = html_response.clone().await;
LoadedKind::HeadTags(match html_response {
Some((url, _, body)) => html_parser::parse_head(&url, body)
.await
.ok()
.and_then(|icons| icons.try_into().ok()),
None => None,
})
}
.boxed_local(),
async {
let html_response = html_response.clone().await;
LoadedKind::SiteLogo(match html_response {
Some((url, complete, body)) => {
let (icons, _) = join!(
html_parser::parse_site_logo(&url, body, |url| self.is_blacklisted(url)),
complete
);
icons.ok()
}
None => None,
})
}
.boxed_local(),
async {
let manifests = join_all(manifest_urls.map(|url| SiteIcons::load_manifest(url))).await;
LoadedKind::DefaultManifest(
manifests
.into_iter()
.find_map(|manifest| manifest.ok().and_then(|icons| icons.try_into().ok())),
)
}
.boxed_local(),
async {
let favicons =
join_all(favicon_urls.map(|url| Icon::load(url.clone(), IconKind::SiteFavicon, None)))
.await;
LoadedKind::DefaultFavicon(favicons.into_iter().find_map(|favicon| favicon.ok()))
}
.boxed_local(),
];
let mut icons: Vec<Icon> = Vec::new();
let mut found_best_match = false;
let mut previous_loads = Vec::new();
while !futures.is_empty() {
let (loaded, index, _) = select_all(&mut futures).await;
futures.remove(index);
match loaded.clone() {
LoadedKind::DefaultManifest(manifest_icons) => {
if let Some(manifest_icons) = manifest_icons {
icons.extend(manifest_icons);
found_best_match = true;
}
pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self {
SiteIcons {
blacklist: Some(Box::new(blacklist)),
}
LoadedKind::DefaultFavicon(favicon) => {
if let Some(favicon) = favicon {
icons.push(favicon);
}
if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::HeadTags(_)))
{
found_best_match = true;
pub fn is_blacklisted(&self, url: &Url) -> bool {
if let Some(is_blacklisted) = &self.blacklist {
is_blacklisted(url)
} else {
false
}
}
pub async fn load_website<U: IntoUrl>(
&mut self,
url: U,
best_matches_only: bool,
) -> Result<Vec<Icon>, Box<dyn Error>> {
let url = url.into_url()?;
let manifest_urls = vec![
push_url(&url, "manifest.json"),
push_url(&url, "manifest.webmanifest"),
url.join("/manifest.json")?,
url.join("/manifest.webmanifest")?,
]
.into_iter()
.unique();
let favicon_urls = vec![
push_url(&url, "favicon.svg"),
url.join("/favicon.svg")?,
push_url(&url, "favicon.ico"),
url.join("/favicon.ico")?,
]
.into_iter()
.unique();
let html_response = async {
let res = CLIENT
.get(url.clone())
.header(ACCEPT, "text/html")
.send()
.await
.ok()?
.error_for_status()
.ok()?;
let url = res.url().clone();
if self.is_blacklisted(&url) {
None
} else {
let body = res.bytes_stream().map(|res| {
res.map(|bytes| bytes.to_vec())
.map_err(|err| err.to_string())
});
let mut publisher = Publisher::new(128);
let subscriber = publisher.subscribe();
Some((
url,
async move { StreamPublisher::new(&mut publisher, body).await }.shared(),
subscriber,
))
}
}
}
LoadedKind::HeadTags(head_icons) => {
if let Some(head_icons) = head_icons {
icons.extend(head_icons);
found_best_match = true;
} else if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::DefaultFavicon(Some(_))))
{
found_best_match = true;
}
.shared();
let mut futures = vec![
async {
let html_response = html_response.clone().await;
LoadedKind::HeadTags(match html_response {
Some((url, _, body)) => html_parser::parse_head(&url, body)
.await
.ok()
.and_then(|icons| icons.try_into().ok()),
None => None,
})
}
.boxed_local(),
async {
let html_response = html_response.clone().await;
LoadedKind::SiteLogo(match html_response {
Some((url, complete, body)) => {
let (icons, _) = join!(
html_parser::parse_site_logo(&url, body, |url| self
.is_blacklisted(url)),
complete
);
icons.ok()
}
None => None,
})
}
.boxed_local(),
async {
let manifests =
join_all(manifest_urls.map(|url| SiteIcons::load_manifest(url))).await;
LoadedKind::DefaultManifest(
manifests
.into_iter()
.find_map(|manifest| manifest.ok().and_then(|icons| icons.try_into().ok())),
)
}
.boxed_local(),
async {
let favicons = join_all(
favicon_urls.map(|url| Icon::load(url.clone(), IconKind::SiteFavicon, None)),
)
.await;
LoadedKind::DefaultFavicon(favicons.into_iter().find_map(|favicon| favicon.ok()))
}
.boxed_local(),
];
let mut icons: Vec<Icon> = Vec::new();
let mut found_best_match = false;
let mut previous_loads = Vec::new();
while !futures.is_empty() {
let (loaded, index, _) = select_all(&mut futures).await;
futures.remove(index);
match loaded.clone() {
LoadedKind::DefaultManifest(manifest_icons) => {
if let Some(manifest_icons) = manifest_icons {
icons.extend(manifest_icons);
found_best_match = true;
}
}
LoadedKind::DefaultFavicon(favicon) => {
if let Some(favicon) = favicon {
icons.push(favicon);
if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::HeadTags(_)))
{
found_best_match = true;
}
}
}
LoadedKind::HeadTags(head_icons) => {
if let Some(head_icons) = head_icons {
icons.extend(head_icons);
found_best_match = true;
} else if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::DefaultFavicon(Some(_))))
{
found_best_match = true;
}
}
LoadedKind::SiteLogo(logo) => {
if let Some(logo) = logo {
icons.push(logo);
}
}
}
previous_loads.push(loaded);
icons.sort();
icons = icons.into_iter().unique().collect();
if best_matches_only && found_best_match {
break;
}
}
LoadedKind::SiteLogo(logo) => {
if let Some(logo) = logo {
icons.push(logo);
}
}
}
previous_loads.push(loaded);
icons.sort();
icons = icons.into_iter().unique().collect();
if best_matches_only && found_best_match {
break;
}
Ok(icons)
}
Ok(icons)
}
}

View file

@ -35,12 +35,12 @@ pub use icons::*;
use once_cell::sync::Lazy;
use reqwest::{
header::{HeaderMap, HeaderValue, USER_AGENT},
Client,
header::{HeaderMap, HeaderValue, USER_AGENT},
Client,
};
static CLIENT: Lazy<Client> = Lazy::new(|| {
let mut headers = HeaderMap::new();
headers.insert(USER_AGENT, HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36").unwrap());
Client::builder().default_headers(headers).build().unwrap()
let mut headers = HeaderMap::new();
headers.insert(USER_AGENT, HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36").unwrap());
Client::builder().default_headers(headers).build().unwrap()
});

View file

@ -8,49 +8,47 @@ use url::Url;
#[derive(Debug, Deserialize)]
struct ManifestIcon {
src: String,
sizes: Option<String>,
src: String,
sizes: Option<String>,
}
#[derive(Debug, Deserialize)]
struct Manifest {
icons: Vec<ManifestIcon>,
icons: Vec<ManifestIcon>,
}
impl SiteIcons {
pub async fn load_manifest<U: IntoUrl>(url: U) -> Result<Vec<Icon>, Box<dyn Error>> {
let url = url.into_url()?;
pub async fn load_manifest<U: IntoUrl>(url: U) -> Result<Vec<Icon>, Box<dyn Error>> {
let url = url.into_url()?;
Ok(load_manifest_cached(url).await?)
}
Ok(load_manifest_cached(url).await?)
}
}
#[cached(sync_writes = true)]
async fn load_manifest_cached(url: Url) -> Result<Vec<Icon>, String> {
let url = &url;
let url = &url;
let manifest: Manifest = CLIENT
.get(url.clone())
.send()
.await
.map_err(|e| format!("{}: {:?}", url, e))?
.error_for_status()
.map_err(|e| format!("{}: {:?}", url, e))?
.json()
.await
.map_err(|e| format!("{}: {:?}", url, e))?;
let manifest: Manifest = CLIENT
.get(url.clone())
.send()
.await
.map_err(|e| format!("{}: {:?}", url, e))?
.error_for_status()
.map_err(|e| format!("{}: {:?}", url, e))?
.json()
.await
.map_err(|e| format!("{}: {:?}", url, e))?;
Ok(
join_all(manifest.icons.into_iter().map(|icon| async move {
if let Ok(src) = url.join(&icon.src) {
Icon::load(src, IconKind::AppIcon, icon.sizes).await.ok()
} else {
None
}
Ok(join_all(manifest.icons.into_iter().map(|icon| async move {
if let Ok(src) = url.join(&icon.src) {
Icon::load(src, IconKind::AppIcon, icon.sizes).await.ok()
} else {
None
}
}))
.await
.into_iter()
.filter_map(|icon| icon)
.collect(),
)
.collect())
}

View file

@ -1,43 +1,43 @@
use std::{
pin::Pin,
task::{Context, Poll},
pin::Pin,
task::{Context, Poll},
};
use futures::Future;
pub async fn poll_in_background<F, B, FO, BO>(future: F, background_future: B) -> FO
where
F: Future<Output = FO> + Unpin,
B: Future<Output = BO> + Unpin,
{
struct BackgroundPoller<F, B> {
future: F,
background_future: B,
}
impl<F, B, FO, BO> Future for BackgroundPoller<F, B>
where
F: Future<Output = FO> + Unpin,
B: Future<Output = BO> + Unpin,
{
type Output = FO;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = self.get_mut();
let result = Pin::new(&mut this.future).poll(cx);
if result.is_pending() {
let _ = Pin::new(&mut this.background_future).poll(cx);
}
result
{
struct BackgroundPoller<F, B> {
future: F,
background_future: B,
}
}
BackgroundPoller {
future,
background_future,
}
.await
impl<F, B, FO, BO> Future for BackgroundPoller<F, B>
where
F: Future<Output = FO> + Unpin,
B: Future<Output = BO> + Unpin,
{
type Output = FO;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = self.get_mut();
let result = Pin::new(&mut this.future).poll(cx);
if result.is_pending() {
let _ = Pin::new(&mut this.background_future).poll(cx);
}
result
}
}
BackgroundPoller {
future,
background_future,
}
.await
}

View file

@ -12,10 +12,10 @@ macro_rules! join_with {
}
macro_rules! regex {
($re:literal $(,)?) => {{
static RE: once_cell::sync::OnceCell<regex::Regex> = once_cell::sync::OnceCell::new();
RE.get_or_init(|| regex::Regex::new($re).unwrap())
}};
($re:literal $(,)?) => {{
static RE: once_cell::sync::OnceCell<regex::Regex> = once_cell::sync::OnceCell::new();
RE.get_or_init(|| regex::Regex::new($re).unwrap())
}};
}
macro_rules! assert_slice_eq {

View file

@ -10,7 +10,7 @@ pub use svg_encoder::*;
use url::Url;
pub fn push_url(url: &Url, segment: &str) -> Url {
let mut url = url.clone();
url.path_segments_mut().unwrap().push(segment);
url
let mut url = url.clone();
url.path_segments_mut().unwrap().push(segment);
url
}

View file

@ -2,52 +2,52 @@ use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use std::borrow::Cow;
const DATA_URI: &AsciiSet = &CONTROLS
.add(b'\r')
.add(b'\n')
.add(b'%')
.add(b'#')
.add(b'(')
.add(b')')
.add(b'<')
.add(b'>')
.add(b'?')
.add(b'[')
.add(b'\\')
.add(b']')
.add(b'^')
.add(b'`')
.add(b'{')
.add(b'|')
.add(b'}');
.add(b'\r')
.add(b'\n')
.add(b'%')
.add(b'#')
.add(b'(')
.add(b')')
.add(b'<')
.add(b'>')
.add(b'?')
.add(b'[')
.add(b'\\')
.add(b']')
.add(b'^')
.add(b'`')
.add(b'{')
.add(b'|')
.add(b'}');
pub fn encode_svg(svg: &str) -> String {
// add namespace
let encoded = if !svg.contains("http://www.w3.org/2000/svg") {
regex!("<svg").replace(svg, "<svg xmlns='http://www.w3.org/2000/svg'")
} else {
svg.into()
};
// add namespace
let encoded = if !svg.contains("http://www.w3.org/2000/svg") {
regex!("<svg").replace(svg, "<svg xmlns='http://www.w3.org/2000/svg'")
} else {
svg.into()
};
// use single quotes instead of double to avoid encoding.
let mut encoded = regex!("\"").replace_all(&encoded, "'");
// use single quotes instead of double to avoid encoding.
let mut encoded = regex!("\"").replace_all(&encoded, "'");
// remove a fill=none attribute
if let Some(captures) = regex!("^[^>]+fill='?(none)'?").captures(&encoded) {
let index = captures.get(1).unwrap();
let mut result = String::new();
for (i, c) in encoded.chars().enumerate() {
if i < index.start() || i >= index.end() {
result.push(c);
}
// remove a fill=none attribute
if let Some(captures) = regex!("^[^>]+fill='?(none)'?").captures(&encoded) {
let index = captures.get(1).unwrap();
let mut result = String::new();
for (i, c) in encoded.chars().enumerate() {
if i < index.start() || i >= index.end() {
result.push(c);
}
}
encoded = Cow::from(result);
}
encoded = Cow::from(result);
}
// remove whitespace
let encoded = regex!(r">\s{1,}<").replace_all(&encoded, "><");
let encoded = regex!(r"\s{2,}").replace_all(&encoded, " ");
// remove whitespace
let encoded = regex!(r">\s{1,}<").replace_all(&encoded, "><");
let encoded = regex!(r"\s{2,}").replace_all(&encoded, " ");
let encoded = utf8_percent_encode(&encoded, DATA_URI);
let encoded = utf8_percent_encode(&encoded, DATA_URI);
format!("data:image/svg+xml,{}", encoded)
format!("data:image/svg+xml,{}", encoded)
}