feat(0.6): performance

This commit is contained in:
Sam Denty 2023-01-03 14:21:54 +00:00
parent 99ed10ff27
commit 34972db18b
No known key found for this signature in database
GPG key ID: 7B4EAF7B9E291B79
24 changed files with 974 additions and 564 deletions

115
Cargo.lock generated
View file

@ -31,6 +31,23 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "async-trait"
version = "0.1.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d1d8ab452a3936018a687b20e6f7cf5363d713b732b8884001317b0e48aa3"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "async_once"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ce4f10ea3abcd6617873bae9f91d1c5332b4a778bd9ce34d0cd517474c1de82"
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.14" version = "0.2.14"
@ -78,6 +95,43 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
[[package]]
name = "cached"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec6d20b3d24b6c74e2c5331d2d3d8d1976a9883c7da179aa851afa4c90d62e36"
dependencies = [
"async-trait",
"async_once",
"cached_proc_macro",
"cached_proc_macro_types",
"futures",
"hashbrown",
"instant",
"lazy_static",
"once_cell",
"thiserror",
"tokio",
]
[[package]]
name = "cached_proc_macro"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "751f7f4e7a091545e7f6c65bacc404eaee7e87bfb1f9ece234a1caa173dc16f2"
dependencies = [
"cached_proc_macro_types",
"darling 0.13.4",
"quote",
"syn",
]
[[package]]
name = "cached_proc_macro_types"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.77" version = "1.0.77"
@ -272,14 +326,38 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "darling"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c"
dependencies = [
"darling_core 0.13.4",
"darling_macro 0.13.4",
]
[[package]] [[package]]
name = "darling" name = "darling"
version = "0.14.2" version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa"
dependencies = [ dependencies = [
"darling_core", "darling_core 0.14.2",
"darling_macro", "darling_macro 0.14.2",
]
[[package]]
name = "darling_core"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
] ]
[[package]] [[package]]
@ -296,13 +374,24 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "darling_macro"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835"
dependencies = [
"darling_core 0.13.4",
"quote",
"syn",
]
[[package]] [[package]]
name = "darling_macro" name = "darling_macro"
version = "0.14.2" version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e"
dependencies = [ dependencies = [
"darling_core", "darling_core 0.14.2",
"quote", "quote",
"syn", "syn",
] ]
@ -384,6 +473,17 @@ dependencies = [
"instant", "instant",
] ]
[[package]]
name = "flo_stream"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a7246db09b6a924fb11fedc1e33c34e6d5d0ba3c95a87cd2994f9581cf5a470"
dependencies = [
"futures",
"lazy_static",
"smallvec",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -762,6 +862,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
] ]
[[package]] [[package]]
@ -1650,7 +1753,7 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3452b4c0f6c1e357f73fdb87cd1efabaa12acf328c7a528e252893baeb3f4aa" checksum = "e3452b4c0f6c1e357f73fdb87cd1efabaa12acf328c7a528e252893baeb3f4aa"
dependencies = [ dependencies = [
"darling", "darling 0.14.2",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn", "syn",
@ -1683,12 +1786,14 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]] [[package]]
name = "site_icons" name = "site_icons"
version = "0.5.0" version = "0.6.0"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"cached",
"clap", "clap",
"data-url", "data-url",
"env_logger", "env_logger",
"flo_stream",
"futures", "futures",
"html5ever", "html5ever",
"itertools", "itertools",

View file

@ -1,6 +1,6 @@
[package] [package]
name = "site_icons" name = "site_icons"
version = "0.5.0" version = "0.6.0"
authors = ["Sam Denty <sam@samdenty.com>"] authors = ["Sam Denty <sam@samdenty.com>"]
edition = "2018" edition = "2018"
license = "GPL-3.0" license = "GPL-3.0"
@ -19,6 +19,7 @@ crate-type = ["cdylib", "rlib"]
[dependencies] [dependencies]
vec1 = { version = "1.10.1", features = ["serde"] } vec1 = { version = "1.10.1", features = ["serde"] }
flo_stream = "0.7"
itertools = "0.10.5" itertools = "0.10.5"
serde_with = "2.1.0" serde_with = "2.1.0"
html5ever = "0.26.0" html5ever = "0.26.0"
@ -44,11 +45,16 @@ reqwest = { package = "reqwest-wasm", version = "0.11.16", features = [
"blocking", "blocking",
"stream", "stream",
] } ] }
cached = { version = "0.41.0", default_features = false, features = [
"proc_macro",
"wasm",
] }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies] [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
clap = { version = "3.2.23", features = ["derive"] } clap = { version = "3.2.23", features = ["derive"] }
tokio = { version = "1.22.0", features = ["full"] } tokio = { version = "1.22.0", features = ["full"] }
env_logger = "0.9.3" env_logger = "0.9.3"
cached = "0.41.0"
reqwest = { version = "0.11.13", features = [ reqwest = { version = "0.11.13", features = [
"json", "json",
"cookies", "cookies",

View file

@ -1,12 +1,15 @@
use clap::Parser; use clap::Parser;
use env_logger::Builder; use env_logger::Builder;
use log::LevelFilter; use log::LevelFilter;
use site_icons::Icons; use site_icons::SiteIcons;
use std::error::Error; use std::error::Error;
#[derive(Parser)] #[derive(Parser)]
struct Opts { struct Opts {
urls: Vec<String>, url: String,
#[clap(long)]
fast: bool,
#[clap(long)] #[clap(long)]
json: bool, json: bool,
#[clap(long)] #[clap(long)]
@ -16,7 +19,7 @@ struct Opts {
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> { async fn main() -> Result<(), Box<dyn Error>> {
let mut icons = Icons::new(); let mut icons = SiteIcons::new();
let opts: Opts = Opts::parse(); let opts: Opts = Opts::parse();
if opts.debug { if opts.debug {
@ -25,11 +28,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
builder.init(); builder.init();
} }
for url in opts.urls { let entries = icons.load_website(opts.url, opts.fast).await?;
icons.load_website(&url).await?;
}
let entries = icons.entries().await;
if opts.json { if opts.json {
println!("{}", serde_json::to_string_pretty(&entries)?) println!("{}", serde_json::to_string_pretty(&entries)?)

122
src/html_parser/head.rs Normal file
View file

@ -0,0 +1,122 @@
use crate::utils::poll_in_background;
use crate::Icon;
use crate::IconKind;
use crate::SiteIcons;
use futures::future::join_all;
use futures::FutureExt;
use futures::Stream;
use futures::StreamExt;
use lol_html::{element, errors::RewritingError, HtmlRewriter, Settings};
use std::{
cell::RefCell,
error::Error,
fmt::{self, Display},
};
use url::Url;
/// Sentinel error raised by the `head` end-tag handler to abort HTML rewriting
/// as soon as `</head>` is reached. It carries no data; callers recognise it by
/// downcasting the handler error.
#[derive(Debug)]
struct EndOfHead {}

impl Display for EndOfHead {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    // Give the sentinel a readable message in case it ever escapes into a log
    // or an error chain instead of being downcast and swallowed.
    f.write_str("reached end of <head>")
  }
}

impl Error for EndOfHead {}
/// Streams an HTML document through a rewriter and collects the icons that are
/// declared inside `<head>`.
///
/// * `url` - base URL used to resolve every `href` found in the document.
/// * `body` - stream of raw HTML chunks; an `Err` chunk aborts the parse.
///
/// Two kinds of tags are handled: `link[rel='manifest']` (the manifest is loaded
/// via `SiteIcons::load_manifest`) and the `icon` / `shortcut icon` /
/// `apple-touch-icon` / `apple-touch-icon-precomposed` links (loaded via
/// `Icon::load`). A manifest or icon that fails to load contributes no entries
/// instead of failing the whole parse.
///
/// Returns the flattened list of successfully loaded icons. Returns `Err` when a
/// body chunk is an error or when the rewriter fails with anything other than
/// the `EndOfHead` sentinel.
pub async fn parse_head(
url: &Url,
mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
) -> Result<Vec<Icon>, Box<dyn Error>> {
// Every icon-loading future discovered so far.
let mut icons = Vec::new();
// Futures queued by the element handlers during the current `write` call;
// drained into `icons` after each chunk.
let new_icons = RefCell::new(Vec::new());
{
let mut rewriter = HtmlRewriter::new(
Settings {
element_content_handlers: vec![
// Abort the rewrite with the `EndOfHead` sentinel once `</head>` is seen.
element!("head", |head| {
head.on_end_tag(|_| Err(Box::new(EndOfHead {})))?;
Ok(())
}),
// Web app manifest: queue a future that loads the icons it lists.
element!("link[rel='manifest']", |manifest| {
if let Some(href) = manifest
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
new_icons.borrow_mut().push(
async { SiteIcons::load_manifest(href).await.unwrap_or(Vec::new()) }
.boxed_local()
.shared(),
)
}
Ok(())
}),
// Favicon and Apple touch icon links.
element!(
join_with!(
",",
"link[rel='icon']",
"link[rel='shortcut icon']",
"link[rel='apple-touch-icon']",
"link[rel='apple-touch-icon-precomposed']"
),
|link| {
// Every selector above matches on `rel`, so the attribute is present here.
let rel = link.get_attribute("rel").unwrap();
if let Some(href) = link
.get_attribute("href")
.and_then(|href| url.join(&href).ok())
{
let kind = if rel.contains("apple-touch-icon") {
IconKind::AppIcon
} else {
IconKind::SiteFavicon
};
let sizes = link.get_attribute("sizes");
// Wrap the single icon in a Vec so it has the same output type as the
// manifest futures; a failed load yields an empty Vec.
new_icons.borrow_mut().push(
async {
Icon::load(href, kind, sizes)
.await
.map(|icon| vec![icon])
.unwrap_or(Vec::new())
}
.boxed_local()
.shared(),
)
};
Ok(())
}
),
],
..Settings::default()
},
|_: &[u8]| {},
);
// NOTE(review): `poll_in_background` is defined in `crate::utils`; presumably it
// drives the already-queued icon futures while waiting for the next chunk - confirm.
while let Some(data) = poll_in_background(body.next(), join_all(icons.clone())).await {
let result = rewriter.write(&data?);
// Collect whatever the handlers queued, even if this write ended in an error.
icons.extend(new_icons.borrow_mut().drain(..));
match result {
Err(RewritingError::ContentHandlerError(result)) => {
match result.downcast::<EndOfHead>() {
// The sentinel means `</head>` was reached: stop reading the body.
Ok(_) => break,
Err(err) => return Err(err),
};
}
result => result?,
}
}
}
// Wait for every queued future and merge the per-tag results.
let icons = join_all(icons).await.into_iter().flatten().collect();
Ok(icons)
}

5
src/html_parser/mod.rs Normal file
View file

@ -0,0 +1,5 @@
mod head;
mod site_logo;
pub use head::*;
pub use site_logo::*;

View file

@ -0,0 +1,157 @@
use crate::{utils::encode_svg, Icon, IconKind};
use futures::{Stream, StreamExt};
use html5ever::{
driver,
tendril::{Tendril, TendrilSink},
};
use scraper::{ElementRef, Html};
use std::error::Error;
use std::iter;
use tldextract::TldOption;
use url::Url;
/// Parses a full HTML document and loads the image most likely to be the site logo.
///
/// * `url` - page URL; used to resolve relative `src` values and to derive the
///   site name that is matched against `alt` text.
/// * `body` - stream of raw HTML chunks; an `Err` chunk aborts with that error.
/// * `is_blacklisted` - candidates whose resolved URL makes this return `true`
///   are discarded.
///
/// Every `<img>` / `<svg>` matched by the logo selectors is given a weight
/// (home-page link, header placement, "logo" mentions in `class`/`id`/`alt`/`src`,
/// site name in `alt`). Among the candidates sharing the highest weight an `<img>`
/// is preferred over an inline `<svg>`.
///
/// Returns the loaded [`Icon`] of kind `IconKind::SiteLogo`, or an error when no
/// candidate exists or the chosen candidate fails to load.
pub async fn parse_site_logo(
  url: &Url,
  mut body: impl Stream<Item = Result<Vec<u8>, String>> + Unpin,
  is_blacklisted: impl Fn(&Url) -> bool,
) -> Result<Icon, Box<dyn Error>> {
  let mut parser = driver::parse_document(Html::new_document(), Default::default());
  while let Some(data) = body.next().await {
    // Chunks the tendril cannot be built from are skipped instead of aborting.
    if let Ok(data) = Tendril::try_from_byte_slice(&data?) {
      parser.process(data)
    }
  }
  let document = parser.finish();

  // The site name depends only on `url`, so derive it once instead of rebuilding
  // the TLD extractor for every candidate element. A failed extraction simply
  // disables the site-name bonus rather than panicking.
  // Empty segments (e.g. from the `--` in punycode labels) are dropped: an empty
  // needle is contained in every string and would award the bonus to everything.
  let site_name_segments: Vec<String> = url
    .domain()
    .and_then(|domain| TldOption::default().build().extract(domain).ok())
    .and_then(|extracted| extracted.domain)
    .map(|site_name| {
      site_name
        .to_lowercase()
        .split('-')
        .filter(|segment| !segment.is_empty())
        .map(str::to_owned)
        .collect::<Vec<_>>()
    })
    .unwrap_or_default();

  let mut logos: Vec<_> = document
    .select(selector!(
      "a[href='/'] img, a[href='/'] svg",
      "header img, header svg",
      "img[src*=logo]",
      "img[alt*=logo], svg[alt*=logo]",
      "*[class*=logo] img, *[class*=logo] svg",
      "*[id*=logo] img, *[id*=logo] svg",
      "img[class*=logo], svg[class*=logo]",
      "img[id*=logo], svg[id*=logo]",
    ))
    .enumerate()
    .filter_map(|(i, elem_ref)| {
      let elem = elem_ref.value();
      let ancestors = elem_ref
        .ancestors()
        .filter_map(ElementRef::wrap)
        .map(|elem_ref| elem_ref.value())
        .collect::<Vec<_>>();

      // Images nested in menus or search widgets are never the logo.
      let skip_classnames = regex!("menu|search");
      let should_skip = ancestors.iter().any(|ancestor| {
        ancestor
          .attr("class")
          .map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
          .or_else(|| {
            ancestor
              .attr("id")
              .map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
          })
          .unwrap_or(false)
      });
      if should_skip {
        return None;
      }

      let mut weight = 0;

      // if in the header
      if ancestors.iter().any(|element| element.name() == "header") {
        weight += 2;
      }

      // first match in document order
      if i == 0 {
        weight += 1;
      }

      // True when the element or any ancestor has `attr_name` and its
      // lower-cased value satisfies `is_match`.
      let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| {
        ancestors.iter().chain(iter::once(&elem)).any(|ancestor| {
          ancestor
            .attr(attr_name)
            .map(|attr| is_match(&attr.to_lowercase()))
            .unwrap_or(false)
        })
      };

      // links back to the home page
      if mentions("href", Box::new(|attr| attr == "/")) {
        weight += 5;
      };

      // "logo" but not the plural "logos"
      let mentions_logo = |attr_name| {
        mentions(
          attr_name,
          Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)),
        )
      };

      if mentions_logo("class") || mentions_logo("id") {
        weight += 3;
      }
      if mentions_logo("alt") {
        weight += 2;
      }
      if mentions_logo("src") {
        weight += 1;
      }

      // if the alt contains the site_name then highest priority
      if site_name_segments.iter().any(|segment| {
        mentions(
          "alt",
          Box::new(move |attr| attr.contains(segment.as_str())),
        )
      }) {
        weight += 10;
      }

      // Inline SVGs are turned into a data URL; an SVG that cannot be encoded
      // into a valid URL is dropped like any other candidate without a URL.
      let href = if elem.name() == "svg" {
        Url::parse(&encode_svg(&elem_ref.html())).ok()
      } else {
        elem.attr("src").and_then(|href| url.join(href).ok())
      };

      if let Some(href) = &href {
        if is_blacklisted(href) {
          return None;
        }
      }

      href.map(|href| (href, elem_ref, weight))
    })
    .collect();

  // Highest weight first; the sort is stable, so document order breaks ties.
  logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight));

  // prefer <img> over svg
  let mut prev_weight = None;
  for (href, elem_ref, weight) in &logos {
    if let Some(prev_weight) = prev_weight {
      // Only candidates tied for the top weight are considered here.
      if weight != prev_weight {
        break;
      }
    }
    prev_weight = Some(weight);

    if elem_ref.value().name() == "img" {
      return Icon::load(href.clone(), IconKind::SiteLogo, None).await;
    }
  }

  // No <img> among the top candidates: fall back to the best-ranked one.
  match logos.into_iter().next() {
    Some((href, _, _)) => Icon::load(href.clone(), IconKind::SiteLogo, None).await,
    None => Err("No site logo found".into()),
  }
}

View file

@ -1,61 +0,0 @@
use super::IconInfo;
use serde::{Deserialize, Serialize};
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::{
cmp::Ordering,
collections::HashMap,
fmt::{self, Display},
str::FromStr,
};
use url::Url;
/// The role an icon plays for a website.
///
/// `PartialOrd`/`Ord` are derived, so variants compare in declaration order
/// (`AppIcon < SiteLogo < SiteFavicon`); reordering them changes comparisons.
#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, SerializeDisplay, DeserializeFromStr)]
pub enum IconKind {
/// Displayed as `app_icon`.
AppIcon,
/// Displayed as `site_logo`.
SiteLogo,
/// Displayed as `site_favicon`.
SiteFavicon,
}
/// Renders the kind as its snake_case identifier
/// (`site_logo`, `app_icon` or `site_favicon`).
impl Display for IconKind {
  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
    let label = match self {
      IconKind::AppIcon => "app_icon",
      IconKind::SiteFavicon => "site_favicon",
      IconKind::SiteLogo => "site_logo",
    };
    f.write_str(label)
  }
}
/// Parses the snake_case identifier produced by `Display`; any other input
/// is rejected with a message.
impl FromStr for IconKind {
  type Err = String;

  fn from_str(kind: &str) -> Result<Self, Self::Err> {
    let parsed = match kind {
      "app_icon" => IconKind::AppIcon,
      "site_favicon" => IconKind::SiteFavicon,
      "site_logo" => IconKind::SiteLogo,
      _ => return Err("unknown icon kind!".into()),
    };
    Ok(parsed)
  }
}
/// A single icon discovered for a website, together with its parsed metadata.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Icon {
/// Location of the icon.
pub url: Url,
/// Header name/value pairs stored alongside the icon.
pub headers: HashMap<String, String>,
/// Role of the icon (app icon, site logo or favicon).
pub kind: IconKind,
/// Format-specific details; flattened into the icon's own fields when (de)serialized.
#[serde(flatten)]
pub info: IconInfo,
}
/// Icons are ordered by their `info` alone; `url`, `headers` and `kind`
/// do not take part in the comparison.
impl Ord for Icon {
  fn cmp(&self, other: &Self) -> Ordering {
    let (lhs, rhs) = (&self.info, &other.info);
    lhs.cmp(rhs)
  }
}

/// Total order: always delegates to [`Ord::cmp`].
impl PartialOrd for Icon {
  fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
    Some(Ord::cmp(self, other))
  }
}

View file

@ -1,4 +1,5 @@
use crate::{icon_size::*, CLIENT}; use super::*;
use crate::CLIENT;
use data_url::DataUrl; use data_url::DataUrl;
use futures::{io::Cursor, prelude::*, stream::TryStreamExt}; use futures::{io::Cursor, prelude::*, stream::TryStreamExt};
use mime::MediaType; use mime::MediaType;
@ -6,6 +7,7 @@ use reqwest::{header::*, Url};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
convert::TryFrom,
error::Error, error::Error,
fmt::{self, Display}, fmt::{self, Display},
io, io,
@ -72,7 +74,7 @@ impl IconInfo {
headers: HeaderMap, headers: HeaderMap,
sizes: Option<String>, sizes: Option<String>,
) -> Result<IconInfo, Box<dyn Error>> { ) -> Result<IconInfo, Box<dyn Error>> {
let sizes = sizes.as_ref().and_then(|s| IconSizes::from_str(s).ok()); let sizes = sizes.as_ref().and_then(|s| IconSizes::try_from(s).ok());
let (mime, mut body): (_, Box<dyn AsyncRead + Unpin>) = match url.scheme() { let (mime, mut body): (_, Box<dyn AsyncRead + Unpin>) = match url.scheme() {
"data" => { "data" => {
@ -92,6 +94,43 @@ impl IconInfo {
} }
_ => { _ => {
match &url.path().split('.').last().unwrap_or("").to_lowercase()[..] {
"svg" => {
if let Some(sizes) = sizes {
return Ok(IconInfo::SVG {
size: Some(*sizes.largest()),
});
}
}
"png" => {
if let Some(sizes) = sizes {
return Ok(IconInfo::PNG {
size: *sizes.largest(),
});
}
}
"jpeg" | "jpg" => {
if let Some(sizes) = sizes {
return Ok(IconInfo::JPEG {
size: *sizes.largest(),
});
}
}
"ico" => {
if let Some(sizes) = sizes {
return Ok(IconInfo::ICO { sizes });
}
}
"gif" => {
if let Some(sizes) = sizes {
return Ok(IconInfo::GIF {
size: *sizes.largest(),
});
}
}
_ => {}
};
let res = CLIENT.get(url).headers(headers).send().await?; let res = CLIENT.get(url).headers(headers).send().await?;
if !res.status().is_success() { if !res.status().is_success() {
return Err("failed to fetch".into()); return Err("failed to fetch".into());

View file

@ -22,7 +22,22 @@ impl Display for IconSizes {
} }
impl IconSizes { impl IconSizes {
pub fn from_str(sizes_str: &str) -> Result<IconSizes, Box<dyn Error>> { pub fn add_size(&mut self, size: IconSize) {
match self.0.binary_search(&size) {
Ok(_) => {}
Err(pos) => self.0.insert(pos, size),
}
}
pub fn largest(&self) -> &IconSize {
self.0.first()
}
}
impl TryFrom<&str> for IconSizes {
type Error = Box<dyn Error>;
fn try_from(sizes_str: &str) -> Result<Self, Self::Error> {
let size_strs = sizes_str.split(" "); let size_strs = sizes_str.split(" ");
let mut sizes = Vec::new(); let mut sizes = Vec::new();
@ -34,16 +49,21 @@ impl IconSizes {
Ok(sizes.try_into()?) Ok(sizes.try_into()?)
} }
}
pub fn add_size(&mut self, size: IconSize) { impl TryFrom<&String> for IconSizes {
match self.0.binary_search(&size) { type Error = Box<dyn Error>;
Ok(_) => {}
Err(pos) => self.0.insert(pos, size),
}
}
pub fn largest(&self) -> &IconSize { fn try_from(sizes_str: &String) -> Result<Self, Self::Error> {
self.0.first() IconSizes::try_from(sizes_str.as_str())
}
}
impl TryFrom<String> for IconSizes {
type Error = Box<dyn Error>;
fn try_from(sizes_str: String) -> Result<Self, Self::Error> {
IconSizes::try_from(sizes_str.as_str())
} }
} }

68
src/icon/icon_size/svg.rs Normal file
View file

@ -0,0 +1,68 @@
use super::IconSize;
use futures::prelude::*;
use lol_html::{element, HtmlRewriter, Settings};
use std::{cell::RefCell, error::Error};
/// Parses an SVG length attribute into a whole number of pixels.
///
/// Accepts plain numbers (`"24"`, `"24.6"`) and the explicit `px` unit
/// (`"24px"`), ignoring surrounding whitespace; the value is rounded to the
/// nearest integer.
///
/// Returns `None` for anything else - relative units such as `"100%"` or
/// `"2em"`, negative or non-finite values - so that the caller can fall back to
/// another source for the size instead of receiving a bogus `0` or `u32::MAX`.
fn parse_size<S: ToString>(size: S) -> Option<u32> {
  let text = size.to_string();
  let text = text.trim();
  let number = text.strip_suffix("px").unwrap_or(text).trim_end();

  number
    .parse::<f64>()
    .ok()
    .filter(|value| value.is_finite() && *value >= 0.0)
    .map(|value| value.round() as u32)
}
/// Reads an SVG document until its `<svg>` element has been seen and derives
/// the icon size from that element.
///
/// * `first_bytes` - two bytes that are fed to the parser ahead of anything
///   read from `reader`.
/// * `reader` - source of the remaining document; read in small chunks and only
///   as far as needed.
///
/// The size is taken from the `width` and `height` attributes when both parse,
/// otherwise from the last two numbers of `viewBox`.
///
/// Returns `Ok(Some(size))` when a size was found, `Ok(None)` when the `<svg>`
/// element declares no usable size, and `Err` when the reader ends before any
/// `<svg>` element is seen or when reading/parsing fails.
pub async fn get_svg_size<R: AsyncRead + Unpin>(
  first_bytes: &[u8; 2],
  reader: &mut R,
) -> Result<Option<IconSize>, Box<dyn Error>> {
  // Outer `None`: no `<svg>` element seen yet.
  // `Some(None)`: element seen, but it declares no usable size.
  let size = RefCell::new(None);

  let mut rewriter = HtmlRewriter::new(
    Settings {
      element_content_handlers: vec![
        // Record the dimensions declared on the <svg> element
        element!("svg", |el| {
          let viewbox = el.get_attribute("viewbox");

          let width = el.get_attribute("width").and_then(parse_size);
          let height = el.get_attribute("height").and_then(parse_size);

          *size.borrow_mut() = Some(if let (Some(width), Some(height)) = (width, height) {
            Some(IconSize::new(width, height))
          } else if let Some(viewbox) = viewbox {
            // viewBox = "min-x min-y width height": the origin may be negative,
            // every number may carry any amount of decimals, and the separators
            // may be whitespace and/or commas.
            regex!(r"^\s*-?\d+\.?\d*[\s,]+-?\d+\.?\d*[\s,]+(\d+\.?\d*)[\s,]+(\d+\.?\d*)")
              .captures(&viewbox)
              .and_then(|captures| {
                let width = parse_size(&captures[1])?;
                let height = parse_size(&captures[2])?;
                Some(IconSize::new(width, height))
              })
          } else {
            None
          });

          Ok(())
        }),
      ],
      ..Settings::default()
    },
    |_: &[u8]| {},
  );

  rewriter.write(first_bytes)?;

  let mut buffer = [0; 100];

  loop {
    let n = reader.read(&mut buffer).await?;
    if n == 0 {
      // End of input without ever reaching an <svg> element.
      return Err("invalid svg".into());
    }

    rewriter.write(&buffer[..n])?;

    // Stop reading as soon as the handler has run.
    if let Some(size) = *size.borrow() {
      return Ok(size);
    }
  }
}

123
src/icon/mod.rs Normal file
View file

@ -0,0 +1,123 @@
mod icon_info;
mod icon_size;
pub use icon_info::*;
pub use icon_size::*;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::{
cmp::Ordering,
collections::HashMap,
convert::TryInto,
error::Error,
fmt::{self, Display},
hash::{Hash, Hasher},
str::FromStr,
};
use url::Url;
/// The role an icon plays for a website.
///
/// `PartialOrd`/`Ord` are derived, so variants compare in declaration order
/// (`AppIcon < SiteFavicon < SiteLogo`); reordering them changes comparisons.
#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq, SerializeDisplay, DeserializeFromStr)]
pub enum IconKind {
/// Displayed as `app_icon`.
AppIcon,
/// Displayed as `site_favicon`.
SiteFavicon,
/// Displayed as `site_logo`.
SiteLogo,
}
/// Writes the snake_case identifier of the kind
/// (`app_icon`, `site_favicon` or `site_logo`).
impl Display for IconKind {
  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
    let identifier = match self {
      IconKind::AppIcon => "app_icon",
      IconKind::SiteFavicon => "site_favicon",
      IconKind::SiteLogo => "site_logo",
    };
    f.write_str(identifier)
  }
}
/// Inverse of the `Display` impl: accepts exactly the three snake_case
/// identifiers and rejects everything else with a message.
impl FromStr for IconKind {
  type Err = String;

  fn from_str(kind: &str) -> Result<Self, Self::Err> {
    if kind == "site_logo" {
      return Ok(IconKind::SiteLogo);
    }
    if kind == "app_icon" {
      return Ok(IconKind::AppIcon);
    }
    if kind == "site_favicon" {
      return Ok(IconKind::SiteFavicon);
    }
    Err("unknown icon kind!".into())
  }
}
/// A single icon discovered for a website, together with its parsed metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Icon {
/// Location of the icon.
pub url: Url,
/// Header name/value pairs; `load_with_headers` converts them into the header
/// map it hands to `IconInfo::load`.
pub headers: HashMap<String, String>,
/// Role of the icon (app icon, favicon or site logo).
pub kind: IconKind,
/// Format-specific details; flattened into the icon's own fields when (de)serialized.
#[serde(flatten)]
pub info: IconInfo,
}
/// Hashes an icon by its URL and its headers only; `kind` and `info` are not
/// part of the hash.
impl Hash for Icon {
  fn hash<H: Hasher>(&self, state: &mut H) {
    // A HashMap iterates in arbitrary order, so sort the entries by header name
    // to make the hash independent of that order.
    let ordered_headers = self
      .headers
      .iter()
      .sorted_by_key(|(name, _)| *name)
      .collect::<Vec<_>>();

    self.url.hash(state);
    ordered_headers.hash(state);
  }
}
impl Icon {
  /// Creates an icon from already-known metadata, with no headers attached.
  pub fn new(url: Url, kind: IconKind, info: IconInfo) -> Self {
    Icon::new_with_headers(url, HashMap::new(), kind, info)
  }

  /// Creates an icon from already-known metadata and the given headers.
  pub fn new_with_headers(
    url: Url,
    headers: HashMap<String, String>,
    kind: IconKind,
    info: IconInfo,
  ) -> Self {
    Self {
      url,
      headers,
      kind,
      info,
    }
  }

  /// Loads the metadata of the icon at `url` without any extra headers.
  ///
  /// `sizes` is the optional `sizes` hint that accompanied the icon declaration.
  /// See [`Icon::load_with_headers`] for the error conditions.
  pub async fn load(
    url: Url,
    kind: IconKind,
    sizes: Option<String>,
  ) -> Result<Self, Box<dyn Error>> {
    Icon::load_with_headers(url, HashMap::new(), kind, sizes).await
  }

  /// Loads the metadata of the icon at `url`, passing `headers` along to
  /// `IconInfo::load`.
  ///
  /// # Errors
  ///
  /// Returns an error when `headers` contains a name or value that cannot be
  /// converted into an HTTP header, or when `IconInfo::load` fails.
  pub async fn load_with_headers(
    url: Url,
    headers: HashMap<String, String>,
    kind: IconKind,
    sizes: Option<String>,
  ) -> Result<Self, Box<dyn Error>> {
    // Headers are caller-supplied: report an invalid one as an error instead of
    // panicking inside a function that already returns `Result`.
    let info = IconInfo::load(url.clone(), (&headers).try_into()?, sizes).await?;
    Ok(Icon::new_with_headers(url, headers, kind, info))
  }
}
/// Icons are ranked purely by their `info`; `url`, `headers` and `kind` are
/// ignored by the ordering.
impl Ord for Icon {
  fn cmp(&self, other: &Self) -> Ordering {
    Ord::cmp(&self.info, &other.info)
  }
}

/// Total order: always `Some`, delegating to [`Ord::cmp`].
impl PartialOrd for Icon {
  fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
    let ordering = self.cmp(other);
    Some(ordering)
  }
}

View file

@ -1,84 +0,0 @@
use super::IconSize;
use futures::prelude::*;
use lol_html::{element, errors::RewritingError, HtmlRewriter, Settings};
use std::{
error::Error,
fmt::{self, Display},
};
/// Error-shaped carrier for the size found on the `<svg>` element: the element
/// handler returns it as an `Err` and `get_svg_size` downcasts it to read the
/// wrapped `Option<IconSize>`.
#[derive(Debug)]
struct SizeResult(Option<IconSize>);
// `Display` is only implemented to satisfy the `Error` trait bound; it writes nothing.
impl Display for SizeResult {
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
Ok(())
}
}
impl Error for SizeResult {}
/// Parses an SVG length attribute into a whole number of pixels.
///
/// Accepts plain numbers (`"24"`, `"24.6"`) and the explicit `px` unit
/// (`"24px"`), ignoring surrounding whitespace; the value is rounded to the
/// nearest integer.
///
/// Returns `None` for anything else - relative units such as `"100%"` or
/// `"2em"`, negative or non-finite values - so that the caller can fall back to
/// another source for the size instead of receiving a bogus `0` or `u32::MAX`.
fn parse_size<S: ToString>(size: S) -> Option<u32> {
  let raw = size.to_string();
  let trimmed = raw.trim();
  let number = trimmed.strip_suffix("px").unwrap_or(trimmed).trim_end();

  match number.parse::<f64>() {
    Ok(value) if value.is_finite() && value >= 0.0 => Some(value.round() as u32),
    _ => None,
  }
}
/// Reads an SVG document until its `<svg>` element has been seen and derives
/// the icon size from that element.
///
/// * `first_bytes` - two bytes that are fed to the parser ahead of anything
///   read from `reader`.
/// * `reader` - source of the remaining document; read in small chunks and only
///   as far as needed.
///
/// The size is taken from the `width` and `height` attributes when both parse,
/// otherwise from the last two numbers of `viewBox`. The element handler hands
/// the result back by failing with a `SizeResult`, which also stops the parse.
///
/// Returns `Ok(Some(size))` when a size was found, `Ok(None)` when the `<svg>`
/// element declares no usable size, and `Err` when the reader ends before any
/// `<svg>` element is seen or when reading/parsing fails.
pub async fn get_svg_size<R: AsyncRead + Unpin>(
  first_bytes: &[u8; 2],
  reader: &mut R,
) -> Result<Option<IconSize>, Box<dyn Error>> {
  let mut rewriter = HtmlRewriter::new(
    Settings {
      element_content_handlers: vec![
        // Report the dimensions declared on the <svg> element
        element!("svg", |el| {
          let viewbox = el.get_attribute("viewbox");

          let width = el.get_attribute("width").and_then(parse_size);
          let height = el.get_attribute("height").and_then(parse_size);

          Err(Box::new(SizeResult(
            if let (Some(width), Some(height)) = (width, height) {
              Some(IconSize::new(width, height))
            } else if let Some(viewbox) = viewbox {
              // viewBox = "min-x min-y width height": the origin may be negative,
              // every number may carry any amount of decimals, and the separators
              // may be whitespace and/or commas.
              regex!(r"^\s*-?\d+\.?\d*[\s,]+-?\d+\.?\d*[\s,]+(\d+\.?\d*)[\s,]+(\d+\.?\d*)")
                .captures(&viewbox)
                .and_then(|captures| {
                  let width = parse_size(&captures[1])?;
                  let height = parse_size(&captures[2])?;
                  Some(IconSize::new(width, height))
                })
            } else {
              None
            },
          )))
        }),
      ],
      ..Settings::default()
    },
    |_: &[u8]| {},
  );

  rewriter.write(first_bytes)?;

  let mut buffer = [0; 100];

  loop {
    let n = reader.read(&mut buffer).await?;
    if n == 0 {
      // End of input without ever reaching an <svg> element.
      return Err("invalid svg".into());
    }

    match rewriter.write(&buffer[..n]) {
      Err(RewritingError::ContentHandlerError(result)) => {
        // The handler's "error" is the answer; anything else is a real failure
        // and is propagated instead of panicking on the downcast.
        match result.downcast::<SizeResult>() {
          Ok(found) => return Ok(found.0),
          Err(err) => return Err(err),
        }
      }
      result => result?,
    }
  }
}

View file

@ -1,123 +1,38 @@
use crate::{utils::encode_svg, Icon, IconInfo, IconKind, CLIENT}; use crate::{html_parser, utils::push_url, Icon, IconKind, CLIENT};
use future::join_all; use flo_stream::{MessagePublisher, Publisher, StreamPublisher};
use futures::StreamExt; use futures::future::{join_all, select_all};
use futures::{prelude::*, task::noop_waker}; use futures::prelude::*;
use html5ever::{ use futures::{join, StreamExt};
driver, use itertools::Itertools;
tendril::{Tendril, TendrilSink},
};
use reqwest::{header::*, IntoUrl}; use reqwest::{header::*, IntoUrl};
use scraper::{ElementRef, Html};
use serde::Deserialize;
use std::convert::TryInto; use std::convert::TryInto;
use std::iter; use std::error::Error;
use std::task::Poll;
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
use tldextract::TldOption;
use url::Url; use url::Url;
use vec1::Vec1;
pub struct Icons { pub struct SiteIcons {
blacklist: Option<Box<dyn Fn(&Url) -> bool>>, blacklist: Option<Box<dyn Fn(&Url) -> bool>>,
entries: Vec<Icon>,
pending_entries: HashMap<
Url,
(
IconKind,
HashMap<String, String>,
Pin<Box<dyn Future<Output = Result<IconInfo, Box<dyn Error>>>>>,
),
>,
} }
fn add_icon_entry( #[derive(Debug, Clone)]
entries: &mut Vec<Icon>, enum LoadedKind {
url: Url, DefaultManifest(Option<Vec1<Icon>>),
headers: HashMap<String, String>, HeadTags(Option<Vec1<Icon>>),
kind: IconKind, DefaultFavicon(Option<Icon>),
info: Result<IconInfo, Box<dyn Error>>, SiteLogo(Option<Icon>),
) {
match info {
Ok(info) => entries.push(Icon {
url,
headers,
kind,
info,
}),
Err(_) => warn_err!(info, "failed to parse icon"),
}
} }
impl Icons { impl SiteIcons {
pub fn new() -> Self { pub fn new() -> Self {
Icons { SiteIcons { blacklist: None }
blacklist: None,
entries: Vec::new(),
pending_entries: HashMap::new(),
}
} }
pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self { pub fn new_with_blacklist(blacklist: impl Fn(&Url) -> bool + 'static) -> Self {
Icons { SiteIcons {
blacklist: Some(Box::new(blacklist)), blacklist: Some(Box::new(blacklist)),
entries: Vec::new(),
pending_entries: HashMap::new(),
} }
} }
/// Add an icon URL and start fetching it
pub fn add_icon(&mut self, url: Url, kind: IconKind, sizes: Option<String>) {
self.add_icon_with_headers(url, HashMap::new(), kind, sizes)
}
/// Add an icon URL and start fetching it,
/// along with the specified headers
pub fn add_icon_with_headers(
&mut self,
url: Url,
headers: HashMap<String, String>,
kind: IconKind,
sizes: Option<String>,
) {
// check to see if it already exists
let mut entries = self.entries.iter_mut();
if let Some(existing_kind) = self
.pending_entries
.get_mut(&url)
.map(|(kind, _, _)| kind)
.or_else(|| {
entries.find_map(|icon| {
if icon.url.eq(&url) {
Some(&mut icon.kind)
} else {
None
}
})
})
{
// if the kind is more important, replace it
if &kind > existing_kind {
*existing_kind = kind;
}
return;
}
let mut info = Box::pin(IconInfo::load(
url.clone(),
(&headers).try_into().unwrap(),
sizes,
));
// Start fetching the icon
let noop_waker = noop_waker();
let cx = &mut Context::from_waker(&noop_waker);
match info.poll_unpin(cx) {
Poll::Ready(info) => add_icon_entry(&mut self.entries, url, headers, kind, info),
Poll::Pending => {
self.pending_entries.insert(url, (kind, headers, info));
}
};
}
pub fn is_blacklisted(&self, url: &Url) -> bool { pub fn is_blacklisted(&self, url: &Url) -> bool {
if let Some(is_blacklisted) = &self.blacklist { if let Some(is_blacklisted) = &self.blacklist {
is_blacklisted(url) is_blacklisted(url)
@ -126,271 +41,163 @@ impl Icons {
} }
} }
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> { pub async fn load_website<U: IntoUrl>(
&mut self,
url: U,
best_matches_only: bool,
) -> Result<Vec<Icon>, Box<dyn Error>> {
let url = url.into_url()?;
let manifest_urls = vec![
push_url(&url, "manifest.json"),
push_url(&url, "manifest.webmanifest"),
url.join("/manifest.json")?,
url.join("/manifest.webmanifest")?,
]
.into_iter()
.unique();
let favicon_urls = vec![push_url(&url, "favicon.ico"), url.join("/favicon.ico")?]
.into_iter()
.unique();
let html_response = async {
let res = CLIENT let res = CLIENT
.get(url) .get(url.clone())
.header(ACCEPT, "text/html") .header(ACCEPT, "text/html")
.send() .send()
.await? .await
.error_for_status()?; .ok()?
.error_for_status()
.ok()?;
let url = res.url().clone(); let url = res.url().clone();
if self.is_blacklisted(&url) { if self.is_blacklisted(&url) {
return Ok(()); None
}
let mut body = res.bytes_stream();
let mut parser = driver::parse_document(Html::new_document(), Default::default());
while let Some(data) = body.next().await {
if let Ok(data) = Tendril::try_from_byte_slice(&data?) {
parser.process(data)
}
}
let document = parser.finish();
{
let mut found_favicon = false;
for elem_ref in document.select(selector!(
"link[rel='icon']",
"link[rel='shortcut icon']",
"link[rel='apple-touch-icon']",
"link[rel='apple-touch-icon-precomposed']"
)) {
let elem = elem_ref.value();
if let Some(href) = elem.attr("href").and_then(|href| url.join(&href).ok()) {
let rel = elem.attr("rel").unwrap();
self.add_icon(
href,
if rel.contains("apple-touch-icon") {
IconKind::AppIcon
} else { } else {
IconKind::SiteFavicon let body = res.bytes_stream().map(|res| {
}, res
elem.attr("sizes").map(|sizes| sizes.into()), .map(|bytes| bytes.to_vec())
); .map_err(|err| err.to_string())
found_favicon = true;
};
}
// Check for default favicon.ico
if !found_favicon {
self.add_icon(
url.join("/favicon.ico").unwrap(),
IconKind::SiteFavicon,
None,
);
}
}
{
let mut logos: Vec<_> = document
.select(selector!(
"a[href='/'] img, a[href='/'] svg",
"header img, header svg",
"img[src*=logo]",
"img[alt*=logo], svg[alt*=logo]",
"*[class*=logo] img, *[class*=logo] svg",
"*[id*=logo] img, *[id*=logo] svg",
"img[class*=logo], svg[class*=logo]",
"img[id*=logo], svg[id*=logo]",
))
.enumerate()
.filter_map(|(i, elem_ref)| {
let elem = elem_ref.value();
let ancestors = elem_ref
.ancestors()
.map(ElementRef::wrap)
.flatten()
.map(|elem_ref| elem_ref.value())
.collect::<Vec<_>>();
let skip_classnames = regex!("menu|search");
let should_skip = ancestors.iter().any(|ancestor| {
ancestor
.attr("class")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
.or_else(|| {
ancestor
.attr("id")
.map(|attr| skip_classnames.is_match(&attr.to_lowercase()))
})
.unwrap_or(false)
}); });
if should_skip { let mut publisher = Publisher::new(128);
return None; let subscriber = publisher.subscribe();
Some((
url,
async move { StreamPublisher::new(&mut publisher, body).await }.shared(),
subscriber,
))
} }
let mut weight = 0;
// if in the header
if ancestors.iter().any(|element| element.name() == "header") {
weight += 2;
} }
.shared();
if i == 0 { let mut futures = vec![
weight += 1; async {
} let html_response = html_response.clone().await;
let mentions = |attr_name, is_match: Box<dyn Fn(&str) -> bool>| { LoadedKind::HeadTags(match html_response {
ancestors.iter().chain(iter::once(&elem)).any(|ancestor| { Some((url, _, body)) => html_parser::parse_head(&url, body)
ancestor .await
.attr(attr_name) .ok()
.map(|attr| is_match(&attr.to_lowercase())) .and_then(|icons| icons.try_into().ok()),
.unwrap_or(false) None => None,
}) })
};
if mentions("href", Box::new(|attr| attr == "/")) {
weight += 5;
};
let mentions_logo = |attr_name| {
mentions(
attr_name,
Box::new(|attr| regex!("logo([^s]|$)").is_match(attr)),
)
};
if mentions_logo("class") || mentions_logo("id") {
weight += 3;
}
if mentions_logo("alt") {
weight += 2;
}
if mentions_logo("src") {
weight += 1;
} }
.boxed_local(),
async {
let html_response = html_response.clone().await;
if let Some(site_name) = url LoadedKind::SiteLogo(match html_response {
.domain() Some((url, complete, body)) => {
.and_then(|domain| TldOption::default().build().extract(domain).unwrap().domain) let (icons, _) = join!(
{ html_parser::parse_site_logo(&url, body, |url| self.is_blacklisted(url)),
// if the alt contains the site_name then highest priority complete
if site_name );
.to_lowercase()
.split('-')
.any(|segment| mentions("alt", Box::new(move |attr| attr.contains(segment))))
{
weight += 10;
}
}
let href = if elem.name() == "svg" { icons.ok()
Some(Url::parse(&encode_svg(&elem_ref.html())).unwrap())
} else {
elem.attr("src").and_then(|href| url.join(&href).ok())
};
if let Some(href) = &href {
if self.is_blacklisted(href) {
return None;
} }
} None => None,
href.map(|href| (href, elem_ref, weight))
}) })
.collect(); }
.boxed_local(),
async {
let manifests = join_all(manifest_urls.map(|url| SiteIcons::load_manifest(url))).await;
logos.sort_by(|(_, _, a_weight), (_, _, b_weight)| b_weight.cmp(a_weight)); LoadedKind::DefaultManifest(
manifests
// prefer <img> over svg
let mut prev_weight = None;
for (href, elem_ref, weight) in &logos {
if let Some(prev_weight) = prev_weight {
if weight != prev_weight {
break;
}
}
prev_weight = Some(weight);
if elem_ref.value().name() == "img" {
self.add_icon(href.clone(), IconKind::SiteLogo, None);
break;
}
}
if let Some((href, _, _)) = logos.into_iter().next() {
self.add_icon(href, IconKind::SiteLogo, None);
}
}
for elem_ref in document.select(selector!("link[rel='manifest']")) {
if let Some(href) = elem_ref
.value()
.attr("href")
.and_then(|href| url.join(&href).ok())
{
warn_err!(self.load_manifest(href).await, "failed to fetch manifest");
}
}
Ok(())
}
/// Downloads the JSON manifest at `manifest_url` and registers every icon it lists.
///
/// Each icon's `src` is resolved relative to `manifest_url`; entries whose `src`
/// cannot be joined onto it are skipped. A manifest without an `icons` member
/// registers nothing and still returns `Ok(())`.
///
/// # Errors
///
/// Returns the underlying error if the request fails or the response body
/// cannot be deserialized as a manifest.
pub async fn load_manifest(&mut self, manifest_url: Url) -> Result<(), Box<dyn Error>> {
    #[derive(Deserialize)]
    struct ManifestIcon {
        src: String,
        sizes: Option<String>,
    }

    #[derive(Deserialize)]
    struct Manifest {
        icons: Option<Vec<ManifestIcon>>,
    }

    let response = CLIENT.get(manifest_url.as_str()).send().await?;
    let manifest: Manifest = response.json().await?;

    // `Option<Vec<_>>` flattens to zero items when the member is absent.
    for icon in manifest.icons.into_iter().flatten() {
        if let Ok(src) = manifest_url.join(&icon.src) {
            // The value returned by `add_icon` is deliberately discarded.
            let _ = self.add_icon(src, IconKind::AppIcon, icon.sizes);
        }
    }

    Ok(())
}
/// Fetch all the icons. Ordered from highest to lowest resolution
///
/// ```
/// async fn run() {
/// let mut icons = site_icons::Icons::new();
/// icons.load_website("https://github.com").await.unwrap();
///
/// let entries = icons.entries().await;
/// for icon in entries {
/// println!("{:?}", icon)
/// }
/// }
/// ```
pub async fn entries(mut self) -> Vec<Icon> {
let (urls, infos): (Vec<_>, Vec<_>) = self
.pending_entries
.into_iter() .into_iter()
.map(|(url, (kind, headers, info))| ((url, headers, kind), info)) .find_map(|manifest| manifest.ok().and_then(|icons| icons.try_into().ok())),
.unzip(); )
}
.boxed_local(),
async {
let favicons =
join_all(favicon_urls.map(|url| Icon::load(url.clone(), IconKind::SiteFavicon, None)))
.await;
let mut urls = urls.into_iter(); LoadedKind::DefaultFavicon(favicons.into_iter().find_map(|favicon| favicon.ok()))
}
.boxed_local(),
];
for info in join_all(infos).await { let mut icons: Vec<Icon> = Vec::new();
let (url, headers, kind) = urls.next().unwrap(); let mut found_best_match = false;
add_icon_entry(&mut self.entries, url, headers, kind, info); let mut previous_loads = Vec::new();
while !futures.is_empty() {
let (loaded, index, _) = select_all(&mut futures).await;
futures.remove(index);
match loaded.clone() {
LoadedKind::DefaultManifest(manifest_icons) => {
if let Some(manifest_icons) = manifest_icons {
icons.extend(manifest_icons);
found_best_match = true;
}
}
LoadedKind::DefaultFavicon(favicon) => {
if let Some(favicon) = favicon {
icons.push(favicon);
if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::HeadTags(_)))
{
found_best_match = true;
}
}
}
LoadedKind::HeadTags(head_icons) => {
if let Some(head_icons) = head_icons {
icons.extend(head_icons);
found_best_match = true;
} else if previous_loads
.iter()
.any(|kind| matches!(kind, LoadedKind::DefaultFavicon(Some(_))))
{
found_best_match = true;
}
}
LoadedKind::SiteLogo(logo) => {
if let Some(logo) = logo {
icons.push(logo);
}
}
} }
self.entries.sort(); previous_loads.push(loaded);
self.entries icons.sort();
icons = icons.into_iter().unique().collect();
if best_matches_only && found_best_match {
break;
}
}
Ok(icons)
} }
} }

View file

@ -1,3 +1,4 @@
#![feature(async_closure)]
//! # site_icons //! # site_icons
//! An efficient website icon scraper. //! An efficient website icon scraper.
//! //!
@ -26,15 +27,13 @@ extern crate serde_with;
extern crate log; extern crate log;
#[macro_use] #[macro_use]
mod macros;
mod icon;
mod icon_info;
mod icon_size;
mod icons;
mod utils; mod utils;
mod html_parser;
mod icon;
mod icons;
mod manifest;
pub use icon::*; pub use icon::*;
pub use icon_info::*;
pub use icons::*; pub use icons::*;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
@ -49,23 +48,23 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
Client::builder().default_headers(headers).build().unwrap() Client::builder().default_headers(headers).build().unwrap()
}); });
#[cfg(test)] // #[cfg(test)]
mod tests { // mod tests {
use super::*; // use super::*;
#[tokio::test] // #[tokio::test]
async fn test_icons() { // async fn test_icons() {
let mut icons = Icons::new(); // let mut icons = SiteIcons::new();
// scrape the icons from a url // // scrape the icons from a url
icons.load_website("https://github.com").await.unwrap(); // icons.load_website("https://github.com").await.unwrap();
// fetch all icons, ensuring they exist & determining size // // fetch all icons, ensuring they exist & determining size
let entries = icons.entries().await; // let entries = icons.entries().await;
// entries are sorted from highest to lowest resolution // // entries are sorted from highest to lowest resolution
for icon in &entries { // for icon in &entries {
println!("{:?}", icon) // println!("{:?}", icon)
} // }
assert_eq!(entries.len() > 0, true); // assert_eq!(entries.len() > 0, true);
} // }
} // }

54
src/manifest.rs Normal file
View file

@ -0,0 +1,54 @@
use crate::{Icon, IconKind, SiteIcons, CLIENT};
use cached::proc_macro::cached;
use futures::future::join_all;
use reqwest::IntoUrl;
use serde::Deserialize;
use std::error::Error;
use url::Url;
/// A single entry of a manifest's `icons` array, as deserialized from JSON.
#[derive(Debug, Deserialize)]
struct ManifestIcon {
/// Location of the icon; joined onto the manifest URL before the icon is loaded.
src: String,
/// Optional `sizes` value, handed unchanged to `Icon::load`.
sizes: Option<String>,
}
#[derive(Debug, Deserialize)]
struct Manifest {
icons: Vec<ManifestIcon>,
}
impl SiteIcons {
    /// Loads the icons declared by the manifest at `url`.
    ///
    /// The argument is converted with `IntoUrl::into_url` and the actual work is
    /// delegated to `load_manifest_cached`.
    ///
    /// # Errors
    ///
    /// Returns an error if `url` cannot be converted into a `Url`, or if
    /// `load_manifest_cached` reports a failure (its `String` message is boxed).
    pub async fn load_manifest<U: IntoUrl>(url: U) -> Result<Vec<Icon>, Box<dyn Error>> {
        let manifest_url = url.into_url()?;
        let icons = load_manifest_cached(manifest_url).await?;
        Ok(icons)
    }
}
/// Fetches the manifest at `url` and loads every icon it declares.
///
/// All icons are loaded concurrently via `join_all`. An icon is silently left
/// out of the result when its `src` cannot be joined onto `url` or when
/// `Icon::load` fails for it.
///
/// The function is wrapped by `#[cached(sync_writes = true)]`.
/// NOTE(review): presumably the error type is a `String` because the cache
/// needs a cloneable return value — confirm against the `cached` docs.
///
/// # Errors
///
/// Returns `"<url>: <error>"` if the request fails or the body is not a valid
/// manifest.
#[cached(sync_writes = true)]
async fn load_manifest_cached(url: Url) -> Result<Vec<Icon>, String> {
    // Borrow once so the shared reference can be copied into every icon future.
    let url = &url;

    let manifest: Manifest = CLIENT
        .get(url.clone())
        .send()
        .await
        .map_err(|e| format!("{}: {:?}", url, e))?
        .json()
        .await
        .map_err(|e| format!("{}: {:?}", url, e))?;

    // A plain closure returning an `async move` block; this does not need the
    // unstable async-closure syntax.
    let loads = manifest.icons.into_iter().map(move |icon| async move {
        let src = url.join(&icon.src).ok()?;
        Icon::load(src, IconKind::AppIcon, icon.sizes).await.ok()
    });

    Ok(join_all(loads).await.into_iter().flatten().collect())
}

View file

@ -0,0 +1,43 @@
use std::{
pin::Pin,
task::{Context, Poll},
};
use futures::Future;
/// Awaits `future`, polling `background_future` alongside it while `future` is pending.
///
/// Resolves with the output of `future`. The output of `background_future` is
/// discarded, and `background_future` is not polled on the poll in which
/// `future` becomes ready, so it may be left unfinished.
///
/// Once `background_future` has returned `Poll::Ready` it is never polled
/// again: polling a completed future is outside the `Future::poll` contract and
/// panics for many futures (for example `async` blocks).
pub async fn poll_in_background<F, B, FO, BO>(future: F, background_future: B) -> FO
where
    F: Future<Output = FO> + Unpin,
    B: Future<Output = BO> + Unpin,
{
    struct BackgroundPoller<F, B> {
        future: F,
        background_future: B,
        /// Set once `background_future` has completed, so it is not polled again.
        background_done: bool,
    }

    impl<F, B, FO, BO> Future for BackgroundPoller<F, B>
    where
        F: Future<Output = FO> + Unpin,
        B: Future<Output = BO> + Unpin,
    {
        type Output = FO;

        fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
            let this = self.get_mut();

            // The main future always goes first; its result decides ours.
            let result = Pin::new(&mut this.future).poll(cx);

            if result.is_pending()
                && !this.background_done
                && Pin::new(&mut this.background_future).poll(cx).is_ready()
            {
                this.background_done = true;
            }

            result
        }
    }

    BackgroundPoller {
        future,
        background_future,
        background_done: false,
    }
    .await
}

View file

@ -1,11 +1,11 @@
macro_rules! selector { macro_rules! selector {
($($selector:expr),+ $(,)?) => {{ ($($selector:expr),+ $(,)?) => {{
static RE: once_cell::sync::OnceCell<scraper::Selector> = once_cell::sync::OnceCell::new(); static RE: once_cell::sync::OnceCell<scraper::Selector> = once_cell::sync::OnceCell::new();
RE.get_or_init(|| scraper::Selector::parse(join!(",", $($selector),+)).unwrap()) RE.get_or_init(|| scraper::Selector::parse(join_with!(",", $($selector),+)).unwrap())
}}; }};
} }
macro_rules! join { macro_rules! join_with {
($pattern:literal,$first:expr$(, $($rest:expr),*)? $(,)?) => { ($pattern:literal,$first:expr$(, $($rest:expr),*)? $(,)?) => {
concat!($first$(, $($pattern, $rest),*)?) concat!($first$(, $($pattern, $rest),*)?)
}; };
@ -18,14 +18,6 @@ macro_rules! regex {
}}; }};
} }
macro_rules! warn_err {
($result:expr, $($arg:tt)*) => {{
if let Err(err) = $result {
warn!("{} {}", format!($($arg)*), err);
}
}};
}
macro_rules! assert_slice_eq { macro_rules! assert_slice_eq {
($cur:expr, $offset:expr, $slice:expr, $($arg:tt)+) => {{ ($cur:expr, $offset:expr, $slice:expr, $($arg:tt)+) => {{
if !super::slice_eq($cur, $offset, $slice)? { if !super::slice_eq($cur, $offset, $slice)? {

16
src/utils/mod.rs Normal file
View file

@ -0,0 +1,16 @@
#[macro_use]
mod macros;
mod background_poll;
mod svg_encoder;
pub use background_poll::*;
pub use macros::*;
pub use svg_encoder::*;
use url::Url;
/// Returns a copy of `url` with `segment` appended as an additional path segment.
///
/// The input URL is left untouched.
///
/// # Panics
///
/// Panics if `Url::path_segments_mut` returns an error for `url`; the `url`
/// crate documents this for cannot-be-a-base URLs (e.g. `mailto:` or `data:`).
pub fn push_url(url: &Url, segment: &str) -> Url {
    let mut extended = url.clone();
    extended
        .path_segments_mut()
        .unwrap()
        .push(segment);
    extended
}