favicon-scraper/src/lib.rs

95 lines
2.8 KiB
Rust

//! # favicon-scraper
//!
//! A simple crate for scraping favicons asynchronously that's intended to *just work*.
//!
//! To get started, have a look at [`scrape`]!
pub mod error;
pub mod html;
pub mod icon;
pub mod manifest;
pub use error::Error;
use futures::future::{join, join_all};
use html::HTML;
use icon::{Icon, IconKind};
use manifest::scan_manifest;
use reqwest::{Client, IntoUrl};
use url::Url;
/// Perform scraping.
///
/// The URL scheme **must** be either `http` or `https`.
///
/// This will load the given URL, parse the returned HTML, and if found, also load and parse any linked manifests.
///
/// Any found icons will be partially loaded to get their size.
/// ICO files will be interpreted as their largest size as per [`imagesize`'s README](https://github.com/Roughsketch/imagesize/blob/017b33da886a27484614e9527d14fc5f3f0d5079/README.md?plain=1#L41).
/// ```
/// # tokio_test::block_on(async {
/// use favicon_scraper::{scrape, Error};
///
/// let icons = scrape("https://kitsunes.dev").await.unwrap();
///
/// // Only HTTP(S) is supported
/// assert!(matches!(
/// scrape("ftp://example.com").await,
/// Err(Error::UnsupportedURLScheme)
/// ));
/// # })
/// ```
pub async fn scrape(url: impl IntoUrl) -> Result<Vec<Icon>, Error> {
let url = url.into_url()?;
if !matches!(url.scheme(), "http" | "https") {
return Err(Error::UnsupportedURLScheme);
}
let client = Client::new();
let hardcoded_urls = join_all(vec![
try_hardcoded_path(&client, &url, "/favicon.ico"),
try_hardcoded_path(&client, &url, "/favicon.svg"),
try_hardcoded_path(&client, &url, "/favicon.png"),
]);
let html = HTML::scan_html(&client, url.clone());
let (hardcoded_urls, html) = join(hardcoded_urls, html).await;
let mut icons: Vec<Icon> = hardcoded_urls.into_iter().flatten().collect();
if let Ok(mut html) = html {
icons.append(&mut html.icons);
if let Some(manifest) = html.manifest {
if let Ok(mut manifest_icons) = scan_manifest(&client, manifest).await {
icons.append(&mut manifest_icons);
}
}
}
Ok(icons)
}
/// Probe a conventional favicon location relative to `url`.
///
/// `path` is resolved against `url` with [`Url::join`] and the result is loaded
/// through [`Icon::from_url`], tagged as [`IconKind::HardcodedURL`].
///
/// Returns `None` if `path` cannot be joined onto `url` or if loading the icon fails.
async fn try_hardcoded_path(client: &Client, url: &Url, path: &'static str) -> Option<Icon> {
    // This function is best-effort and already reports misses as `None`, so a join
    // failure is treated as a miss instead of panicking inside library code.
    let candidate = url.join(path).ok()?;
    Icon::from_url(client, candidate, IconKind::HardcodedURL)
        .await
        .ok()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Using this as a test because site_icons failed on it for some reason
    #[tokio::test]
    async fn test_catwithaclarinet() {
        let icons = scrape("https://ck.catwithaclari.net").await.unwrap();

        println!("Found {} icons:\n", icons.len());
        for icon in &icons {
            println!("URL: {}", icon.url);
            println!("Size: {}x{} pixels", icon.size.width, icon.size.height);
            println!("Kind of icon: {:?}\n", icon.kind);
        }

        // `scrape` ignores failures of the individual lookups and still returns `Ok`,
        // so the `unwrap` above cannot catch a regression; require an actual result.
        assert!(
            !icons.is_empty(),
            "expected at least one icon to be found for the site"
        );
    }
}