//! # favicon-scraper //! //! A simple crate to scrape favicons asynchronously that's intended to *just work* //! //! To get started, have a look at [`scrape`]! pub mod error; pub mod html; pub mod icon; pub mod manifest; pub use error::Error; use futures::future::{join, join_all}; use html::HTML; use icon::{Icon, IconKind}; use manifest::scan_manifest; use reqwest::{Client, IntoUrl}; use url::Url; /// Perform scraping. /// /// The URL scheme **must** be either `http` or `https`. /// /// This will load the given URL, parse the returned HTML, and if found, also load and parse any linked manifests. /// /// Any found icons will be partially loaded to get their size. /// ICO files will be interpreted as their largest size as per [`imagesize`'s README](https://github.com/Roughsketch/imagesize/blob/017b33da886a27484614e9527d14fc5f3f0d5079/README.md?plain=1#L41). /// ``` /// # tokio_test::block_on(async { /// use favicon_scraper::{scrape, Error}; /// /// let icons = scrape("https://kitsunes.dev").await.unwrap(); /// /// // Only HTTP(S) is supported /// assert!(matches!( /// scrape("ftp://example.com").await, /// Err(Error::UnsupportedURLScheme) /// )); /// # }) /// ``` pub async fn scrape(url: impl IntoUrl) -> Result, Error> { let url = url.into_url()?; if !matches!(url.scheme(), "http" | "https") { return Err(Error::UnsupportedURLScheme); } let client = Client::new(); let hardcoded_urls = join_all(vec![ try_hardcoded_path(&client, &url, "/favicon.ico"), try_hardcoded_path(&client, &url, "/favicon.svg"), try_hardcoded_path(&client, &url, "/favicon.png"), ]); let html = HTML::scan_html(&client, url.clone()); let (hardcoded_urls, html) = join(hardcoded_urls, html).await; let mut icons: Vec = hardcoded_urls.into_iter().flatten().collect(); if let Ok(mut html) = html { icons.append(&mut html.icons); if let Some(manifest) = html.manifest { if let Ok(mut manifest_icons) = scan_manifest(&client, manifest).await { icons.append(&mut manifest_icons); } } } Ok(icons) } async fn try_hardcoded_path(client: &Client, url: &Url, path: &'static str) -> Option { let url = url.join(path).unwrap(); Icon::from_url(client, url, IconKind::HardcodedURL) .await .ok() } #[cfg(test)] mod tests { use super::*; // Using this as a test because site_icons failed on it for some reason #[tokio::test] async fn test_catwithaclarinet() { let icons = scrape("https://ck.catwithaclari.net").await.unwrap(); println!("Found {} icons:\n", icons.len()); for icon in icons { println!("URL: {}", icon.url); println!("Size: {}x{} pixels", icon.size.width, icon.size.height); println!("Kind of icon: {:?}\n", icon.kind); } } }