95 lines
2.8 KiB
Rust
95 lines
2.8 KiB
Rust
//! # favicon-scraper
|
|
//!
|
|
//! A simple crate to scrape favicons asynchronously that's intended to *just work*
|
|
//!
|
|
//! To get started, have a look at [`scrape`]!
|
|
|
|
pub mod error;
|
|
pub mod html;
|
|
pub mod icon;
|
|
pub mod manifest;
|
|
|
|
pub use error::Error;
|
|
use futures::future::{join, join_all};
|
|
use html::HTML;
|
|
use icon::{Icon, IconKind};
|
|
use manifest::scan_manifest;
|
|
use reqwest::{Client, IntoUrl};
|
|
use url::Url;
|
|
|
|
/// Perform scraping.
|
|
///
|
|
/// The URL scheme **must** be either `http` or `https`.
|
|
///
|
|
/// This will load the given URL, parse the returned HTML, and if found, also load and parse any linked manifests.
|
|
///
|
|
/// Any found icons will be partially loaded to get their size.
|
|
/// ICO files will be interpreted as their largest size as per [`imagesize`'s README](https://github.com/Roughsketch/imagesize/blob/017b33da886a27484614e9527d14fc5f3f0d5079/README.md?plain=1#L41).
|
|
/// ```
|
|
/// # tokio_test::block_on(async {
|
|
/// use favicon_scraper::{scrape, Error};
|
|
///
|
|
/// let icons = scrape("https://kitsunes.dev").await.unwrap();
|
|
///
|
|
/// // Only HTTP(S) is supported
|
|
/// assert!(matches!(
|
|
/// scrape("ftp://example.com").await,
|
|
/// Err(Error::UnsupportedURLScheme)
|
|
/// ));
|
|
/// # })
|
|
/// ```
|
|
pub async fn scrape(url: impl IntoUrl) -> Result<Vec<Icon>, Error> {
|
|
let url = url.into_url()?;
|
|
if !matches!(url.scheme(), "http" | "https") {
|
|
return Err(Error::UnsupportedURLScheme);
|
|
}
|
|
let client = Client::new();
|
|
|
|
let hardcoded_urls = join_all(vec![
|
|
try_hardcoded_path(&client, &url, "/favicon.ico"),
|
|
try_hardcoded_path(&client, &url, "/favicon.svg"),
|
|
try_hardcoded_path(&client, &url, "/favicon.png"),
|
|
]);
|
|
|
|
let html = HTML::scan_html(&client, url.clone());
|
|
|
|
let (hardcoded_urls, html) = join(hardcoded_urls, html).await;
|
|
|
|
let mut icons: Vec<Icon> = hardcoded_urls.into_iter().flatten().collect();
|
|
|
|
if let Ok(mut html) = html {
|
|
icons.append(&mut html.icons);
|
|
|
|
if let Some(manifest) = html.manifest {
|
|
if let Ok(mut manifest_icons) = scan_manifest(&client, manifest).await {
|
|
icons.append(&mut manifest_icons);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(icons)
|
|
}
|
|
|
|
async fn try_hardcoded_path(client: &Client, url: &Url, path: &'static str) -> Option<Icon> {
|
|
let url = url.join(path).unwrap();
|
|
Icon::from_url(client, url, IconKind::HardcodedURL)
|
|
.await
|
|
.ok()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test against a site that site_icons failed on for some reason.
    // Performs real network requests and prints every icon that was found.
    #[tokio::test]
    async fn test_catwithaclarinet() {
        let found = scrape("https://ck.catwithaclari.net").await.unwrap();

        println!("Found {} icons:\n", found.len());
        found.iter().for_each(|icon| {
            println!("URL: {}", icon.url);
            println!("Size: {}x{} pixels", icon.size.width, icon.size.height);
            println!("Kind of icon: {:?}\n", icon.kind);
        });
    }
}
|