Compare commits

..

No commits in common. "main" and "v0.2.0" have entirely different histories.
main ... v0.2.0

7 changed files with 13 additions and 66 deletions

View file

@ -1,20 +0,0 @@
# CI workflow: builds the crate and runs its unit tests on every push.
name: Build & Test
on: [push]
jobs:
build-run:
runs-on: docker
container:
image: rust
steps:
# Refresh the apt package index before installing anything.
- name: Update package repos
run: apt update
# NOTE(review): Node is presumably needed by the JavaScript-based checkout action below — confirm.
- name: Install Node using apt
run: apt install nodejs -y
- name: Checkout repo
uses: actions/checkout@v4
- name: Build using Cargo
run: cargo build --verbose
- name: Run unit tests
run: cargo test --verbose

2
Cargo.lock generated
View file

@ -226,7 +226,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "favicon-scraper"
version = "0.3.1"
version = "0.2.0"
dependencies = [
"futures",
"imagesize",

View file

@ -1,13 +1,12 @@
[package]
name = "favicon-scraper"
version = "0.3.1"
version = "0.2.0"
edition = "2021"
license = "MIT"
description = "A favicon scraper that just works"
homepage = "https://kitsunes.dev/Nekomata/favicon-scraper"
repository = "https://kitsunes.dev/Nekomata/favicon-scraper"
readme = "README.md"
exclude = ["/.forgejo"]
[dependencies]
futures = { version = "0.3.31", default-features = false, features = ["std"] }

View file

@ -1,15 +1,5 @@
# favicon-scraper
Scrapes favicons from websites. It does not aim for 100% optimal
performance; it just needs to work.
Scrapes favicons from websites.
To get started, try the `scrape` function:
```rust
use favicon_scraper::{scrape, Icon};
let icons: Vec<Icon> = scrape("https://google.com").await.unwrap();
// Should find something like "https://www.google.com/favicon.ico"
println!("Google's icon can be found at {}", icons[0].url);
```
It does not aim for 100% optimal performance; it just needs to work.

View file

@ -31,29 +31,19 @@ impl HTML {
.filter_map(|u| url.join(u).ok())
}
fn parse_html(text: String, url: Url) -> (Option<Url>, Vec<Url>) {
/// Scans an HTML file for icons and a Web App Manifest.
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
let response = client.get(url).send().await?;
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
let text = response.text().await?;
let html = SHTML::parse_document(&text);
let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
(
manifest,
HTML::get_urls_from_html(&icon_selector, &html, &url).collect(),
)
}
/// Scans an HTML file for icons and a Web App Manifest.
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
let response = client.get(url).send().await?;
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
let text = response.text().await?;
let (manifest, icons) = HTML::parse_html(text, url);
let icons = icons
.into_iter()
let icons = HTML::get_urls_from_html(&icon_selector, &html, &url)
.map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
let icons: Vec<Icon> = join_all(icons)
.await

View file

@ -12,7 +12,7 @@ pub mod manifest;
pub use error::Error;
use futures::future::{join, join_all};
use html::HTML;
pub use icon::{Icon, IconKind};
use icon::{Icon, IconKind};
use manifest::scan_manifest;
use reqwest::{Client, IntoUrl};
use url::Url;
@ -92,14 +92,4 @@ mod tests {
println!("Kind of icon: {:?}\n", icon.kind);
}
}
// Exercises the same snippet that the README shows as its "getting started"
// example, so the documented usage is covered by the test suite.
// NOTE(review): `scrape` is handed a live URL, so this presumably needs
// network access to pass — confirm.
#[tokio::test]
async fn test_readme_example() {
use crate::{scrape, Icon};
// `unwrap` fails the test if `scrape` returns an error.
let icons: Vec<Icon> = scrape("https://google.com").await.unwrap();
// Should find something like "https://www.google.com/favicon.ico"
// Indexing with `[0]` panics (failing the test) if no icons were returned.
println!("Google's icon can be found at {}", icons[0].url);
}
}

View file

@ -20,14 +20,12 @@ struct ManifestIcon {
/// Scans a Web App Manifest for icons.
pub async fn scan_manifest(client: &Client, url: impl IntoUrl) -> Result<Vec<Icon>, Error> {
let url = url.into_url()?;
let manifest: Manifest = client.get(url.clone()).send().await?.json().await?;
let manifest: Manifest = client.get(url).send().await?.json().await?;
Ok(join_all(
manifest
.icons
.into_iter()
.filter_map(|i| url.join(&i.src).ok())
.map(|u| Icon::from_url(client, u, IconKind::LinkedInManifest)),
.map(|i| Icon::from_url(client, i.src, IconKind::LinkedInManifest)),
)
.await
.into_iter()