Compare commits

7 commits

| Author | SHA1 | Date |
|---|---|---|
| | 81dbf946e9 | |
| | 61619ac660 | |
| | 314bc347de | |
| | f1a4e2e819 | |
| | 108eb427b8 | |
| | ac481ec06a | |
| | 5b111fd292 | |

7 changed files with 66 additions and 13 deletions
.forgejo/workflows/ci.yaml (new file, 20 additions)

```diff
@@ -0,0 +1,20 @@
+name: Build & Test
+
+on: [push]
+
+jobs:
+  build-run:
+    runs-on: docker
+    container:
+      image: rust
+    steps:
+      - name: Update package repos
+        run: apt update
+      - name: Install Node using apt
+        run: apt install nodejs -y
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Build using Cargo
+        run: cargo build --verbose
+      - name: Run unit tests
+        run: cargo test --verbose
```
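A note on the workflow itself: the plain `rust` container image does not ship Node.js, and `actions/checkout@v4` is a JavaScript action, which is presumably why Node is installed via apt before the checkout step runs.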
Cargo.lock (generated, 2 changes)

```diff
@@ -226,7 +226,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "favicon-scraper"
-version = "0.2.0"
+version = "0.3.1"
 dependencies = [
  "futures",
  "imagesize",
```
Cargo.toml (3 changes)

```diff
@@ -1,12 +1,13 @@
 [package]
 name = "favicon-scraper"
-version = "0.2.0"
+version = "0.3.1"
 edition = "2021"
 license = "MIT"
 description = "A favicon scraper that just works"
 homepage = "https://kitsunes.dev/Nekomata/favicon-scraper"
 repository = "https://kitsunes.dev/Nekomata/favicon-scraper"
 readme = "README.md"
+exclude = ["/.forgejo"]
 
 [dependencies]
 futures = { version = "0.3.31", default-features = false, features = ["std"] }
```
README.md (14 changes)

````diff
@@ -1,5 +1,15 @@
 # favicon-scraper
 
-Scrapes favicons from websites.
+Scrapes favicons from websites. Does not particularly care for 100% optimal
+performance, it just needs to work.
 
-Does not particularly care for 100% optimal performance, it just needs to work
+To get started, try the `scrape` function:
+
+```rust
+use favicon_scraper::{scrape, Icon};
+
+let icons: Vec<Icon> = scrape("https://google.com").await.unwrap();
+
+// Should find something like "https://www.google.com/favicon.ico"
+println!("Google's icon can be found at {}", icons[0].url);
+```
````
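Since `scrape` is async, the new README snippet needs an executor around it to actually run. A minimal sketch of a complete program, assuming the tokio runtime that the crate's own test suite uses:

```rust
// Hypothetical wrapper around the README snippet; tokio is assumed only
// because the tests added in src/lib.rs below use #[tokio::test].
use favicon_scraper::{scrape, Icon};

#[tokio::main]
async fn main() {
    let icons: Vec<Icon> = scrape("https://google.com").await.unwrap();

    // Should find something like "https://www.google.com/favicon.ico"
    println!("Google's icon can be found at {}", icons[0].url);
}
```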
src/html.rs (22 changes)

```diff
@@ -31,19 +31,29 @@ impl HTML {
             .filter_map(|u| url.join(u).ok())
     }
 
-    /// Scans an HTML file for icons and a Web App Manifest.
-    pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
-        let response = client.get(url).send().await?;
-        let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
-        let text = response.text().await?;
+    fn parse_html(text: String, url: Url) -> (Option<Url>, Vec<Url>) {
         let html = SHTML::parse_document(&text);
 
         let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
         let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
 
         let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
-        let icons = HTML::get_urls_from_html(&icon_selector, &html, &url)
+        (
+            manifest,
+            HTML::get_urls_from_html(&icon_selector, &html, &url).collect(),
+        )
+    }
+
+    /// Scans an HTML file for icons and a Web App Manifest.
+    pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
+        let response = client.get(url).send().await?;
+        let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
+        let text = response.text().await?;
+
+        let (manifest, icons) = HTML::parse_html(text, url);
+
+        let icons = icons
             .into_iter()
             .map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
         let icons: Vec<Icon> = join_all(icons)
             .await
```
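One plausible motivation for splitting the synchronous parsing out of `scan_html`, beyond readability: `scraper`'s parsed-document types are not `Send`, so confining them to the plain function `parse_html` guarantees they are dropped before `scan_html` reaches its next `.await`, keeping the returned future `Send`. A generic compile-only sketch of that pattern, with `Rc` standing in for the parser state (not the crate's actual code):

```rust
use std::rc::Rc;

// `Rc` is !Send, standing in here for non-Send parser state such as a
// parsed HTML document. Confining it to a synchronous helper means it is
// dropped before the async caller reaches any .await point.
fn parse(text: &str) -> usize {
    let doc = Rc::new(text.to_owned()); // hypothetical "parsed document"
    doc.len()
} // `doc` is dropped here

// No !Send value lives across the .await, so this future stays Send.
async fn scan(text: String) -> usize {
    let n = parse(&text);
    std::future::ready(()).await;
    n
}
```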
src/lib.rs (12 changes)

```diff
@@ -12,7 +12,7 @@ pub mod manifest;
 pub use error::Error;
 use futures::future::{join, join_all};
 use html::HTML;
-use icon::{Icon, IconKind};
+pub use icon::{Icon, IconKind};
 use manifest::scan_manifest;
 use reqwest::{Client, IntoUrl};
 use url::Url;
@@ -92,4 +92,14 @@ mod tests {
             println!("Kind of icon: {:?}\n", icon.kind);
         }
     }
+
+    #[tokio::test]
+    async fn test_readme_example() {
+        use crate::{scrape, Icon};
+
+        let icons: Vec<Icon> = scrape("https://google.com").await.unwrap();
+
+        // Should find something like "https://www.google.com/favicon.ico"
+        println!("Google's icon can be found at {}", icons[0].url);
+    }
 }
```
src/manifest.rs (6 changes)

```diff
@@ -20,12 +20,14 @@ struct ManifestIcon {
 
 /// Scans a Web App Manifest for icons.
 pub async fn scan_manifest(client: &Client, url: impl IntoUrl) -> Result<Vec<Icon>, Error> {
-    let manifest: Manifest = client.get(url).send().await?.json().await?;
+    let url = url.into_url()?;
+    let manifest: Manifest = client.get(url.clone()).send().await?.json().await?;
     Ok(join_all(
         manifest
             .icons
             .into_iter()
-            .map(|i| Icon::from_url(client, i.src, IconKind::LinkedInManifest)),
+            .filter_map(|i| url.join(&i.src).ok())
+            .map(|u| Icon::from_url(client, u, IconKind::LinkedInManifest)),
     )
     .await
     .into_iter()
```
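This change resolves each icon's `src` against the manifest's own URL instead of passing it through verbatim, so relative paths in a Web App Manifest now resolve correctly (and unresolvable ones are silently skipped by `filter_map`). A small illustration of what `Url::join` does here, with hypothetical URLs:

```rust
use url::Url;

fn main() -> Result<(), url::ParseError> {
    // Hypothetical manifest location and a relative icon src, as a
    // Web App Manifest might list it.
    let manifest_url = Url::parse("https://example.com/site.webmanifest")?;
    let icon = manifest_url.join("icons/icon-192.png")?;
    assert_eq!(icon.as_str(), "https://example.com/icons/icon-192.png");
    Ok(())
}
```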