Compare commits
2 commits
68180dc5e5
...
ac481ec06a
Author | SHA1 | Date | |
---|---|---|---|
ac481ec06a | |||
5b111fd292 |
3 changed files with 18 additions and 8 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -226,7 +226,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "favicon-scraper"
|
name = "favicon-scraper"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"imagesize",
|
"imagesize",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "favicon-scraper"
|
name = "favicon-scraper"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
description = "A favicon scraper that just works"
|
description = "A favicon scraper that just works"
|
||||||
|
|
22
src/html.rs
22
src/html.rs
|
@ -31,19 +31,29 @@ impl HTML {
|
||||||
.filter_map(|u| url.join(u).ok())
|
.filter_map(|u| url.join(u).ok())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scans an HTML file for icons and a Web App Manifest.
|
fn parse_html(text: String, url: Url) -> (Option<Url>, Vec<Url>) {
|
||||||
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
|
|
||||||
let response = client.get(url).send().await?;
|
|
||||||
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
|
|
||||||
let text = response.text().await?;
|
|
||||||
let html = SHTML::parse_document(&text);
|
let html = SHTML::parse_document(&text);
|
||||||
|
|
||||||
let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
|
let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
|
||||||
let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
|
let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
|
||||||
|
|
||||||
let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
|
let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
|
||||||
|
(
|
||||||
|
manifest,
|
||||||
|
HTML::get_urls_from_html(&icon_selector, &html, &url).collect(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
let icons = HTML::get_urls_from_html(&icon_selector, &html, &url)
|
/// Scans an HTML file for icons and a Web App Manifest.
|
||||||
|
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
|
||||||
|
let response = client.get(url).send().await?;
|
||||||
|
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
|
||||||
|
let text = response.text().await?;
|
||||||
|
|
||||||
|
let (manifest, icons) = HTML::parse_html(text, url);
|
||||||
|
|
||||||
|
let icons = icons
|
||||||
|
.into_iter()
|
||||||
.map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
|
.map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
|
||||||
let icons: Vec<Icon> = join_all(icons)
|
let icons: Vec<Icon> = join_all(icons)
|
||||||
.await
|
.await
|
||||||
|
|
Loading…
Add table
Reference in a new issue