Scraping is now thread-safe
This commit is contained in:
parent
68180dc5e5
commit
5b111fd292
1 changed file with 16 additions and 6 deletions
22
src/html.rs
22
src/html.rs
|
@ -31,19 +31,29 @@ impl HTML {
|
|||
.filter_map(|u| url.join(u).ok())
|
||||
}
|
||||
|
||||
/// Scans an HTML file for icons and a Web App Manifest.
|
||||
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
|
||||
let response = client.get(url).send().await?;
|
||||
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
|
||||
let text = response.text().await?;
|
||||
fn parse_html(text: String, url: Url) -> (Option<Url>, Vec<Url>) {
|
||||
let html = SHTML::parse_document(&text);
|
||||
|
||||
let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
|
||||
let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
|
||||
|
||||
let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
|
||||
(
|
||||
manifest,
|
||||
HTML::get_urls_from_html(&icon_selector, &html, &url).collect(),
|
||||
)
|
||||
}
|
||||
|
||||
let icons = HTML::get_urls_from_html(&icon_selector, &html, &url)
|
||||
/// Scans an HTML file for icons and a Web App Manifest.
|
||||
pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
|
||||
let response = client.get(url).send().await?;
|
||||
let url = response.url().to_owned(); // Specifically use the destination URL after redirects and such
|
||||
let text = response.text().await?;
|
||||
|
||||
let (manifest, icons) = HTML::parse_html(text, url);
|
||||
|
||||
let icons = icons
|
||||
.into_iter()
|
||||
.map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
|
||||
let icons: Vec<Icon> = join_all(icons)
|
||||
.await
|
||||
|
|
Loading…
Add table
Reference in a new issue