initial commit

This commit is contained in:
Sam Denty 2021-01-29 12:23:15 +00:00
commit 6368454e36
No known key found for this signature in database
GPG key ID: F3E9308D4A43BC0E
17 changed files with 3069 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

3
.rustfmt.toml Normal file
View file

@ -0,0 +1,3 @@
indent_style = "Block"
reorder_imports = true
tab_spaces = 2

2157
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

36
Cargo.toml Normal file
View file

@ -0,0 +1,36 @@
[package]
name = "site_icons"
version = "0.1.0"
authors = ["Sam Denty <sam@samdenty.com>"]
edition = "2018"
[package.metadata.wasm-pack.profile.release]
wasm-opt = ["-Oz", "--enable-mutable-globals"]
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
clap = "3.0.0-beta.2"
itertools = "0.10.0"
serde_with = "1.6.1"
pin-utils = "0.1.0"
html5ever = "0.25.1"
url = { version = "2.2.0", features = ["serde"] }
regex = "1"
log = "0.4.14"
once_cell = "1.5.2"
scraper = "0.12.0"
tokio-byteorder = { git = "https://github.com/samdenty/tokio-byteorder", features = ["futures"] }
byteorder = "1.4.2"
data-url = "0.1.0"
mime = { git = "https://github.com/hyperium/mime" }
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
reqwest = { git = "https://github.com/samdenty/reqwest", features = ["json", "cookies", "blocking", "stream"] }
futures = "0.3.8"
wee_alloc = { version = "0.4.2", optional = true }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
tokio = { version = "1.1.0", features = ["full"] }

4
Makefile.toml Normal file
View file

@ -0,0 +1,4 @@
[tasks.run]
env = { "RUST_LOG" = "site_icons" }
command = "cargo"
args = ["run", "--", "${@}"]

37
README.md Normal file
View file

@ -0,0 +1,37 @@
# site_icons
An efficient website icon scraper for Rust
```rs
use site_icons::Icons;
let mut icons = Icons::new();
// scrape the icons from a url
icons.load_website("https://github.com").await?;
// fetch all icons, ensuring they exist & determining size
let entries = icons.entries().await;
for icon in entries {
println!("{:?}", icon);
}
```
## Features
- Validates that all URLs exist and are actually images
- Determines the size of the icon by partially fetching it
- Supports WASM (and cloudflare workers)
### Sources
- HTML favicon tag (or looking for default `/favicon.ico`)
- [Web app manifest](https://developer.mozilla.org/en-US/docs/Web/Manifest) [`icons`](https://developer.mozilla.org/en-US/docs/Web/Manifest/icons) field
- `<img>` tags on the page, directly inside the header OR with a `src|alt|class` containing the text "logo"
## Running locally
Install [cargo make](https://github.com/sagiegurari/cargo-make) and then:
```bash
cargo make run https://github.com
```

32
src/bin/site_icons.rs Normal file
View file

@ -0,0 +1,32 @@
use clap::Clap;
use site_icons::Icons;
use std::error::Error;
// Command-line arguments of the `site_icons` binary.
// NOTE(review): plain `//` comments are used on purpose; clap's derive is understood
// to turn `///` doc comments into help text, which would change the CLI output.
#[derive(Clap)]
struct Opts {
// URLs of the websites to load icons from
urls: Vec<String>,
// When set, the entries are printed as pretty-printed JSON instead of one line per icon
#[clap(long)]
json: bool,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let mut icons = Icons::new();
let opts: Opts = Opts::parse();
for url in opts.urls {
icons.load_website(&url).await?;
}
let entries = icons.entries().await;
if opts.json {
println!("{}", serde_json::to_string_pretty(&entries)?)
} else {
for icon in entries {
println!("{} {} {}", icon.url, icon.kind, icon.info);
}
}
Ok(())
}

45
src/icon.rs Normal file
View file

@ -0,0 +1,45 @@
use super::IconInfo;
use serde::Serialize;
use std::{
cmp::Ordering,
fmt::{self, Display},
};
use url::Url;
/// How an icon was discovered.
///
/// The derived `Ord` follows declaration order, so
/// `SiteLogo < SiteFavicon < AppIcon`.
#[derive(Debug, Serialize, Clone, PartialOrd, PartialEq, Ord, Eq)]
pub enum IconKind {
/// Found through an `<img>` tag on the page
SiteLogo,
/// Found through an icon `<link>` tag or the default `/favicon.ico`
SiteFavicon,
/// Listed in a web app manifest
AppIcon,
}
impl Display for IconKind {
  /// Writes the snake_case label of the kind (`site_logo`, `site_favicon`, `app_icon`).
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let label = match self {
      IconKind::AppIcon => "app_icon",
      IconKind::SiteFavicon => "site_favicon",
      IconKind::SiteLogo => "site_logo",
    };
    f.write_str(label)
  }
}
/// A single icon: where it lives, how it was discovered, and its format/size details.
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Icon {
/// Location of the icon
pub url: Url,
/// How the icon was discovered; serialized through its `Display` text
#[serde(with = "serde_with::rust::display_fromstr")]
pub kind: IconKind,
/// Format and size details; its fields are flattened into the serialized icon
#[serde(flatten)]
pub info: IconInfo,
}
impl Ord for Icon {
  /// Orders icons by their `info` only; `url` and `kind` do not take part.
  fn cmp(&self, rhs: &Self) -> Ordering {
    Ord::cmp(&self.info, &rhs.info)
  }
}
impl PartialOrd for Icon {
  /// Always `Some`: defers to the total order defined by `Ord`.
  fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
    Some(Ord::cmp(self, rhs))
  }
}

162
src/icon_info.rs Normal file
View file

@ -0,0 +1,162 @@
use crate::{icon_size::*, CLIENT};
use data_url::DataUrl;
use futures::{io::Cursor, prelude::*, stream::TryStreamExt};
use mime::MediaType;
use reqwest::{header::*, Url};
use serde::{Deserialize, Serialize};
use std::{
cmp::Ordering,
error::Error,
fmt::{self, Display},
io::{self},
};
/// Formats whose size `IconInfo::get` has to read from the image data itself.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum IconType {
PNG,
JPEG,
ICO,
}
/// Format of an icon together with the size information known for it.
///
/// Serialized with a lowercase `type` tag naming the variant.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
#[serde(tag = "type")]
#[serde(rename_all = "lowercase")]
pub enum IconInfo {
/// PNG image with a single size
PNG { size: IconSize },
/// JPEG image with a single size
JPEG { size: IconSize },
/// ICO file, which can hold several sizes
ICO { sizes: IconSizes },
/// SVG image; no size is recorded
SVG,
}
impl Display for IconInfo {
  /// Writes the lowercase format name, followed by the size(s) when there are any.
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      IconInfo::SVG => f.write_str("svg"),
      IconInfo::ICO { sizes } => write!(f, "ico {}", sizes),
      IconInfo::JPEG { size } => write!(f, "jpeg {}", size),
      IconInfo::PNG { size } => write!(f, "png {}", size),
    }
  }
}
impl Ord for IconInfo {
  /// Compares by `size()`.
  ///
  /// Two infos without a size are equal, an info without a size sorts before
  /// any info that has one, and two sized infos compare their sizes.
  fn cmp(&self, other: &Self) -> Ordering {
    match (self.size(), other.size()) {
      (Some(mine), Some(theirs)) => mine.cmp(theirs),
      (None, None) => Ordering::Equal,
      (None, Some(_)) => Ordering::Less,
      (Some(_), None) => Ordering::Greater,
    }
  }
}
impl PartialOrd for IconInfo {
  /// Always `Some`: defers to the total order defined by `Ord`.
  fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
    Some(Ord::cmp(self, rhs))
  }
}
impl IconInfo {
pub async fn get(url: Url, sizes: Option<String>) -> Result<IconInfo, Box<dyn Error>> {
let sizes = sizes.as_ref().and_then(|s| IconSizes::from_str(s).ok());
let (mime, mut body): (_, Box<dyn AsyncRead + Unpin>) = match url.scheme() {
"data" => {
let url = url.to_string();
let url = DataUrl::process(&url).map_err(|_| "failed to parse data uri")?;
let mime = url.mime_type().to_string().parse::<MediaType>()?;
let body = Cursor::new(
url
.decode_to_vec()
.map_err(|_| "failed to decode data uri body")?
.0,
);
(mime, Box::new(body))
}
_ => {
let res = CLIENT.get(url).send().await?;
if !res.status().is_success() {
return Err("failed to fetch".into());
};
let mime = res
.headers()
.get(CONTENT_TYPE)
.ok_or("no content type")?
.to_str()?
.parse::<MediaType>()?;
let body = res
.bytes_stream()
.map(|result| {
result.map_err(|error| io::Error::new(io::ErrorKind::Other, error.to_string()))
})
.into_async_read();
(mime, Box::new(body))
}
};
let kind = match (mime.type_(), mime.subtype()) {
(mime::IMAGE, mime::PNG) => {
if let Some(size) = sizes.map(|s| s.into_largest()) {
return Ok(IconInfo::PNG { size });
}
IconType::PNG
}
(mime::IMAGE, mime::JPEG) => {
if let Some(size) = sizes.map(|s| s.into_largest()) {
return Ok(IconInfo::JPEG { size });
}
IconType::JPEG
}
(mime::IMAGE, "x-icon") | (mime::IMAGE, "vnd.microsoft.icon") => {
if let Some(sizes) = sizes {
return Ok(IconInfo::ICO { sizes });
}
IconType::ICO
}
(mime::IMAGE, mime::SVG) => return Ok(IconInfo::SVG),
_ => return Err(format!("unsupported mime type {}", mime).into()),
};
Ok(match kind {
IconType::PNG => {
let size = get_png_sizes(&mut body).await?;
IconInfo::PNG { size }
}
IconType::ICO => {
let sizes = get_ico_sizes(&mut body).await?;
IconInfo::ICO { sizes }
}
IconType::JPEG => {
let size = get_jpeg_size(&mut body).await?;
IconInfo::JPEG { size }
}
})
}
/// Returns the size used to rank this icon: the single size of a PNG/JPEG,
/// the `largest()` entry of an ICO, or `None` for SVG.
pub fn size(&self) -> Option<&IconSize> {
  let size = match self {
    IconInfo::SVG => return None,
    IconInfo::PNG { size } | IconInfo::JPEG { size } => size,
    IconInfo::ICO { sizes } => sizes.largest(),
  };
  Some(size)
}
}

62
src/icon_size/ico.rs Normal file
View file

@ -0,0 +1,62 @@
use super::{png::get_png_sizes, IconSizes};
use byteorder::{LittleEndian, ReadBytesExt};
use futures::prelude::*;
use std::{
error::Error,
io::{Cursor, Seek, SeekFrom},
};
/// Value of the header's type field that identifies an icon (`.ico`) file.
const ICO_TYPE: u16 = 1;
/// Size in bytes of one directory entry.
const INDEX_SIZE: u16 = 16;
/// Number of bytes `get_png_sizes` consumes from the reader.
const PNG_HEADER_SIZE: usize = 24;

/// Reads the sizes of all images listed in an ICO file from the start of `reader`.
///
/// The 6-byte header and the directory (16 bytes per image) are read first.
/// A width or height byte of 0 stands for 256 pixels. When both are 0 the
/// image may be an embedded PNG of any size, so the reader is advanced to the
/// image data and the PNG header is consulted; if that is not possible the
/// entry is recorded as 256x256.
///
/// # Errors
/// Fails when the stream ends early, the header is not an ICO header, or the
/// directory lists no images.
pub async fn get_ico_sizes<R: AsyncRead + Unpin>(
  reader: &mut R,
) -> Result<IconSizes, Box<dyn Error>> {
  // Number of bytes consumed from `reader` so far
  let mut offset = 0usize;

  let mut header = [0; 6];
  reader.read_exact(&mut header).await?;
  offset += header.len();
  let mut header = Cursor::new(header);

  let header_type = header.read_u16::<LittleEndian>()?;
  let icon_type = header.read_u16::<LittleEndian>()?;
  if header_type != 0 || icon_type != ICO_TYPE {
    return Err("bad header".into());
  }
  let icon_count = header.read_u16::<LittleEndian>()?;

  // Widen before multiplying: `icon_count * INDEX_SIZE` overflows u16 from 4096 entries on
  let mut data = vec![0; usize::from(icon_count) * usize::from(INDEX_SIZE)];
  reader.read_exact(&mut data).await?;
  offset += data.len();
  let mut data = Cursor::new(data);

  // A stored dimension of 0 means 256 pixels
  let dimension = |raw: u8| -> u32 {
    if raw == 0 {
      256
    } else {
      u32::from(raw)
    }
  };

  let mut sizes = IconSizes::new();
  for i in 0..icon_count {
    data.seek(SeekFrom::Start(u64::from(INDEX_SIZE) * u64::from(i)))?;
    let width = data.read_u8()?;
    let height = data.read_u8()?;

    if width == 0 && height == 0 {
      // Skip the remaining entry fields up to the 4-byte image offset
      data.seek(SeekFrom::Current(10))?;
      let image_offset = data.read_u32::<LittleEndian>()? as usize;

      let mut png_size = None;
      // The stream cannot rewind, so image data located before the current
      // position is unreachable
      if let Some(mut remaining) = image_offset.checked_sub(offset) {
        // Skip in small steps instead of allocating a buffer of file-controlled size
        let mut scratch = [0u8; 512];
        while remaining > 0 {
          let step = remaining.min(scratch.len());
          reader.read_exact(&mut scratch[..step]).await?;
          offset += step;
          remaining -= step;
        }

        png_size = get_png_sizes(reader).await.ok();
        offset += PNG_HEADER_SIZE;
      }

      match png_size {
        Some(size) => sizes.push(size),
        None => sizes.add_size(256, 256),
      }
    } else {
      sizes.add_size(dimension(width), dimension(height))
    }
  }

  // An empty list would make `IconSizes::largest` panic later on
  if sizes.is_empty() {
    return Err("no sizes found".into());
  }

  sizes.sort();
  Ok(sizes)
}

View file

@ -0,0 +1,81 @@
use super::IconSize;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::{
cmp::Ordering,
error::Error,
fmt::{self, Display},
ops::{Deref, DerefMut},
};
/// A list of icon sizes, wrapping a `Vec<IconSize>`.
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)]
pub struct IconSizes(Vec<IconSize>);
impl Display for IconSizes {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(&self.0.iter().join(" "))
}
}
impl IconSizes {
  /// Creates an empty list.
  pub fn new() -> Self {
    IconSizes(Vec::new())
  }

  /// Parses a whitespace-separated size list such as `"16x16 32x32"`.
  ///
  /// Tokens that are not a valid size are skipped. The result is sorted with
  /// `IconSize`'s ordering.
  ///
  /// # Errors
  /// Fails when no token could be parsed as a size.
  pub fn from_str(sizes_str: &str) -> Result<IconSizes, Box<dyn Error>> {
    let mut sizes = IconSizes::new();
    // `split_whitespace` also copes with tabs, newlines and repeated spaces
    sizes.extend(
      sizes_str
        .split_whitespace()
        .filter_map(|token| serde_json::from_value::<IconSize>(Value::String(token.to_string())).ok()),
    );

    if sizes.is_empty() {
      return Err("must contain a size".into());
    }

    sizes.sort();
    Ok(sizes)
  }

  /// Appends a size of `width` x `height`.
  pub fn add_size(&mut self, width: u32, height: u32) {
    self.push(IconSize::new(width, height))
  }

  /// Returns the first size of the list, which is the largest one when the
  /// list is sorted and `IconSize`'s ordering puts larger sizes first.
  ///
  /// # Panics
  /// Panics when the list is empty.
  pub fn largest(&self) -> &IconSize {
    &self.0[0]
  }

  /// Consuming variant of [`IconSizes::largest`].
  ///
  /// # Panics
  /// Panics when the list is empty.
  pub fn into_largest(self) -> IconSize {
    self.0.into_iter().next().unwrap()
  }
}
impl Deref for IconSizes {
type Target = Vec<IconSize>;
fn deref(&self) -> &Vec<IconSize> {
&self.0
}
}
impl DerefMut for IconSizes {
fn deref_mut(&mut self) -> &mut Vec<IconSize> {
&mut self.0
}
}
impl Ord for IconSizes {
  /// Compares two lists by their `largest()` entries.
  ///
  /// Panics when either list is empty, because `largest()` does.
  fn cmp(&self, rhs: &Self) -> Ordering {
    let mine = self.largest();
    let theirs = rhs.largest();
    Ord::cmp(mine, theirs)
  }
}
impl PartialOrd for IconSizes {
  /// Always `Some`: defers to the total order defined by `Ord`.
  fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
    Some(Ord::cmp(self, rhs))
  }
}

64
src/icon_size/jpeg.rs Normal file
View file

@ -0,0 +1,64 @@
use super::IconSize;
use crate::assert_slice_eq;
use byteorder::BigEndian;
use futures::prelude::*;
use std::{error::Error, io::Cursor};
use tokio_byteorder::AsyncReadBytesExt;
/// Reads the pixel dimensions of a JPEG from the start of `reader`.
///
/// Walks the marker segments until a start-of-frame (SOFn) marker is found at
/// nesting depth 0, then reads the height and width stored in it.
///
/// # Errors
/// Fails when the stream ends early, does not start with `FF D8`, holds
/// something other than a marker where one is expected, or declares a segment
/// length smaller than the 2 bytes of the length field itself.
pub async fn get_jpeg_size<R: AsyncRead + Unpin>(
  reader: &mut R,
) -> Result<IconSize, Box<dyn Error>> {
  let mut data = [0; 2];
  reader.read_exact(&mut data).await?;
  let data = &mut Cursor::new(data);

  // first marker of the file MUST be 0xFFD8
  assert_slice_eq!(data, 0, &[0xFF, 0xD8], "bad header");

  let mut marker = [0; 2];
  let mut depth = 0i32;

  loop {
    // Read current marker (FF XX)
    reader.read_exact(&mut marker).await?;

    if marker[0] != 0xFF {
      // Did not read a marker. Assume image is corrupt.
      return Err("invalid jpeg".into());
    }

    let page = marker[1];

    // Check for valid SOFn markers. C4, C8, and CC aren't dimension markers.
    let is_frame_marker = matches!(page, 0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF);

    if is_frame_marker {
      // Only get outside image size
      if depth == 0 {
        // Correct marker, go forward 3 bytes so we're at height offset
        reader.read_exact(&mut [0; 3]).await?;
        break;
      }
    } else if page == 0xD8 {
      depth += 1;
    } else if page == 0xD9 {
      depth -= 1;
      if depth < 0 {
        return Err("invalid jpeg".into());
      }
    }

    // Read the marker length and skip over it entirely. The length includes
    // its own 2 bytes, so anything smaller is corrupt; subtracting blindly
    // would wrap around to an enormous buffer size.
    let segment_len = reader.read_u16::<BigEndian>().await?;
    let payload_len = usize::from(segment_len)
      .checked_sub(2)
      .ok_or("invalid jpeg")?;
    reader.read_exact(&mut vec![0; payload_len]).await?;
  }

  let height = reader.read_u16::<BigEndian>().await?;
  let width = reader.read_u16::<BigEndian>().await?;

  Ok(IconSize::new(width as _, height as _))
}

103
src/icon_size/mod.rs Normal file
View file

@ -0,0 +1,103 @@
mod ico;
mod icon_sizes;
mod jpeg;
mod png;
pub use ico::*;
pub use icon_sizes::*;
pub use jpeg::*;
pub use png::*;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use std::{
cmp::Ordering,
error::Error,
fmt::{self, Display},
io::{Read, Seek, SeekFrom},
};
/// Width and height of an icon; displayed and serialized as `{width}x{height}`.
#[serde_as]
#[derive(Debug, PartialEq, Eq)]
pub struct IconSize {
width: u32,
height: u32,
}
impl Display for IconSize {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}x{}", self.width, self.height)
}
}
impl IconSize {
pub fn new(width: u32, height: u32) -> Self {
Self { width, height }
}
}
impl Ord for IconSize {
  /// Compares by area (`width * height`) in reverse, so that the larger size
  /// sorts first.
  ///
  /// Sizes with the same area compare as equal even when their dimensions differ.
  fn cmp(&self, other: &Self) -> Ordering {
    // Widen to u64: both factors come from image headers and can overflow u32 when multiplied
    let self_res = u64::from(self.width) * u64::from(self.height);
    let other_res = u64::from(other.width) * u64::from(other.height);
    other_res.cmp(&self_res)
  }
}
impl PartialOrd for IconSize {
  /// Always `Some`: defers to the total order defined by `Ord`.
  fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
    Some(Ord::cmp(self, rhs))
  }
}
impl Serialize for IconSize {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(self)
}
}
impl<'de> Deserialize<'de> for IconSize {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let value: String = Deserialize::deserialize(deserializer)?;
let mut split = value.split("x");
let width = split
.next()
.ok_or(de::Error::custom("expected width"))?
.parse()
.map_err(de::Error::custom)?;
let height = split
.next()
.ok_or(de::Error::custom("expected height"))?
.parse()
.map_err(de::Error::custom)?;
Ok(IconSize::new(width, height))
}
}
/// Reports whether the bytes of `cur` starting at `offset` equal `slice`.
///
/// Seeks `cur` to `offset` and reads `slice.len()` bytes, leaving the position
/// right behind them. Fails when seeking fails or fewer bytes are available.
fn slice_eq<T: Read + Seek + Unpin>(
  cur: &mut T,
  offset: u64,
  slice: &[u8],
) -> Result<bool, Box<dyn Error>> {
  let mut window = vec![0u8; slice.len()];
  cur.seek(SeekFrom::Start(offset))?;
  cur.read_exact(window.as_mut_slice())?;

  let same = window.as_slice() == slice;
  Ok(same)
}
/// Returns early with a formatted error unless the bytes of the reader at the
/// given offset equal the expected slice.
///
/// Expands to a call to `super::slice_eq`, so it has to be invoked from a
/// function returning a `Result` inside a direct child module of the module
/// that defines `slice_eq`.
#[macro_export]
macro_rules! assert_slice_eq {
  ($reader:expr, $at:expr, $expected:expr, $($message:tt)+) => {{
    let matched = super::slice_eq($reader, $at, $expected)?;
    if !matched {
      return Err(format!($($message)+).into());
    }
  }};
}

21
src/icon_size/png.rs Normal file
View file

@ -0,0 +1,21 @@
use super::IconSize;
use crate::assert_slice_eq;
use byteorder::{BigEndian, ReadBytesExt};
use futures::prelude::*;
use std::{error::Error, io::Cursor};
/// Reads the pixel dimensions of a PNG from the first 24 bytes of `reader`.
///
/// Fails when the stream ends early, the PNG signature is missing, or the
/// first chunk is not `IHDR`.
pub async fn get_png_sizes<R: AsyncRead + Unpin>(
  reader: &mut R,
) -> Result<IconSize, Box<dyn Error>> {
  let mut raw = [0; 24];
  reader.read_exact(&mut raw).await?;
  let cursor = &mut Cursor::new(raw);

  assert_slice_eq!(cursor, 0, b"\x89PNG\r\n\x1a\n", "bad header");
  // This check leaves the cursor at byte 16, right where the width starts
  assert_slice_eq!(cursor, 12, b"IHDR", "bad header");

  let (width, height) = (
    cursor.read_u32::<BigEndian>()?,
    cursor.read_u32::<BigEndian>()?,
  );
  Ok(IconSize::new(width, height))
}

220
src/icons.rs Normal file
View file

@ -0,0 +1,220 @@
use crate::{selector, Icon, IconInfo, IconKind, CLIENT};
use future::join_all;
use futures::StreamExt;
use futures::{prelude::*, task::noop_waker};
use html5ever::{
driver,
tendril::{Tendril, TendrilSink},
};
use reqwest::{header::*, IntoUrl};
use scraper::Html;
use serde::Deserialize;
use std::task::Poll;
use std::{collections::HashMap, error::Error, pin::Pin, task::Context};
use url::Url;
/// Collects the icons discovered for one or more websites.
pub struct Icons {
// Icons whose info has already been resolved
entries: Vec<Icon>,
// Icons still being fetched, keyed by URL: the kind they were found as and
// the in-flight `IconInfo` future
pending_entries: HashMap<
Url,
(
IconKind,
Pin<Box<dyn Future<Output = Result<IconInfo, Box<dyn Error>>>>>,
),
>,
}
/// Appends an icon to `entries` when its info was resolved successfully;
/// otherwise logs a warning and leaves `entries` untouched.
fn add_icon_entry(
  entries: &mut Vec<Icon>,
  url: Url,
  kind: IconKind,
  info: Result<IconInfo, Box<dyn Error>>,
) {
  let info = match info {
    Ok(resolved) => resolved,
    Err(e) => {
      warn!("failed to parse icon: {}", e);
      return;
    }
  };

  entries.push(Icon { url, kind, info });
}
impl Icons {
pub fn new() -> Self {
Icons {
entries: Vec::new(),
pending_entries: HashMap::new(),
}
}
/// Add an icon URL and start fetching it.
///
/// When the URL is already known (pending or resolved), no new fetch is
/// started; the stored kind is replaced only if `kind` ranks higher.
/// Otherwise the fetch future is polled once right away: a result that is
/// already available is recorded immediately, an unfinished fetch is kept
/// as pending.
///
/// Currently always returns `Ok(())`.
pub fn add_icon(
  &mut self,
  url: Url,
  kind: IconKind,
  sizes: Option<String>,
) -> Result<(), Box<dyn Error>> {
  // Look the URL up among the pending fetches first, then among the resolved icons
  let known_kind = match self.pending_entries.get_mut(&url) {
    Some((pending_kind, _)) => Some(pending_kind),
    None => self
      .entries
      .iter_mut()
      .find(|icon| icon.url == url)
      .map(|icon| &mut icon.kind),
  };

  if let Some(existing_kind) = known_kind {
    // if the kind is more important, replace it
    if kind > *existing_kind {
      *existing_kind = kind;
    }
    return Ok(());
  }

  let mut fetch = Box::pin(IconInfo::get(url.clone(), sizes));

  // Poll once with a waker that does nothing, purely to kick the fetch off
  let waker = noop_waker();
  let mut cx = Context::from_waker(&waker);
  match fetch.poll_unpin(&mut cx) {
    Poll::Pending => {
      self.pending_entries.insert(url, (kind, fetch));
    }
    Poll::Ready(info) => add_icon_entry(&mut self.entries, url, kind, info),
  }

  Ok(())
}
/// Fetches the page at `url` and queues every icon it references.
///
/// Sources, in the order they are processed:
/// - `<link>` tags with an icon `rel`; when none of them has an `href` that
///   resolves to a URL, the default `/favicon.ico` is queued instead
/// - the first `<img>` inside a `<header>`, or with `logo` in its `src`, `alt`
///   or `class`, whose `src` resolves to a URL
/// - the icons of every linked web app manifest
///
/// The body is decoded as UTF-8, with invalid sequences replaced.
///
/// # Errors
/// Fails when the page request fails, a body chunk cannot be read, the
/// `/favicon.ico` URL cannot be built, or a linked manifest cannot be loaded.
pub async fn load_website<U: IntoUrl>(&mut self, url: U) -> Result<(), Box<dyn Error>> {
  let res = CLIENT.get(url).header(ACCEPT, "text/html").send().await?;
  // Relative links are resolved against the URL reported by the response
  let url = res.url().clone();
  let mut body = res.bytes_stream();

  // Feed raw bytes through the lossy UTF-8 decoder: a network chunk may end
  // in the middle of a multi-byte character, so chunks cannot be validated
  // as UTF-8 one by one
  let mut parser = driver::parse_document(Html::new_document(), Default::default()).from_utf8();
  while let Some(data) = body.next().await {
    let chunk = data?;
    parser.process(Tendril::from_slice(&chunk[..]));
  }
  let document = parser.finish();

  {
    let mut found_favicon = false;

    for element_ref in document.select(selector!(
      "link[rel='icon']",
      "link[rel='shortcut icon']",
      "link[rel='apple-touch-icon']",
      "link[rel='apple-touch-icon-precomposed']"
    )) {
      let elem = element_ref.value();
      if let Some(href) = elem.attr("href").and_then(|href| url.join(href).ok()) {
        if self
          .add_icon(
            href,
            IconKind::SiteFavicon,
            elem.attr("sizes").map(|sizes| sizes.into()),
          )
          .is_ok()
        {
          found_favicon = true;
        }
      }
    }

    // Check for default favicon.ico
    if !found_favicon {
      self.add_icon(url.join("/favicon.ico")?, IconKind::SiteFavicon, None)?;
    }
  }

  for element_ref in document.select(selector!(
    "header img",
    "img[src*=logo]",
    "img[alt*=logo]",
    "img[class*=logo]"
  )) {
    if let Some(href) = element_ref
      .value()
      .attr("src")
      .and_then(|href| url.join(href).ok())
    {
      // Only the first usable logo candidate is kept
      if self.add_icon(href, IconKind::SiteLogo, None).is_ok() {
        break;
      }
    }
  }

  for element_ref in document.select(selector!("link[rel='manifest']")) {
    if let Some(href) = element_ref
      .value()
      .attr("href")
      .and_then(|href| url.join(href).ok())
    {
      self.load_manifest(href).await?;
    }
  }

  Ok(())
}
/// Downloads the web app manifest at `manifest_url` and queues each of its
/// `icons` as an `AppIcon`.
///
/// Icon `src` values are resolved against `manifest_url`; entries that do not
/// resolve to a URL are skipped.
///
/// # Errors
/// Fails when the request fails or the response is not a JSON manifest.
pub async fn load_manifest(&mut self, manifest_url: Url) -> Result<(), Box<dyn Error>> {
  #[derive(Deserialize)]
  struct ManifestIcon {
    src: String,
    sizes: Option<String>,
  }

  #[derive(Deserialize)]
  struct Manifest {
    icons: Option<Vec<ManifestIcon>>,
  }

  let response = CLIENT.get(manifest_url.as_str()).send().await?;
  let manifest: Manifest = response.json().await?;

  // A manifest without an `icons` field simply contributes nothing
  for icon in manifest.icons.into_iter().flatten() {
    if let Ok(src) = manifest_url.join(&icon.src) {
      let _ = self.add_icon(src, IconKind::AppIcon, icon.sizes);
    }
  }

  Ok(())
}
/// Fetch all the icons and return a list of them.
///
/// Waits for every pending fetch; icons whose fetch failed are logged and
/// left out.
///
/// List is ordered from highest resolution to lowest resolution; icons
/// without a size (SVG) come first.
///
/// ```no_run
/// # async fn run() -> Result<(), Box<dyn std::error::Error>> {
/// use site_icons::Icons;
///
/// let mut icons = Icons::new();
/// icons.load_website("https://github.com").await?;
///
/// let entries = icons.entries().await;
/// for icon in entries {
///   println!("{:?}", icon);
/// }
/// # Ok(())
/// # }
/// ```
pub async fn entries(mut self) -> Vec<Icon> {
  // Split the pending map into parallel lists so the futures can be awaited together
  let (keys, fetches): (Vec<_>, Vec<_>) = self
    .pending_entries
    .into_iter()
    .map(|(url, (kind, info))| ((url, kind), info))
    .unzip();

  // `join_all` yields the results in the order the futures were given
  let results = join_all(fetches).await;
  for ((url, kind), info) in keys.into_iter().zip(results) {
    add_icon_entry(&mut self.entries, url, kind, info);
  }

  self.entries.sort();
  self.entries
}
}

27
src/lib.rs Normal file
View file

@ -0,0 +1,27 @@
//! `site_icons`: collects the icons (favicons, logos, web app manifest icons)
//! that a website references.

// Nightly-only feature gates this crate is built with
#![feature(async_closure, map_into_keys_values, bool_to_option)]
#[macro_use]
extern crate serde_with;
#[macro_use]
extern crate log;
mod icon;
mod icon_info;
mod icon_size;
mod icons;
mod macros;
// Public API: the icon model, its format/size info and the `Icons` collector
pub use icon::*;
pub use icon_info::*;
pub use icons::*;
use once_cell::sync::Lazy;
use reqwest::{
header::{HeaderMap, HeaderValue, USER_AGENT},
Client,
};
/// HTTP client shared by the whole crate, created on first use.
///
/// Every request carries a desktop Chrome `User-Agent` header by default.
static CLIENT: Lazy<Client> = Lazy::new(|| {
  let user_agent = HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36").unwrap();

  let mut default_headers = HeaderMap::new();
  default_headers.insert(USER_AGENT, user_agent);

  Client::builder()
    .default_headers(default_headers)
    .build()
    .unwrap()
});

14
src/macros.rs Normal file
View file

@ -0,0 +1,14 @@
/// Returns a `&'static scraper::Selector` matching any of the given CSS
/// selectors.
///
/// The selector strings are joined with `,` at compile time and parsed once,
/// on first use of each call site.
///
/// # Panics
/// Panics on first use when the combined selector does not parse.
#[macro_export]
macro_rules! selector {
  ($($selector:expr),+ $(,)?) => {{
    static SELECTOR: once_cell::sync::OnceCell<scraper::Selector> = once_cell::sync::OnceCell::new();
    // `$crate` keeps the path to `join!` valid wherever this exported macro is expanded
    SELECTOR.get_or_init(|| scraper::Selector::parse($crate::join!(",", $($selector),+)).unwrap())
  }};
}
/// Concatenates literals at compile time, placing the separator literal
/// between them: `join!(",", "a", "b")` expands to `concat!("a", ",", "b")`.
///
/// A trailing comma after the last element is accepted.
#[macro_export]
macro_rules! join {
  ($pattern:literal, $first:expr $(, $rest:expr)* $(,)?) => {
    concat!($first $(, $pattern, $rest)*)
  };
}