From c7ea3326cbc5e3c1c5dbd4551f4e5a3b22761f33 Mon Sep 17 00:00:00 2001 From: CenTdemeern1 Date: Sun, 2 Feb 2025 23:54:36 +0100 Subject: [PATCH] Rest in piss site_icons You will not be missed --- Cargo.lock | 325 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + README.md | 4 +- install-binary-deps.sh | 2 - src/api/instance_info.rs | 78 +++------- 5 files changed, 349 insertions(+), 61 deletions(-) delete mode 100755 install-binary-deps.sh diff --git a/Cargo.lock b/Cargo.lock index dbb768e..202ebf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -185,6 +185,29 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "deranged" version = "0.3.11" @@ -194,6 +217,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "devise" version = "0.4.2" @@ -238,6 +272,27 @@ dependencies = [ "syn", ] +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "either" version = "1.13.0" @@ -275,11 +330,26 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "favicon-scraper" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e2d1e19588bba8f650edac55fc88e1f2db227d18920d9e71322b46530f04a2" +dependencies = [ + "futures", + "imagesize", + "reqwest", + "scraper", + "serde", + "url", +] + [[package]] name = "fedirect" version = "0.1.0" dependencies = [ "bytes", + "favicon-scraper", "reqwest", "rocket", "semver", @@ -333,6 +403,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -410,6 +490,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generator" version = "0.7.5" @@ -423,6 +512,15 @@ dependencies = [ "windows", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -502,6 +600,20 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +[[package]] +name = "html5ever" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e15626aaf9c351bc696217cbe29cb9b5e86c43f8a46b5e2f5c6c5cf7cb904ce" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.12" @@ -805,6 +917,12 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "imagesize" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285" + [[package]] name = "indexmap" version = "2.7.0" @@ -910,6 +1028,26 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c88c6129bd24319e62a0359cb6b958fa7e8be6e19bb1663bc396b90883aca5" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matchers" version = "0.1.0" @@ -987,6 +1125,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1130,6 +1274,77 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher 1.0.1", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1163,6 +1378,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.93" @@ -1531,6 +1752,21 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -1554,6 +1790,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.24" @@ -1613,6 +1868,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1637,6 +1901,18 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.9" @@ -1692,6 +1968,32 @@ dependencies = [ "loom", ] +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "subtle" version = "2.6.1" @@ -1764,6 +2066,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -2040,6 +2353,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -2063,6 +2382,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" diff --git a/Cargo.toml b/Cargo.toml index 2ab9ce1..95fdd5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] bytes = "1.9.0" +favicon-scraper = "0.3.1" reqwest = { version = "0.12.12", features = ["stream"] } rocket = { version = "0.5.1", features = ["json"] } semver = "1.0.24" diff --git a/README.md b/README.md index 9e52636..52e4b61 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,7 @@ To compile TypeScript, the build script assumes Deno is installed. 1. [Install Rust via rustup](https://rustup.rs/) 2. [Install Deno](https://deno.com/) -3. Run `install-binary-deps.sh` (this installs the icon scraper and possibly - more in the future) -4. Use Cargo to build the project: +3. Use Cargo to build the project: ```sh # For example, to build for release, you can do diff --git a/install-binary-deps.sh b/install-binary-deps.sh deleted file mode 100755 index 333739c..0000000 --- a/install-binary-deps.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -cargo install site_icons diff --git a/src/api/instance_info.rs b/src/api/instance_info.rs index 438306e..95c417b 100644 --- a/src/api/instance_info.rs +++ b/src/api/instance_info.rs @@ -1,21 +1,14 @@ use std::net::ToSocketAddrs; +use favicon_scraper::{Icon, IconKind}; use rocket::serde::json::Json; use serde::{Deserialize, Serialize}; -use tokio::process::Command; use url::Url; use crate::known_software::KNOWN_SOFTWARE_NODEINFO_NAMES; const MINIMUM_ICON_SIZE: usize = 16; -fn parse_size(size: &str) -> Option { - let (x, y) = size.split_once("x")?; - let x: usize = x.parse().ok()?; - let y: usize = y.parse().ok()?; - Some(x.max(y)) -} - #[derive(Serialize)] pub struct InstanceInfo { name: String, @@ -24,42 +17,6 @@ pub struct InstanceInfo { icon_url: Option, } -#[derive(Deserialize)] -struct InstanceIcon { - href: String, - size: usize, -} - -#[derive(Deserialize)] -struct ScrapedIcon { - url: String, - size: Option, - sizes: Option>, -} - -impl TryInto for ScrapedIcon { - type Error = (); - - fn try_into(self) -> Result { - let size = if let Some(size) = self.size { - parse_size(&size) - } else if let Some(sizes) = self.sizes { - sizes - .into_iter() - .filter_map(|s| parse_size(&s)) - .filter(|&s| s >= MINIMUM_ICON_SIZE) - .min() - } else { - return Err(()); - } - .ok_or(())?; - Ok(InstanceIcon { - href: self.url, - size, - }) - } -} - #[derive(Deserialize)] struct NodeInfoDiscovery { links: Vec, @@ -88,22 +45,31 @@ struct NodeInfoMetadata { name: Option, } +/// Scrapes icons and returns the smallest one where the smallest axis is bigger than or equal to [MINIMUM_ICON_SIZE] async fn find_icon(host: &str) -> Option { - let icons: Vec = serde_json::from_slice( - &Command::new("site-icons") - .args(["--json", host]) - .output() - .await - .ok()? - .stdout, - ) - .ok()?; + let icons: Vec = favicon_scraper::scrape(host) + .await + .ok()? + .into_iter() + .filter(|i| i.size.width.min(i.size.height) >= MINIMUM_ICON_SIZE) + .collect(); + + let priority = |kind: &IconKind| match kind { + IconKind::LinkedInHTML => 0, + IconKind::LinkedInManifest => 1, + IconKind::HardcodedURL => 2, + _ => 3, + }; + let preferred_kind = icons + .iter() + .map(|i| i.kind) + .min_by(|x, y| priority(x).cmp(&priority(y)))?; // None if icons is empty + icons .into_iter() - .filter_map(|i| -> Option { i.try_into().ok() }) - .filter(|i| i.size >= MINIMUM_ICON_SIZE) + .filter(|i| i.kind == preferred_kind) .min_by_key(|i| i.size) - .map(|i| i.href) + .map(|i| i.url.into()) } #[get("/instance_info//")]