Use external scraper for finding icons

This commit is contained in:
CenTdemeern1 2025-01-30 17:13:50 +01:00
parent c856ab9900
commit 1449e2f5df
3 changed files with 67 additions and 42 deletions

3
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 4
[[package]] [[package]]
name = "addr2line" name = "addr2line"
@ -285,6 +285,7 @@ dependencies = [
"semver", "semver",
"serde", "serde",
"serde_json", "serde_json",
"tokio",
"url", "url",
] ]

View file

@ -10,4 +10,5 @@ rocket = { version = "0.5.1", features = ["json"] }
semver = "1.0.24" semver = "1.0.24"
serde = { version = "1.0.217", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.135" serde_json = "1.0.135"
tokio = { version = "1.43.0", features = ["process"] }
url = "2.5.4" url = "2.5.4"

View file

@ -2,10 +2,20 @@ use std::net::ToSocketAddrs;
use rocket::serde::json::Json; use rocket::serde::json::Json;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::process::Command;
use url::Url; use url::Url;
use crate::known_software::KNOWN_SOFTWARE_NODEINFO_NAMES; use crate::known_software::KNOWN_SOFTWARE_NODEINFO_NAMES;
/// Lower bound used when filtering icon candidates by their parsed size
/// (the larger side of a `WxH` value; presumably pixels — confirm against the scraper's output).
const MINIMUM_ICON_SIZE: usize = 16;
/// Parses an icon dimension string of the form `WIDTHxHEIGHT` (e.g. `"32x32"`)
/// and returns the larger of the two sides.
///
/// The separator may be a lowercase `x` or an uppercase `X`, since both are valid
/// in HTML/web-manifest `sizes` values. Leading and trailing whitespace is ignored.
///
/// Returns `None` when the separator is missing or either side is not a valid
/// non-negative integer (for example the keyword `"any"`).
fn parse_size(size: &str) -> Option<usize> {
    let (width, height) = size.trim().split_once(['x', 'X'])?;
    let width: usize = width.parse().ok()?;
    let height: usize = height.parse().ok()?;
    // Icons are not guaranteed to be square; rank them by their larger side.
    Some(width.max(height))
}
#[derive(Serialize)] #[derive(Serialize)]
pub struct InstanceInfo { pub struct InstanceInfo {
name: String, name: String,
@ -15,24 +25,38 @@ pub struct InstanceInfo {
} }
#[derive(Deserialize)] #[derive(Deserialize)]
struct InstanceManifest { struct InstanceIcon {
name: Option<String>, href: String,
short_name: Option<String>, size: usize,
icons: Option<Vec<InstanceIcon>>,
} }
#[derive(Deserialize)] #[derive(Deserialize)]
struct InstanceIcon { struct ScrapedIcon {
src: String, url: String,
sizes: String, size: Option<String>,
sizes: Option<Vec<String>>,
} }
impl InstanceIcon { impl TryInto<InstanceIcon> for ScrapedIcon {
fn get_size(&self) -> Option<usize> { type Error = ();
let (x, y) = self.sizes.split_once("x")?;
let x: usize = x.parse().ok()?; fn try_into(self) -> Result<InstanceIcon, Self::Error> {
let y: usize = y.parse().ok()?; let size = if let Some(size) = self.size {
Some(x.max(y)) parse_size(&size)
} else if let Some(sizes) = self.sizes {
sizes
.into_iter()
.filter_map(|s| parse_size(&s))
.filter(|&s| s >= MINIMUM_ICON_SIZE)
.min()
} else {
return Err(());
}
.ok_or(())?;
Ok(InstanceIcon {
href: self.url,
size,
})
} }
} }
@ -64,31 +88,34 @@ struct NodeInfoMetadata {
name: Option<String>, name: Option<String>,
} }
async fn get_info_from_manifest(url: Url) -> Option<[Option<String>; 3]> { async fn find_icon(host: &str) -> Option<String> {
// FIXME: Iceshrimp.NET doesn't have a manifest... let icons: Vec<ScrapedIcon> = serde_json::from_slice(
let response = reqwest::get(url.clone()).await.ok()?.text().await.ok()?; &Command::new("site-icons")
let manifest: InstanceManifest = serde_json::from_str(&response).ok()?; .args(["--json", host])
Some([ .output()
manifest.name, .await
manifest.short_name, .ok()?
manifest .stdout,
.icons )
.as_ref() .ok()?;
.and_then(|icons| icons.iter().min_by_key(|icon| icon.get_size())) icons
.map(|icon| icon.src.to_owned()), .into_iter()
]) .filter_map(|i| -> Option<InstanceIcon> { i.try_into().ok() })
.filter(|i| i.size > MINIMUM_ICON_SIZE)
.min_by_key(|i| i.size)
.map(|i| i.href)
} }
#[get("/instance_info/<secure>/<host>")] #[get("/instance_info/<secure>/<host>")]
pub async fn instance_info(secure: bool, host: &str) -> Option<Json<InstanceInfo>> { pub async fn instance_info(secure: bool, host: &str) -> Option<Json<InstanceInfo>> {
let mut url = Url::parse(if secure { let mut url = Url::parse(if secure {
"https://temp.host/manifest.json" "https://temp.host/"
} else { } else {
"http://temp.host/manifest.json" "http://temp.host/"
}) })
.ok()?; .unwrap();
url.set_host(Some(host)).ok()?; // Using this to catch malformed hosts url.set_host(Some(host)).ok()?; // Using this to catch malformed hosts
let host = url.host_str()?; // Shadow the original host in case things were filtered out let host = url.host_str()?.to_owned(); // Shadow the original host in case things were filtered out
// Check if the host is globally routable. // Check if the host is globally routable.
// This should help filter out a bunch of invalid or potentially malicious requests // This should help filter out a bunch of invalid or potentially malicious requests
@ -103,13 +130,8 @@ pub async fn instance_info(secure: bool, host: &str) -> Option<Json<InstanceInfo
return None; return None;
} }
let [name, short_name, icon_url] = get_info_from_manifest(url.clone()) let icon_url = find_icon(url.as_str()).await;
.await
.unwrap_or_default();
let icon_url = icon_url
.and_then(|i| url.join(&i).ok())
.map(|u| u.to_string());
// FIXME: Iceshrimp.NET doesn't have a nodeinfo discovery file either.............
url.set_path("/.well-known/nodeinfo"); url.set_path("/.well-known/nodeinfo");
let response = reqwest::get(url.clone()).await.ok()?.text().await.ok()?; let response = reqwest::get(url.clone()).await.ok()?.text().await.ok()?;
let nodeinfo_discovery: NodeInfoDiscovery = serde_json::from_str(&response).ok()?; let nodeinfo_discovery: NodeInfoDiscovery = serde_json::from_str(&response).ok()?;
@ -126,11 +148,12 @@ pub async fn instance_info(secure: bool, host: &str) -> Option<Json<InstanceInfo
.and_then(|v| fork_map.get(v.build.as_str())) .and_then(|v| fork_map.get(v.build.as_str()))
.unwrap_or(software_name) .unwrap_or(software_name)
.to_owned(); .to_owned();
Some(Json(InstanceInfo { Some(Json(InstanceInfo {
name: name name: nodeinfo
.or(short_name) .metadata
.or(nodeinfo.metadata.and_then(|m| m.name)) .and_then(|m| m.name)
.unwrap_or(url.host_str().unwrap().to_owned()), .unwrap_or(host.to_owned()),
software, software,
icon_url, icon_url,
})) }))