/* * asklyphe-frontend wikipedia.rs * - wikipedia helper functions * * Copyright (C) 2025 Real Microsoft, LLC * * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::Mutex; use std::time::Duration; use isahc::auth::Authentication; use isahc::config::RedirectPolicy; use isahc::{HttpClient, Request}; use isahc::prelude::*; use once_cell::sync::Lazy; use serde::Deserialize; use tracing::{error, warn}; #[derive(Clone, Debug, Deserialize)] pub struct WikipediaPage { pub title: String, pub description: String, pub extract: String, pub thumbnail: Option, pub content_urls: WikipediaContentUrls, } #[derive(Clone, Debug, Deserialize)] pub struct WikipediaThumbnail { pub source: Option, } #[derive(Clone, Debug, Deserialize)] pub struct WikipediaContentUrls { pub desktop: WikipediaUrls, } #[derive(Clone, Debug, Deserialize)] pub struct WikipediaUrls { pub page: String, } pub static USER_AGENT: Lazy = Lazy::new(|| format!("AskLyphe/{} (https://asklyphe.com; nikocs@voremicrocomputers.com)", env!("CARGO_PKG_VERSION"))); pub static WIKIPEDIA_CACHE: Lazy>> = Lazy::new(|| Mutex::new(BTreeMap::new())); pub static WIKIPEDIA_TIMEOUT: AtomicI64 = AtomicI64::new(0); #[derive(Clone, Debug)] pub struct WikipediaSummary { pub title: String, pub description: String, pub extract: String, pub image: Option, pub url: String, } pub async fn get_wikipedia_page(query_text: &str, timeout_secs: u64) -> Option { if let Some(cached) = { let cache = WIKIPEDIA_CACHE.lock().unwrap(); cache.get(query_text).cloned() } { let compare = "may refer to:"; if cached.extract.ends_with(compare) { return None; } Some(cached) } else { if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > 0 { if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > chrono::Utc::now().timestamp() { return None; } else { WIKIPEDIA_TIMEOUT.store(0, Ordering::Relaxed); } } let url = format!( r#"https://en.wikipedia.org/api/rest_v1/page/summary/{}?redirect=true"#, query_text); let client = HttpClient::builder() .redirect_policy(RedirectPolicy::Limit(10)) .timeout(Duration::from_secs(timeout_secs)) .build(); if let Err(e) = client { error!("failed to build client for fetching wikipedia: {e}"); return None; } let client = client.unwrap(); let request = Request::get(url.clone()) .header("user-agent", USER_AGENT.as_str()) .header("accept", "*/*") .body(()); if let Err(e) = request { error!("failed to fetch wikipedia api: {e}"); return None; } let request = request.unwrap(); let response = client.send_async(request).await; if let Err(e) = response { error!("failed to fetch wikipedia api: {e}"); return None; } let mut response = response.unwrap(); if response.status() != 200 && response.status() != 302 { return None; } let body = response.text().await; if let Err(e) = body { error!("failed to fetch wikipedia api: {e}"); return None; } let body = body.unwrap(); let wikiresult = serde_json::from_str::(&body); if let Err(e) = wikiresult { error!("failed to deserialise wikipedia api: {e}"); return None; } let wikiresult = wikiresult.unwrap(); let mut wiki = WikipediaSummary { title: wikiresult.title.clone(), description: wikiresult.description.clone(), extract: wikiresult.extract.clone(), image: None, url: wikiresult.content_urls.desktop.page.clone(), }; let compare = "may refer to:"; if wiki.extract.ends_with(compare) { WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone()); return None; } const MAX_LENGTH: usize = 800; if wiki.description.len() > MAX_LENGTH { let mut shortened = String::new(); for (i, c) in wiki.description.chars().enumerate() { shortened.push(c); if i > MAX_LENGTH { break; } } shortened.push_str("..."); wiki.description = shortened; } wiki.image = wikiresult.thumbnail.as_ref().and_then(|v| v.source.clone()); WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone()); Some(wiki) } }