asklyphe/asklyphe-frontend/src/wikipedia.rs

/*
* asklyphe-frontend wikipedia.rs
* - wikipedia helper functions
*
* Copyright (C) 2025 Real Microsoft, LLC
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

use std::collections::BTreeMap;
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::Mutex;
use std::time::Duration;
use isahc::config::RedirectPolicy;
use isahc::{HttpClient, Request};
use isahc::prelude::*;
use once_cell::sync::Lazy;
use serde::Deserialize;
use tracing::error;
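
/// The fields we use from the Wikipedia REST `page/summary` response; serde ignores the
/// rest of the JSON payload.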
#[derive(Clone, Debug, Deserialize)]
pub struct WikipediaPage {
    pub title: String,
    pub description: String,
    pub extract: String,
    pub thumbnail: Option<WikipediaThumbnail>,
    pub content_urls: WikipediaContentUrls,
}

#[derive(Clone, Debug, Deserialize)]
pub struct WikipediaThumbnail {
    pub source: Option<String>,
}

#[derive(Clone, Debug, Deserialize)]
pub struct WikipediaContentUrls {
    pub desktop: WikipediaUrls,
}

#[derive(Clone, Debug, Deserialize)]
pub struct WikipediaUrls {
    pub page: String,
}
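
// descriptive user agent (with a contact address) sent with every wikipedia api request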
pub static USER_AGENT: Lazy<String> = Lazy::new(|| format!("AskLyphe/{} (https://asklyphe.com; nikocs@voremicrocomputers.com)", env!("CARGO_PKG_VERSION")));
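// per-query cache of summaries, so repeated searches don't hit the wikipedia api again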
pub static WIKIPEDIA_CACHE: Lazy<Mutex<BTreeMap<String, WikipediaSummary>>> = Lazy::new(|| Mutex::new(BTreeMap::new()));
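// unix timestamp until which wikipedia lookups are skipped (presumably set elsewhere when
// wikipedia is slow or failing); reset to 0 once it has passed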
pub static WIKIPEDIA_TIMEOUT: AtomicI64 = AtomicI64::new(0);
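
/// Flattened view of a [`WikipediaPage`], as stored in [`WIKIPEDIA_CACHE`].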
#[derive(Clone, Debug)]
pub struct WikipediaSummary {
    pub title: String,
    pub description: String,
    pub extract: String,
    pub image: Option<String>,
    pub url: String,
}
pub async fn get_wikipedia_page(query_text: &str, timeout_secs: u64) -> Option<WikipediaSummary> {
    if let Some(cached) = {
        let cache = WIKIPEDIA_CACHE.lock().unwrap();
        cache.get(query_text).cloned()
    } {
        // cached disambiguation pages are kept so we don't refetch them, but are never shown
        let compare = "may refer to:";
        if cached.extract.ends_with(compare) {
            return None;
        }
        Some(cached)
    } else {
        // if a back-off timestamp is set, skip wikipedia until it has passed
        if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > 0 {
            if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > chrono::Utc::now().timestamp() {
                return None;
            } else {
                WIKIPEDIA_TIMEOUT.store(0, Ordering::Relaxed);
            }
        }
        // redirect=true asks the api to redirect us to the canonical article title
        let url = format!("https://en.wikipedia.org/api/rest_v1/page/summary/{}?redirect=true", query_text);
        let client = HttpClient::builder()
            .redirect_policy(RedirectPolicy::Limit(10))
            .timeout(Duration::from_secs(timeout_secs))
            .build();
        if let Err(e) = client {
            error!("failed to build client for fetching wikipedia: {e}");
            return None;
        }
        let client = client.unwrap();
        let request = Request::get(url.clone())
            .header("user-agent", USER_AGENT.as_str())
            .header("accept", "*/*")
            .body(());
        if let Err(e) = request {
            error!("failed to build wikipedia api request: {e}");
            return None;
        }
        let request = request.unwrap();
        let response = client.send_async(request).await;
        if let Err(e) = response {
            error!("failed to fetch wikipedia api: {e}");
            return None;
        }
        let mut response = response.unwrap();
        if response.status() != 200 && response.status() != 302 {
            return None;
        }
        let body = response.text().await;
        if let Err(e) = body {
            error!("failed to read wikipedia api response: {e}");
            return None;
        }
        let body = body.unwrap();
        let wikiresult = serde_json::from_str::<WikipediaPage>(&body);
        if let Err(e) = wikiresult {
            error!("failed to deserialise wikipedia api: {e}");
            return None;
        }
        let wikiresult = wikiresult.unwrap();
        let mut wiki = WikipediaSummary {
            title: wikiresult.title.clone(),
            description: wikiresult.description.clone(),
            extract: wikiresult.extract.clone(),
            image: None,
            url: wikiresult.content_urls.desktop.page.clone(),
        };
        // cache disambiguation pages so we don't refetch them, but don't show them
        let compare = "may refer to:";
        if wiki.extract.ends_with(compare) {
            WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
            return None;
        }
        // truncate overly long descriptions, counting characters rather than bytes so
        // multi-byte text isn't split
        const MAX_LENGTH: usize = 800;
        if wiki.description.chars().count() > MAX_LENGTH {
            let mut shortened: String = wiki.description.chars().take(MAX_LENGTH).collect();
            shortened.push_str("...");
            wiki.description = shortened;
        }
        wiki.image = wikiresult.thumbnail.as_ref().and_then(|v| v.source.clone());
        WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
        Some(wiki)
    }
}
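
// A minimal test sketch, assuming `tokio` (with the `macros` and `rt` features) is available as a
// dev-dependency; it talks to the live Wikipedia API, so it is ignored by default.
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    #[ignore = "talks to the live wikipedia api"]
    async fn fetches_and_caches_a_summary() {
        // the article title and timeout here are purely illustrative
        let summary = get_wikipedia_page("Rust_(programming_language)", 10).await;
        assert!(summary.is_some());
        // a second lookup should be served from WIKIPEDIA_CACHE
        assert!(get_wikipedia_page("Rust_(programming_language)", 10).await.is_some());
    }
}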