forked from asklyphe-public/asklyphe
152 lines
5.4 KiB
Rust
152 lines
5.4 KiB
Rust
|
/*
 * asklyphe-frontend wikipedia.rs
 * - wikipedia helper functions
 *
 * Copyright (C) 2025 Real Microsoft, LLC
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
||
|
|
||
|
use std::collections::{BTreeMap, HashMap};
|
||
|
use std::sync::atomic::{AtomicI64, Ordering};
|
||
|
use std::sync::Mutex;
|
||
|
use std::time::Duration;
|
||
|
use isahc::auth::Authentication;
|
||
|
use isahc::config::RedirectPolicy;
|
||
|
use isahc::{HttpClient, Request};
|
||
|
use isahc::prelude::*;
|
||
|
use once_cell::sync::Lazy;
|
||
|
use serde::Deserialize;
|
||
|
use tracing::{error, warn};
|
||
|
|
||
|
#[derive(Clone, Debug, Deserialize)]
|
||
|
pub struct WikipediaPage {
|
||
|
pub title: String,
|
||
|
pub description: String,
|
||
|
pub extract: String,
|
||
|
pub thumbnail: Option<WikipediaThumbnail>,
|
||
|
pub content_urls: WikipediaContentUrls,
|
||
|
}
|
||
|
#[derive(Clone, Debug, Deserialize)]
|
||
|
pub struct WikipediaThumbnail {
|
||
|
pub source: Option<String>,
|
||
|
}
|
||
|
#[derive(Clone, Debug, Deserialize)]
|
||
|
pub struct WikipediaContentUrls {
|
||
|
pub desktop: WikipediaUrls,
|
||
|
}
|
||
|
#[derive(Clone, Debug, Deserialize)]
|
||
|
pub struct WikipediaUrls {
|
||
|
pub page: String,
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
pub static USER_AGENT: Lazy<String> = Lazy::new(|| format!("AskLyphe/{} (https://asklyphe.com; nikocs@voremicrocomputers.com)", env!("CARGO_PKG_VERSION")));
|
||
|
|
||
|
pub static WIKIPEDIA_CACHE: Lazy<Mutex<BTreeMap<String, WikipediaSummary>>> = Lazy::new(|| Mutex::new(BTreeMap::new()));
|
||
|
pub static WIKIPEDIA_TIMEOUT: AtomicI64 = AtomicI64::new(0);
|
||
|
|
||
|
/// Flattened page summary handed back to callers and stored in the summary cache.
#[derive(Debug, Clone)]
pub struct WikipediaSummary {
    /// Page title as returned by the API.
    pub title: String,
    /// Page description as returned by the API (may have been shortened by the fetcher).
    pub description: String,
    /// Extract text as returned by the API.
    pub extract: String,
    /// Thumbnail source link, when the API response carried one.
    pub image: Option<String>,
    /// Desktop page link taken from the API response.
    pub url: String,
}
|
||
|
|
||
|
pub async fn get_wikipedia_page(query_text: &str, timeout_secs: u64) -> Option<WikipediaSummary> {
|
||
|
if let Some(cached) = {
|
||
|
let cache = WIKIPEDIA_CACHE.lock().unwrap();
|
||
|
cache.get(query_text).cloned()
|
||
|
} {
|
||
|
let compare = "may refer to:";
|
||
|
if cached.extract.ends_with(compare) {
|
||
|
return None;
|
||
|
}
|
||
|
Some(cached)
|
||
|
} else {
|
||
|
if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > 0 {
|
||
|
if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > chrono::Utc::now().timestamp() {
|
||
|
return None;
|
||
|
} else {
|
||
|
WIKIPEDIA_TIMEOUT.store(0, Ordering::Relaxed);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
let url = format!(
|
||
|
r#"https://en.wikipedia.org/api/rest_v1/page/summary/{}?redirect=true"#, query_text);
|
||
|
|
||
|
let client = HttpClient::builder()
|
||
|
.redirect_policy(RedirectPolicy::Limit(10))
|
||
|
.timeout(Duration::from_secs(timeout_secs))
|
||
|
.build();
|
||
|
if let Err(e) = client {
|
||
|
error!("failed to build client for fetching wikipedia: {e}");
|
||
|
return None;
|
||
|
}
|
||
|
let client = client.unwrap();
|
||
|
let request = Request::get(url.clone())
|
||
|
.header("user-agent", USER_AGENT.as_str())
|
||
|
.header("accept", "*/*")
|
||
|
.body(());
|
||
|
if let Err(e) = request {
|
||
|
error!("failed to fetch wikipedia api: {e}");
|
||
|
return None;
|
||
|
}
|
||
|
let request = request.unwrap();
|
||
|
let response = client.send_async(request).await;
|
||
|
if let Err(e) = response {
|
||
|
error!("failed to fetch wikipedia api: {e}");
|
||
|
return None;
|
||
|
}
|
||
|
let mut response = response.unwrap();
|
||
|
if response.status() != 200 && response.status() != 302 {
|
||
|
return None;
|
||
|
}
|
||
|
let body = response.text().await;
|
||
|
if let Err(e) = body {
|
||
|
error!("failed to fetch wikipedia api: {e}");
|
||
|
return None;
|
||
|
}
|
||
|
let body = body.unwrap();
|
||
|
let wikiresult = serde_json::from_str::<WikipediaPage>(&body);
|
||
|
if let Err(e) = wikiresult {
|
||
|
error!("failed to deserialise wikipedia api: {e}");
|
||
|
return None;
|
||
|
}
|
||
|
let wikiresult = wikiresult.unwrap();
|
||
|
let mut wiki = WikipediaSummary {
|
||
|
title: wikiresult.title.clone(),
|
||
|
description: wikiresult.description.clone(),
|
||
|
extract: wikiresult.extract.clone(),
|
||
|
image: None,
|
||
|
url: wikiresult.content_urls.desktop.page.clone(),
|
||
|
};
|
||
|
let compare = "may refer to:";
|
||
|
if wiki.extract.ends_with(compare) {
|
||
|
WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
|
||
|
return None;
|
||
|
}
|
||
|
const MAX_LENGTH: usize = 800;
|
||
|
if wiki.description.len() > MAX_LENGTH {
|
||
|
let mut shortened = String::new();
|
||
|
for (i, c) in wiki.description.chars().enumerate() {
|
||
|
shortened.push(c);
|
||
|
if i > MAX_LENGTH {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
shortened.push_str("...");
|
||
|
wiki.description = shortened;
|
||
|
}
|
||
|
wiki.image = wikiresult.thumbnail.as_ref().and_then(|v| v.source.clone());
|
||
|
WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
|
||
|
Some(wiki)
|
||
|
}
|
||
|
}
|