152 lines
		
	
	
		
			No EOL
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
		
			No EOL
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
/*
 | 
						|
 * asklyphe-frontend wikipedia.rs
 | 
						|
 * - wikipedia helper functions
 | 
						|
 *
 | 
						|
 * Copyright (C) 2025 Real Microsoft, LLC
 | 
						|
 *
 | 
						|
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 | 
						|
 *
 | 
						|
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 | 
						|
*/
 | 
						|
 | 
						|
use std::collections::{BTreeMap, HashMap};
 | 
						|
use std::sync::atomic::{AtomicI64, Ordering};
 | 
						|
use std::sync::Mutex;
 | 
						|
use std::time::Duration;
 | 
						|
use isahc::auth::Authentication;
 | 
						|
use isahc::config::RedirectPolicy;
 | 
						|
use isahc::{HttpClient, Request};
 | 
						|
use isahc::prelude::*;
 | 
						|
use once_cell::sync::Lazy;
 | 
						|
use serde::Deserialize;
 | 
						|
use tracing::{error, warn};
 | 
						|
 | 
						|
#[derive(Clone, Debug, Deserialize)]
 | 
						|
pub struct WikipediaPage {
 | 
						|
    pub title: String,
 | 
						|
    pub description: String,
 | 
						|
    pub extract: String,
 | 
						|
    pub thumbnail: Option<WikipediaThumbnail>,
 | 
						|
    pub content_urls: WikipediaContentUrls,
 | 
						|
}
 | 
						|
#[derive(Clone, Debug, Deserialize)]
 | 
						|
pub struct WikipediaThumbnail {
 | 
						|
    pub source: Option<String>,
 | 
						|
}
 | 
						|
#[derive(Clone, Debug, Deserialize)]
 | 
						|
pub struct WikipediaContentUrls {
 | 
						|
    pub desktop: WikipediaUrls,
 | 
						|
}
 | 
						|
#[derive(Clone, Debug, Deserialize)]
 | 
						|
pub struct WikipediaUrls {
 | 
						|
    pub page: String,
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
pub static USER_AGENT: Lazy<String> = Lazy::new(|| format!("AskLyphe/{} (https://asklyphe.com; nikocs@voremicrocomputers.com)", env!("CARGO_PKG_VERSION")));
 | 
						|
 | 
						|
pub static WIKIPEDIA_CACHE: Lazy<Mutex<BTreeMap<String, WikipediaSummary>>> = Lazy::new(|| Mutex::new(BTreeMap::new()));
 | 
						|
pub static WIKIPEDIA_TIMEOUT: AtomicI64 = AtomicI64::new(0);
 | 
						|
 | 
						|
/// Flattened view of a Wikipedia page summary, as cached and returned by
/// `get_wikipedia_page`.
#[derive(Debug, Clone)]
pub struct WikipediaSummary {
    /// Title of the page.
    pub title: String,
    /// Short description of the page (shortened when very long).
    pub description: String,
    /// Plain-text extract of the page.
    pub extract: String,
    /// Thumbnail source taken from the response, if it had one.
    pub image: Option<String>,
    /// Desktop link to the page.
    pub url: String,
}
 | 
						|
 | 
						|
pub async fn get_wikipedia_page(query_text: &str, timeout_secs: u64) -> Option<WikipediaSummary> {
 | 
						|
    if let Some(cached) = {
 | 
						|
        let cache = WIKIPEDIA_CACHE.lock().unwrap();
 | 
						|
        cache.get(query_text).cloned()
 | 
						|
    } {
 | 
						|
        let compare = "may refer to:";
 | 
						|
        if cached.extract.ends_with(compare) {
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        Some(cached)
 | 
						|
    } else {
 | 
						|
        if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > 0 {
 | 
						|
            if WIKIPEDIA_TIMEOUT.load(Ordering::Relaxed) > chrono::Utc::now().timestamp() {
 | 
						|
                return None;
 | 
						|
            } else {
 | 
						|
                WIKIPEDIA_TIMEOUT.store(0, Ordering::Relaxed);
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        let url = format!(
 | 
						|
            r#"https://en.wikipedia.org/api/rest_v1/page/summary/{}?redirect=true"#, query_text);
 | 
						|
 | 
						|
        let client = HttpClient::builder()
 | 
						|
            .redirect_policy(RedirectPolicy::Limit(10))
 | 
						|
            .timeout(Duration::from_secs(timeout_secs))
 | 
						|
            .build();
 | 
						|
        if let Err(e) = client {
 | 
						|
            error!("failed to build client for fetching wikipedia: {e}");
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let client = client.unwrap();
 | 
						|
        let request = Request::get(url.clone())
 | 
						|
            .header("user-agent", USER_AGENT.as_str())
 | 
						|
            .header("accept", "*/*")
 | 
						|
            .body(());
 | 
						|
        if let Err(e) = request {
 | 
						|
            error!("failed to fetch wikipedia api: {e}");
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let request = request.unwrap();
 | 
						|
        let response = client.send_async(request).await;
 | 
						|
        if let Err(e) = response {
 | 
						|
            error!("failed to fetch wikipedia api: {e}");
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let mut response = response.unwrap();
 | 
						|
        if response.status() != 200 && response.status() != 302 {
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let body = response.text().await;
 | 
						|
        if let Err(e) = body {
 | 
						|
            error!("failed to fetch wikipedia api: {e}");
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let body = body.unwrap();
 | 
						|
        let wikiresult = serde_json::from_str::<WikipediaPage>(&body);
 | 
						|
        if let Err(e) = wikiresult {
 | 
						|
            error!("failed to deserialise wikipedia api: {e}");
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        let wikiresult = wikiresult.unwrap();
 | 
						|
        let mut wiki = WikipediaSummary {
 | 
						|
            title: wikiresult.title.clone(),
 | 
						|
            description: wikiresult.description.clone(),
 | 
						|
            extract: wikiresult.extract.clone(),
 | 
						|
            image: None,
 | 
						|
            url: wikiresult.content_urls.desktop.page.clone(),
 | 
						|
        };
 | 
						|
        let compare = "may refer to:";
 | 
						|
        if wiki.extract.ends_with(compare) {
 | 
						|
            WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
 | 
						|
            return None;
 | 
						|
        }
 | 
						|
        const MAX_LENGTH: usize = 800;
 | 
						|
        if wiki.description.len() > MAX_LENGTH {
 | 
						|
            let mut shortened = String::new();
 | 
						|
            for (i, c) in wiki.description.chars().enumerate() {
 | 
						|
                shortened.push(c);
 | 
						|
                if i > MAX_LENGTH {
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            shortened.push_str("...");
 | 
						|
            wiki.description = shortened;
 | 
						|
        }
 | 
						|
        wiki.image = wikiresult.thumbnail.as_ref().and_then(|v| v.source.clone());
 | 
						|
        WIKIPEDIA_CACHE.lock().unwrap().insert(query_text.to_string(), wiki.clone());
 | 
						|
        Some(wiki)
 | 
						|
    }
 | 
						|
} |