/* * asklyphe-frontend routes/search.rs * - http routes for web searching * * Copyright (C) 2025 Real Microsoft, LLC * * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ use crate::routes::index::frontpage_error; use crate::routes::{authenticate_user, Themes, UserInfo}; use crate::searchbot::{gather_image_results, gather_search_results}; use crate::unit_converter; use crate::unit_converter::UnitConversion; use crate::wikipedia::WikipediaSummary; use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR}; use askama::Template; use asklyphe_common::nats; use asklyphe_common::nats::bingservice::{ BingServiceQuery, BingServiceRequest, BingServiceResponse, }; use asklyphe_common::nats::comms; use asklyphe_common::nats::searchservice::{ SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse, }; use async_nats::jetstream; use axum::extract::Query; use axum::http::StatusCode; use axum::response::{IntoResponse, Redirect}; use axum::Extension; use axum_extra::extract::CookieJar; use isahc::config::{IpVersion, RedirectPolicy}; use isahc::RequestExt; use serde::Serialize; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; use tokio::sync::{Mutex, RwLock}; use tracing::error; #[derive(Serialize)] pub struct SearchResult { pub title: Option, pub description: Option, pub url: String, pub percentage: String, pub value: String, pub asklyphe: bool, pub bing: bool, pub google: bool, } #[derive(Serialize)] pub struct ImageSearchResult { pub src: String, pub url: String, pub bing: bool, pub google: bool, } #[derive(Debug, Default)] pub struct Complications { disabled: bool, wikipedia: Option, unit_converter: Option, } pub async fn search( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> impl IntoResponse { let stype = params .get("stype") .unwrap_or(&"web".to_string()) .to_string(); if stype == "image" { return image_search(jar, Query(params), Extension(nats), Extension(opts)) .await .into_response(); } let use_javascript = params.get("js").unwrap_or(&"0".to_string()).to_string(); if use_javascript != *"1" { search_nojs(jar, Query(params), Extension(nats), Extension(opts)) .await .into_response() } else { search_js(jar, Query(params), Extension(nats), Extension(opts)) .await .into_response() } } #[derive(Template)] #[template(path = "search_js.html")] struct SearchTemplateJavascript { info: UserInfo, error: Option, complications: Complications, search_query: String, websearch_url: String, imagesearch_url: String, version: String, git_commit: String, built_on: String, year: String, alpha: bool, theme: Themes, } pub async fn search_js( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> impl IntoResponse { fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplateJavascript { let theme = info.get_theme(); let querystr = url_encoded_data::stringify(&[("q", query.as_str())]); SearchTemplateJavascript { info, error: Some(format!( "internal server error ({})! report to developers (:", error )), complications: Default::default(), search_query: query, websearch_url: format!("/ask?{querystr}&js=1"), imagesearch_url: format!("/ask?{querystr}&js=1&stype=image"), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } } if let Some(token) = jar.get("token") { let token = token.value().to_string(); let info = match authenticate_user(nats.clone(), token).await { Ok(i) => i, Err(e) => { return ( jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone()), ) .into_response(); } }; let mut query = params.get("q").unwrap_or(&"Deez".to_string()).to_string(); let og_query = query.clone(); let mut complications = Complications::default(); // todo: better way of specifying that user doesn't want complications if !query.contains("-complications") { let mut wikiquery = query.clone().to_lowercase(); wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace()); wikiquery = wikiquery.replace(' ', "%20"); // todo: proper url escaping let wikipedia_comp = tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await }); complications.wikipedia = wikipedia_comp.await.unwrap_or_default(); let mut unit_query = query.clone().to_lowercase(); unit_query = unit_query.replace("metre", "meter"); let unit_comp = unit_converter::convert_unit(&unit_query); complications.unit_converter = unit_comp; } else { complications.disabled = true; query = query.replace("-complications", ""); } let theme = info.get_theme(); let querystr = url_encoded_data::stringify(&[("q", og_query.as_str())]); SearchTemplateJavascript { info, error: None, complications, search_query: og_query, websearch_url: format!("/ask?{querystr}&js=1"), imagesearch_url: format!("/ask?{querystr}&js=1&stype=image"), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } .into_response() } else { Redirect::to("/").into_response() } } #[derive(Template)] #[template(path = "search.html")] pub struct SearchTemplate { pub info: UserInfo, pub error: Option, pub note: Option, pub complications: Complications, pub search_query: String, pub query_time: f64, pub page_rank_time: f64, pub max_relevance: String, pub search_results: Vec, pub blocked: Vec<(String, String)>, pub websearch_url: String, pub imagesearch_url: String, pub version: String, pub git_commit: String, pub built_on: String, pub year: String, pub alpha: bool, pub theme: Themes, } pub async fn search_nojs( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> impl IntoResponse { fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplate { let theme = info.get_theme(); let querystr = url_encoded_data::stringify(&[("q", query.as_str())]); SearchTemplate { info, error: Some(format!( "internal server error ({})! report to developers (:", error )), note: None, complications: Default::default(), search_query: query, query_time: 0.0, page_rank_time: 0.0, max_relevance: "".to_string(), search_results: vec![], blocked: vec![], websearch_url: format!("/ask?{querystr}&js=0"), imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } } if let Some(token) = jar.get("token") { let token = token.value().to_string(); let info = match authenticate_user(nats.clone(), token).await { Ok(i) => i, Err(e) => { return ( jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone()), ) .into_response(); } }; let mut query = params.get("q").unwrap_or(&"Deez".to_string()).to_string(); let og_query = query.clone(); let mut complications = Complications::default(); // todo: better way of specifying that user doesn't want complications if !query.contains("-complications") { let mut wikiquery = query.clone().to_lowercase(); wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace()); wikiquery = wikiquery.replace(' ', "%20"); // todo: proper url escaping let wikipedia_comp = tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await }); complications.wikipedia = wikipedia_comp.await.unwrap_or_default(); let mut unit_query = query.clone().to_lowercase(); unit_query = unit_query.replace("metre", "meter"); let unit_comp = unit_converter::convert_unit(&unit_query); complications.unit_converter = unit_comp; } else { complications.disabled = true; query = query.replace("-complications", ""); } let engines = params .get("engines") .unwrap_or(&"asklyphe,bing,google".to_string()) .to_string(); let mut engines = engines .split(",") .map(|v| v.to_string()) .collect::>(); if opts.emergency { // disable asklyphe backend engines.retain(|v| v != "asklyphe"); } gather_search_results(nats, query.as_str(), info, complications, Some(engines)) .await .into_response() } else { Redirect::to("/").into_response() } } #[derive(Serialize)] struct BlockedResult { url: String, reason: String, } #[derive(Serialize)] struct SearchJsonInner { #[serde(skip_serializing_if = "Option::is_none")] note: Option, query_time: f64, page_rank_time: f64, max_relevance: String, search_results: Vec, blocked: Vec, } #[derive(Serialize)] struct SearchJson { #[serde(skip_serializing_if = "Option::is_none")] error: Option, #[serde(skip_serializing_if = "Option::is_none")] results: Option, } pub async fn search_json( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> String { fn json(input: SearchJson) -> String { serde_json::to_string(&input).expect("failed to serialise!") } fn error_response(error: &str) -> String { json(SearchJson { error: Some(error.to_string()), results: None, }) } if let Some(token) = jar.get("token") { let token = token.value().to_string(); let info = match authenticate_user(nats.clone(), token).await { Ok(i) => i, Err(e) => { return error_response("not authenticated"); } }; let query = params.get("q").unwrap_or(&"Deez".to_string()).to_string(); let engines = params .get("engines") .unwrap_or(&"asklyphe,bing,google".to_string()) .to_string(); let mut engines = engines .split(",") .map(|v| v.to_string()) .collect::>(); if opts.emergency { // disable asklyphe backend engines.retain(|v| v != "asklyphe"); } let result = gather_search_results( nats, query.as_str(), info, Complications::default(), Some(engines), ) .await; json(SearchJson { error: None, results: Some(SearchJsonInner { note: result.note, query_time: result.query_time, page_rank_time: result.page_rank_time, max_relevance: result.max_relevance, search_results: result.search_results, blocked: result .blocked .into_iter() .map(|(url, reason)| BlockedResult { url, reason }) .collect(), }), }) } else { error_response("not authenticated") } } #[derive(Template)] #[template(path = "image_search.html")] pub struct ImageSearchTemplate { pub info: UserInfo, pub error: Option, pub note: Option, pub search_query: String, pub search_results: Vec, pub blocked: Vec<(String, String)>, pub websearch_url: String, pub imagesearch_url: String, pub version: String, pub git_commit: String, pub built_on: String, pub year: String, pub alpha: bool, pub theme: Themes, } pub async fn image_search( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> impl IntoResponse { fn error_response(query: String, info: UserInfo, error: &str) -> ImageSearchTemplate { let theme = info.get_theme(); let querystr = url_encoded_data::stringify(&[("q", query.as_str())]); ImageSearchTemplate { info, error: Some(format!( "internal server error ({})! report to developers (:", error )), note: None, search_query: query, search_results: vec![], blocked: vec![], websearch_url: format!("/ask?{querystr}&js=0"), imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } } if let Some(token) = jar.get("token") { let token = token.value().to_string(); let info = match authenticate_user(nats.clone(), token).await { Ok(i) => i, Err(e) => { return ( jar.remove("token"), frontpage_error(e.as_str(), opts.auth_url.clone()), ) .into_response(); } }; let query = params.get("q").unwrap_or(&"Deez".to_string()).to_string(); let js = params.get("js").unwrap_or(&"0".to_string()).to_string(); let engines = params .get("engines") .unwrap_or(&"bing,google".to_string()) .to_string(); let engines = engines .split(",") .map(|v| v.to_string()) .collect::>(); gather_image_results(nats, &query, info, Some(engines), js == "1") .await .into_response() } else { Redirect::to("/").into_response() } } pub async fn image_proxy( jar: CookieJar, Query(params): Query>, Extension(nats): Extension>, Extension(opts): Extension, ) -> impl IntoResponse { static PROXY_TOKEN_CACHE: RwLock> = RwLock::const_new(BTreeSet::new()); static IMAGE_CACHE: RwLock>> = RwLock::const_new(BTreeMap::new()); const TOKEN_CACHE_TTL_MINS: u64 = 10; const IMAGE_CACHE_TTL_MINS: u64 = 10; if let Some(token) = jar.get("token") { let token = token.value().to_string(); let mut authenticated = false; { let cache = PROXY_TOKEN_CACHE.read().await; authenticated = cache.contains(&token); } if !authenticated { let _info = match authenticate_user(nats.clone(), token.clone()).await { Ok(i) => i, Err(e) => { return StatusCode::UNAUTHORIZED.into_response(); } }; { let mut cache = PROXY_TOKEN_CACHE.write().await; cache.insert(token.clone()); tokio::spawn(async move { tokio::time::sleep(Duration::from_secs(TOKEN_CACHE_TTL_MINS * 60)).await; let mut cache = PROXY_TOKEN_CACHE.write().await; cache.remove(&token); }); } authenticated = true; } if !authenticated { return StatusCode::UNAUTHORIZED.into_response(); } use isahc::prelude::*; let url = params.get("url"); if url.is_none() { return StatusCode::BAD_REQUEST.into_response(); } let mut url = url.unwrap().to_string(); let mut host = url.split("://"); let host = host.nth(1).unwrap_or(&url); // fixme: we really need a better solution const BLOCKED_HOSTS: &[&str] = &[ "0", "10", "100.6", "100.7", "100.8", "100.9", "100.10", "100.11", "100.12", "127", "169.254", "172.1", "172.2", "172.30", "172.31", "192.168", "198.18", "198.19", "localhost", ]; for blocked in BLOCKED_HOSTS { if host.starts_with(blocked) { return StatusCode::NO_CONTENT.into_response(); } } if !(url.starts_with("http://") || url.starts_with("https://")) { url.insert_str(0, "http://"); } // fixme: replace with actual smart encoding system url = url.replace(" ", "%20"); { let cache = IMAGE_CACHE.read().await; if let Some(cached_image) = cache.get(&url).cloned() { return cached_image.into_response(); } } let response = isahc::Request::get(url.clone()) .ip_version(IpVersion::V4) .header("user-agent", "AskLyphe Image Proxy (+https://asklyphe.com)") .timeout(Duration::from_secs(10)) .redirect_policy(RedirectPolicy::Limit(6)) .body(()) .unwrap() .send_async() .await; if response.is_err() { return StatusCode::NO_CONTENT.into_response(); } let mut response = response.unwrap(); let data = response.bytes().await; if data.is_err() { return StatusCode::NO_CONTENT.into_response(); } let data = data.unwrap(); const MAX_IMAGE_SIZE_IN_CACHE_MB: usize = 512; if !data.is_empty() && data.len() < MAX_IMAGE_SIZE_IN_CACHE_MB * 1024 * 1024 { let mut cache = IMAGE_CACHE.write().await; const MAX_CACHE_SIZE_GB: usize = 10; const MAX_CACHE_SIZE_MB: usize = MAX_CACHE_SIZE_GB * 1024 * 1024; const MAX_CACHE_ENTRIES: usize = MAX_CACHE_SIZE_MB / MAX_IMAGE_SIZE_IN_CACHE_MB; if cache.len() < MAX_CACHE_ENTRIES { cache.insert(url.clone(), data.clone()); tokio::spawn(async move { tokio::time::sleep(Duration::from_secs(IMAGE_CACHE_TTL_MINS * 60)).await; let mut cache = IMAGE_CACHE.write().await; cache.remove(&url); }); } } data.into_response() } else { StatusCode::UNAUTHORIZED.into_response() } }