asklyphe/asklyphe-frontend/src/routes/search.rs
Evie Viau 95ba628934
Some checks failed
/ build-all-services (push) Has been cancelled
feature: Implement random theme and theme enum
For task T155
2025-03-19 20:58:23 -07:00

607 lines
20 KiB
Rust

/*
* asklyphe-frontend routes/search.rs
* - http routes for web searching
*
* Copyright (C) 2025 Real Microsoft, LLC
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::routes::index::frontpage_error;
use crate::routes::{authenticate_user, Themes, UserInfo};
use crate::searchbot::{gather_image_results, gather_search_results};
use crate::unit_converter;
use crate::unit_converter::UnitConversion;
use crate::wikipedia::WikipediaSummary;
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
use askama::Template;
use asklyphe_common::nats;
use asklyphe_common::nats::bingservice::{
BingServiceQuery, BingServiceRequest, BingServiceResponse,
};
use asklyphe_common::nats::comms;
use asklyphe_common::nats::searchservice::{
SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse,
};
use async_nats::jetstream;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Redirect};
use axum::Extension;
use axum_extra::extract::CookieJar;
use isahc::config::{IpVersion, RedirectPolicy};
use isahc::RequestExt;
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::ops::Deref;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{Mutex, RwLock};
use tracing::error;
/// A single web search hit, as carried by [`SearchTemplate`] and by the JSON
/// payload produced in `search_json`.
///
/// NOTE(review): instances are filled in outside this file; field meanings
/// that are not evident from the declaration are marked as assumptions.
#[derive(Serialize)]
pub struct SearchResult {
/// Title of the hit, when one is available.
pub title: Option<String>,
/// Description/snippet of the hit, when one is available.
pub description: Option<String>,
/// Address of the hit.
pub url: String,
/// Preformatted string; presumably the relevance relative to the best hit — TODO confirm.
pub percentage: String,
/// Preformatted string; presumably the raw relevance score — TODO confirm.
pub value: String,
/// Presumably set when the asklyphe backend returned this hit — TODO confirm.
pub asklyphe: bool,
/// Presumably set when Bing returned this hit — TODO confirm.
pub bing: bool,
/// Presumably set when Google returned this hit — TODO confirm.
pub google: bool,
}
/// A single image search hit, as carried by [`ImageSearchTemplate`].
///
/// NOTE(review): instances are filled in outside this file; the field notes
/// below are assumptions drawn from the names only.
#[derive(Serialize)]
pub struct ImageSearchResult {
/// Presumably the address of the image itself — TODO confirm.
pub src: String,
/// Presumably the address of the page the image was found on — TODO confirm.
pub url: String,
/// Presumably set when Bing returned this hit — TODO confirm.
pub bing: bool,
/// Presumably set when Google returned this hit — TODO confirm.
pub google: bool,
}
/// Extra widgets ("complications") computed from the query and handed to the
/// search templates alongside the regular results.
///
/// The `Default` value has nothing disabled and no widget populated.
#[derive(Debug, Default)]
pub struct Complications {
/// Set by the search handlers when the query contains `-complications`.
disabled: bool,
/// Result of `wikipedia::get_wikipedia_page` for the query, if any.
wikipedia: Option<WikipediaSummary>,
/// Result of `unit_converter::convert_unit` for the query, if any.
unit_converter: Option<UnitConversion>,
}
/// Entry point for search requests: picks the concrete handler from the query string.
///
/// * `stype=image` forwards to [`image_search`].
/// * otherwise `js=1` forwards to [`search_js`]; any other (or missing) `js`
///   value forwards to [`search_nojs`].
///
/// All extractors are passed through to the chosen handler unchanged.
pub async fn search(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    // Read both switches up front, before `params` is moved into a handler.
    let wants_images = params.get("stype").map(String::as_str) == Some("image");
    let wants_javascript = params.get("js").map(String::as_str) == Some("1");

    if wants_images {
        image_search(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response()
    } else if wants_javascript {
        search_js(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response()
    } else {
        search_nojs(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response()
    }
}
/// Template context for `search_js.html`, the JavaScript flavour of the
/// search page rendered by `search_js`.
#[derive(Template)]
#[template(path = "search_js.html")]
struct SearchTemplateJavascript {
/// The authenticated user, as returned by `authenticate_user`.
info: UserInfo,
/// Error text to show instead of a normal page, if any.
error: Option<String>,
/// Widgets computed from the query (Wikipedia summary, unit conversion).
complications: Complications,
/// The query exactly as received in the `q` parameter.
search_query: String,
/// Link to the web search for this query (`/ask?q=…&js=1`).
websearch_url: String,
/// Link to the image search for this query (`/ask?q=…&js=1&stype=image`).
imagesearch_url: String,
/// Filled from the crate-level `VERSION` constant.
version: String,
/// Filled from the crate-level `GIT_COMMIT` constant.
git_commit: String,
/// Filled from the crate-level `BUILT_ON` constant.
built_on: String,
/// Filled from the crate-level `YEAR` constant.
year: String,
/// Filled from the crate-level `ALPHA` constant.
alpha: bool,
/// Theme obtained from `UserInfo::get_theme` for the current user.
theme: Themes,
}
/// Renders the JavaScript flavour of the search page (`search_js.html`).
///
/// Only the page shell and the "complications" (Wikipedia summary, unit
/// conversion) are produced here; this handler does not query any search engine.
///
/// Flow:
/// * no `token` cookie: redirect to `/`.
/// * `authenticate_user` rejects the token: the cookie is removed and the
///   response built by `frontpage_error` is returned.
/// * otherwise the template is rendered for the `q` parameter (`"Deez"` when
///   the parameter is absent).
///
/// A query containing `-complications` skips the Wikipedia and unit-converter
/// lookups and marks the complications as disabled.
pub async fn search_js(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    let token = match jar.get("token") {
        Some(cookie) => cookie.value().to_string(),
        None => return Redirect::to("/").into_response(),
    };
    let info = match authenticate_user(nats, token).await {
        Ok(info) => info,
        Err(e) => {
            return (
                jar.remove("token"),
                frontpage_error(e.as_str(), opts.auth_url.clone()),
            )
                .into_response();
        }
    };

    let query = params
        .get("q")
        .cloned()
        .unwrap_or_else(|| "Deez".to_string());

    let mut complications = Complications::default();
    // todo: better way of specifying that user doesn't want complications
    if query.contains("-complications") {
        complications.disabled = true;
    } else {
        // Lookup key for Wikipedia: lowercase, only alphanumerics and ASCII
        // whitespace kept, spaces percent-encoded.
        // todo: proper url escaping
        let mut wikiquery = query.to_lowercase();
        wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
        let wikiquery = wikiquery.replace(' ', "%20");
        // The lookup runs on its own task; if joining that task fails the
        // summary simply falls back to its default instead of failing the page.
        let wikipedia_comp =
            tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
        complications.wikipedia = wikipedia_comp.await.unwrap_or_default();

        // Normalise the British spelling before handing over to the converter.
        let unit_query = query.to_lowercase().replace("metre", "meter");
        complications.unit_converter = unit_converter::convert_unit(&unit_query);
    }

    let theme = info.get_theme();
    let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
    SearchTemplateJavascript {
        info,
        error: None,
        complications,
        search_query: query,
        websearch_url: format!("/ask?{querystr}&js=1"),
        imagesearch_url: format!("/ask?{querystr}&js=1&stype=image"),
        version: VERSION.to_string(),
        git_commit: GIT_COMMIT.to_string(),
        built_on: BUILT_ON.to_string(),
        year: YEAR.to_string(),
        alpha: ALPHA,
        theme,
    }
    .into_response()
}
/// Template context for `search.html`, the server-rendered (no JavaScript)
/// search results page.
///
/// NOTE(review): the result-related fields are populated outside this file;
/// notes marked "presumably" are assumptions to be confirmed there.
#[derive(Template)]
#[template(path = "search.html")]
pub struct SearchTemplate {
/// The authenticated user, as returned by `authenticate_user`.
pub info: UserInfo,
/// Error text to show instead of results, if any.
pub error: Option<String>,
/// Optional informational note shown with the results.
pub note: Option<String>,
/// Widgets computed from the query (Wikipedia summary, unit conversion).
pub complications: Complications,
/// The query shown back to the user.
pub search_query: String,
/// Presumably the time spent querying the engines — unit not visible here, TODO confirm.
pub query_time: f64,
/// Presumably the time spent ranking the results — unit not visible here, TODO confirm.
pub page_rank_time: f64,
/// Preformatted string; presumably the highest relevance among the results — TODO confirm.
pub max_relevance: String,
/// The hits to render.
pub search_results: Vec<SearchResult>,
/// Results withheld from the list; `search_json` reads each pair as `(url, reason)`.
pub blocked: Vec<(String, String)>,
/// Link to the web search for this query.
pub websearch_url: String,
/// Link to the image search for this query.
pub imagesearch_url: String,
/// Build version string shown by the template.
pub version: String,
/// Git commit string shown by the template.
pub git_commit: String,
/// Build date string shown by the template.
pub built_on: String,
/// Year string shown by the template.
pub year: String,
/// Whether this build is flagged as alpha.
pub alpha: bool,
/// Theme to render the page with.
pub theme: Themes,
}
/// Server-rendered web search: authenticates the user, computes the
/// "complications" and returns whatever `gather_search_results` produces.
///
/// Flow:
/// * no `token` cookie: redirect to `/`.
/// * `authenticate_user` rejects the token: the cookie is removed and the
///   response built by `frontpage_error` is returned.
/// * otherwise the search runs for the `q` parameter (`"Deez"` when absent).
///
/// Query parameters read here:
/// * `q` - the search query. If it contains `-complications`, the Wikipedia
///   and unit-converter lookups are skipped and that marker is removed from
///   the query before it is sent to the engines.
/// * `engines` - comma separated engine names, `asklyphe,bing,google` when
///   absent. With `opts.emergency` set, `asklyphe` is dropped from the list.
pub async fn search_nojs(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    let token = match jar.get("token") {
        Some(cookie) => cookie.value().to_string(),
        None => return Redirect::to("/").into_response(),
    };
    let info = match authenticate_user(nats.clone(), token).await {
        Ok(info) => info,
        Err(e) => {
            return (
                jar.remove("token"),
                frontpage_error(e.as_str(), opts.auth_url.clone()),
            )
                .into_response();
        }
    };

    let raw_query = params
        .get("q")
        .cloned()
        .unwrap_or_else(|| "Deez".to_string());

    let mut complications = Complications::default();
    // todo: better way of specifying that user doesn't want complications
    let query = if raw_query.contains("-complications") {
        complications.disabled = true;
        // The marker is a display switch, not a search term.
        raw_query.replace("-complications", "")
    } else {
        // Lookup key for Wikipedia: lowercase, only alphanumerics and ASCII
        // whitespace kept, spaces percent-encoded.
        // todo: proper url escaping
        let mut wikiquery = raw_query.to_lowercase();
        wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
        let wikiquery = wikiquery.replace(' ', "%20");
        // The lookup runs on its own task; if joining that task fails the
        // summary simply falls back to its default instead of failing the page.
        let wikipedia_comp =
            tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
        complications.wikipedia = wikipedia_comp.await.unwrap_or_default();

        // Normalise the British spelling before handing over to the converter.
        let unit_query = raw_query.to_lowercase().replace("metre", "meter");
        complications.unit_converter = unit_converter::convert_unit(&unit_query);
        raw_query
    };

    let mut engines: Vec<String> = params
        .get("engines")
        .map(String::as_str)
        .unwrap_or("asklyphe,bing,google")
        .split(',')
        .map(String::from)
        .collect();
    if opts.emergency {
        // disable asklyphe backend
        engines.retain(|engine| engine != "asklyphe");
    }

    gather_search_results(nats, query.as_str(), info, complications, Some(engines))
        .await
        .into_response()
}
/// JSON shape of one blocked result; `search_json` builds it from a
/// `(url, reason)` pair.
#[derive(Serialize)]
struct BlockedResult {
/// First element of the pair.
url: String,
/// Second element of the pair.
reason: String,
}
/// Successful payload of the JSON search endpoint; `search_json` copies each
/// field from the value returned by `gather_search_results`.
#[derive(Serialize)]
struct SearchJsonInner {
/// Optional note; left out of the JSON entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
note: Option<String>,
/// Copied from the gathered result's `query_time`.
query_time: f64,
/// Copied from the gathered result's `page_rank_time`.
page_rank_time: f64,
/// Copied from the gathered result's `max_relevance`.
max_relevance: String,
/// The hits, in the order they were gathered.
search_results: Vec<SearchResult>,
/// Blocked results converted from `(url, reason)` pairs.
blocked: Vec<BlockedResult>,
}
/// Top-level envelope of the JSON search endpoint.
///
/// `search_json` sets exactly one of the two fields; a field that is `None`
/// is left out of the serialised object.
#[derive(Serialize)]
struct SearchJson {
/// Error message, present only on failure.
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
/// Search payload, present only on success.
#[serde(skip_serializing_if = "Option::is_none")]
results: Option<SearchJsonInner>,
}
/// JSON variant of the web search.
///
/// Returns a serialised [`SearchJson`]:
/// * `{"error":"not authenticated"}` when the `token` cookie is missing or
///   `authenticate_user` rejects it;
/// * `{"results":{…}}` with the output of `gather_search_results` otherwise.
///
/// Query parameters read here:
/// * `q` - the search query (`"Deez"` when absent). The `-complications`
///   marker is removed before searching, exactly as `search_nojs` does, so the
///   engines never receive it as a search term.
/// * `engines` - comma separated engine names, `asklyphe,bing,google` when
///   absent. With `opts.emergency` set, `asklyphe` is dropped from the list.
///
/// No complications are computed for this endpoint.
///
/// # Panics
/// Panics if `serde_json` fails to serialise the response.
pub async fn search_json(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> String {
    /// Serialises the envelope to its JSON text.
    fn json(input: SearchJson) -> String {
        serde_json::to_string(&input).expect("failed to serialise!")
    }
    /// Builds the JSON text of an error-only envelope.
    fn error_response(error: &str) -> String {
        json(SearchJson {
            error: Some(error.to_string()),
            results: None,
        })
    }

    let token = match jar.get("token") {
        Some(cookie) => cookie.value().to_string(),
        None => return error_response("not authenticated"),
    };
    let info = match authenticate_user(nats.clone(), token).await {
        Ok(info) => info,
        Err(_) => return error_response("not authenticated"),
    };

    // `-complications` is a display switch for the html pages, not a search
    // term; strip it so this endpoint searches for the same text as `search_nojs`.
    let query = params
        .get("q")
        .map(String::as_str)
        .unwrap_or("Deez")
        .replace("-complications", "");

    let mut engines: Vec<String> = params
        .get("engines")
        .map(String::as_str)
        .unwrap_or("asklyphe,bing,google")
        .split(',')
        .map(String::from)
        .collect();
    if opts.emergency {
        // disable asklyphe backend
        engines.retain(|engine| engine != "asklyphe");
    }

    let result = gather_search_results(
        nats,
        query.as_str(),
        info,
        Complications::default(),
        Some(engines),
    )
    .await;

    json(SearchJson {
        error: None,
        results: Some(SearchJsonInner {
            note: result.note,
            query_time: result.query_time,
            page_rank_time: result.page_rank_time,
            max_relevance: result.max_relevance,
            search_results: result.search_results,
            blocked: result
                .blocked
                .into_iter()
                .map(|(url, reason)| BlockedResult { url, reason })
                .collect(),
        }),
    })
}
/// Template context for `image_search.html`, the image results page.
///
/// NOTE(review): the result-related fields are populated outside this file.
#[derive(Template)]
#[template(path = "image_search.html")]
pub struct ImageSearchTemplate {
/// The authenticated user, as returned by `authenticate_user`.
pub info: UserInfo,
/// Error text to show instead of results, if any.
pub error: Option<String>,
/// Optional informational note shown with the results.
pub note: Option<String>,
/// The query shown back to the user.
pub search_query: String,
/// The image hits to render.
pub search_results: Vec<ImageSearchResult>,
/// Results withheld from the list; presumably `(url, reason)` pairs as in the web search — TODO confirm.
pub blocked: Vec<(String, String)>,
/// Link to the web search for this query.
pub websearch_url: String,
/// Link to the image search for this query.
pub imagesearch_url: String,
/// Build version string shown by the template.
pub version: String,
/// Git commit string shown by the template.
pub git_commit: String,
/// Build date string shown by the template.
pub built_on: String,
/// Year string shown by the template.
pub year: String,
/// Whether this build is flagged as alpha.
pub alpha: bool,
/// Theme to render the page with.
pub theme: Themes,
}
/// Image search: authenticates the user and returns whatever
/// `gather_image_results` produces.
///
/// Flow:
/// * no `token` cookie: redirect to `/`.
/// * `authenticate_user` rejects the token: the cookie is removed and the
///   response built by `frontpage_error` is returned.
/// * otherwise the search runs for the `q` parameter (`"Deez"` when absent).
///
/// Query parameters read here:
/// * `q` - the search query.
/// * `js` - forwarded to `gather_image_results` as `true` only when it is `1`.
/// * `engines` - comma separated engine names, `bing,google` when absent.
pub async fn image_search(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    let token = match jar.get("token") {
        Some(cookie) => cookie.value().to_string(),
        None => return Redirect::to("/").into_response(),
    };
    let info = match authenticate_user(nats.clone(), token).await {
        Ok(info) => info,
        Err(e) => {
            return (
                jar.remove("token"),
                frontpage_error(e.as_str(), opts.auth_url.clone()),
            )
                .into_response();
        }
    };

    let query = params
        .get("q")
        .cloned()
        .unwrap_or_else(|| "Deez".to_string());
    let use_javascript = params.get("js").map(String::as_str) == Some("1");
    let engines: Vec<String> = params
        .get("engines")
        .map(String::as_str)
        .unwrap_or("bing,google")
        .split(',')
        .map(String::from)
        .collect();

    gather_image_results(nats, &query, info, Some(engines), use_javascript)
        .await
        .into_response()
}
/// Authenticated image proxy: fetches the resource named by the `url` query
/// parameter on behalf of the client and returns its bytes.
///
/// Responses:
/// * `401 Unauthorized` - no `token` cookie, or `authenticate_user` rejected it.
/// * `400 Bad Request` - `url` is missing or cannot be turned into a request.
/// * `204 No Content` - the target host is refused, or the upstream fetch failed.
/// * otherwise the raw body returned by the upstream server.
///
/// Tokens that authenticated successfully and bodies that were fetched are
/// remembered in process-wide caches for `TOKEN_CACHE_TTL_MINS` /
/// `IMAGE_CACHE_TTL_MINS` minutes; each entry is removed by a task spawned
/// when it is inserted.
///
/// A url without an `http://` or `https://` prefix gets `http://` prepended,
/// and the host check is applied to the resulting url, i.e. to what is
/// actually going to be fetched.
pub async fn image_proxy(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    use isahc::prelude::*;

    static PROXY_TOKEN_CACHE: RwLock<BTreeSet<String>> = RwLock::const_new(BTreeSet::new());
    static IMAGE_CACHE: RwLock<BTreeMap<String, Vec<u8>>> = RwLock::const_new(BTreeMap::new());
    const TOKEN_CACHE_TTL_MINS: u64 = 10;
    const IMAGE_CACHE_TTL_MINS: u64 = 10;
    /// Bodies of this size or larger are served but never cached (512 MiB).
    const MAX_CACHED_IMAGE_BYTES: usize = 512 * 1024 * 1024;
    /// Upper bound for the sum of all cached urls and bodies (10 GiB).
    const MAX_CACHE_BYTES: u64 = 10 * 1024 * 1024 * 1024;

    /// Decides whether the proxy must refuse a target.
    ///
    /// `after_scheme` is the url with its `http://` / `https://` prefix
    /// removed. Returns `true` for hosts that are not plain public names or
    /// public IPv4 addresses:
    /// * names without a dot (`localhost`, bare intranet/service names) and
    ///   anything under `.localhost`;
    /// * IPv6 literals (the fetch is configured for IPv4 only);
    /// * IPv4 addresses in 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8,
    ///   169.254.0.0/16, 172.16.0.0/12, 192.168.0.0/16 and 198.18.0.0/15;
    /// * numeric-looking hosts that are not a strict dotted quad (`127.1`,
    ///   `0x7f.0.0.1`, ...), since lenient resolvers may read them as addresses.
    ///
    /// fixme: we really need a better solution - this only inspects the text
    /// of the url; names that resolve to internal addresses and redirects to
    /// internal hosts are not caught here.
    fn is_blocked_target(after_scheme: &str) -> bool {
        // The authority ends at the first path, query or fragment delimiter.
        let authority = after_scheme
            .split(|c: char| matches!(c, '/' | '?' | '#'))
            .next()
            .unwrap_or("");
        // Url parsers disagree on what a backslash means; never guess.
        if authority.contains('\\') {
            return true;
        }
        // Drop `user:password@` so it cannot hide the real host.
        let host_port = authority.rsplit('@').next().unwrap_or(authority);
        if host_port.starts_with('[') {
            return true;
        }
        let host = host_port
            .split(':')
            .next()
            .unwrap_or(host_port)
            .trim_end_matches('.')
            .to_ascii_lowercase();
        if !host.contains('.') || host.ends_with(".localhost") {
            return true;
        }
        match host.parse::<std::net::Ipv4Addr>() {
            Ok(ip) => {
                let octets = ip.octets();
                octets[0] == 0
                    || ip.is_private()
                    || ip.is_loopback()
                    || ip.is_link_local()
                    // carrier-grade NAT, 100.64.0.0/10
                    || (octets[0] == 100 && (octets[1] & 0xC0) == 64)
                    // benchmarking, 198.18.0.0/15
                    || (octets[0] == 198 && (octets[1] & 0xFE) == 18)
            }
            Err(_) => {
                // No public top-level domain is numeric, so a numeric last
                // label means an address written in a non-canonical form.
                let last_label = host.rsplit('.').next().unwrap_or("");
                last_label.starts_with("0x") || last_label.bytes().all(|b| b.is_ascii_digit())
            }
        }
    }

    let token = match jar.get("token") {
        Some(cookie) => cookie.value().to_string(),
        None => return StatusCode::UNAUTHORIZED.into_response(),
    };

    // Only ask the auth backend about tokens that were not seen recently.
    let token_is_cached = PROXY_TOKEN_CACHE.read().await.contains(&token);
    if !token_is_cached {
        if authenticate_user(nats, token.clone()).await.is_err() {
            return StatusCode::UNAUTHORIZED.into_response();
        }
        PROXY_TOKEN_CACHE.write().await.insert(token.clone());
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_secs(TOKEN_CACHE_TTL_MINS * 60)).await;
            PROXY_TOKEN_CACHE.write().await.remove(&token);
        });
    }

    let mut url = match params.get("url") {
        Some(raw) => raw.to_string(),
        None => return StatusCode::BAD_REQUEST.into_response(),
    };
    if !(url.starts_with("http://") || url.starts_with("https://")) {
        url.insert_str(0, "http://");
    }
    // fixme: replace with actual smart encoding system
    url = url.replace(' ', "%20");

    // Validate the final url, so the prefixing above cannot be used to turn
    // e.g. `localhost://x` into a request for `http://localhost`.
    let after_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .unwrap_or(url.as_str());
    if is_blocked_target(after_scheme) {
        return StatusCode::NO_CONTENT.into_response();
    }

    let cached_image = IMAGE_CACHE.read().await.get(&url).cloned();
    if let Some(image) = cached_image {
        return image.into_response();
    }

    // The url is user input: a malformed one is the client's error, not a panic.
    let request = match isahc::Request::get(url.as_str())
        .ip_version(IpVersion::V4)
        .header("user-agent", "AskLyphe Image Proxy (+https://asklyphe.com)")
        .timeout(Duration::from_secs(10))
        .redirect_policy(RedirectPolicy::Limit(6))
        .body(())
    {
        Ok(request) => request,
        Err(_) => return StatusCode::BAD_REQUEST.into_response(),
    };
    let mut response = match request.send_async().await {
        Ok(response) => response,
        Err(_) => return StatusCode::NO_CONTENT.into_response(),
    };
    let data = match response.bytes().await {
        Ok(bytes) => bytes,
        Err(_) => return StatusCode::NO_CONTENT.into_response(),
    };

    if !data.is_empty() && data.len() < MAX_CACHED_IMAGE_BYTES {
        let mut cache = IMAGE_CACHE.write().await;
        // Budget by actual size rather than by entry count. The walk is linear
        // in the number of cached entries and only happens on a cache miss.
        // usize -> u64 is a widening conversion on supported targets.
        let cached_bytes: u64 = cache
            .iter()
            .map(|(key, image)| (key.len() + image.len()) as u64)
            .sum();
        let entry_bytes = (url.len() + data.len()) as u64;
        if cached_bytes.saturating_add(entry_bytes) <= MAX_CACHE_BYTES {
            cache.insert(url.clone(), data.clone());
            tokio::spawn(async move {
                tokio::time::sleep(Duration::from_secs(IMAGE_CACHE_TTL_MINS * 60)).await;
                IMAGE_CACHE.write().await.remove(&url);
            });
        }
    }
    data.into_response()
}