/*
 * asklyphe-frontend routes/search.rs
 * - http routes for web searching
 *
 * Copyright (C) 2025 Real Microsoft, LLC
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

use crate::routes::index::frontpage_error;
use crate::routes::{authenticate_user, Themes, UserInfo};
use crate::searchbot::{gather_image_results, gather_search_results};
use crate::unit_converter;
use crate::unit_converter::UnitConversion;
use crate::wikipedia::WikipediaSummary;
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
use askama::Template;
use asklyphe_common::nats;
use asklyphe_common::nats::bingservice::{
    BingServiceQuery, BingServiceRequest, BingServiceResponse,
};
use asklyphe_common::nats::comms;
use asklyphe_common::nats::searchservice::{
    SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse,
};
use async_nats::jetstream;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Redirect};
use axum::Extension;
use axum_extra::extract::CookieJar;
use isahc::config::{IpVersion, RedirectPolicy};
use isahc::RequestExt;
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::ops::Deref;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{Mutex, RwLock};
use tracing::error;

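/// A single combined web search result: optional title/description, the target
/// url, pre-formatted relevance figures (`percentage`, `value`), and flags for
/// which backends (asklyphe, bing, google) reported the result.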
#[derive(Serialize)]
pub struct SearchResult {
    pub title: Option<String>,
    pub description: Option<String>,
    pub url: String,
    pub percentage: String,
    pub value: String,
    pub asklyphe: bool,
    pub bing: bool,
    pub google: bool,
}

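/// An image search result: `src` is the image itself, `url` the page it was found on.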
#[derive(Serialize)]
pub struct ImageSearchResult {
    pub src: String,
    pub url: String,
    pub bing: bool,
    pub google: bool,
}

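/// Extra info boxes ("complications") rendered alongside the search results,
/// currently a Wikipedia summary and/or a unit conversion. `disabled` is set
/// when the query opts out with "-complications".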
#[derive(Debug, Default)]
pub struct Complications {
    disabled: bool,
    wikipedia: Option<WikipediaSummary>,
    unit_converter: Option<UnitConversion>,
}

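/// Entry point for /ask: dispatches to image search when `stype=image`,
/// otherwise to the javascript or no-javascript web search handler based on `js`.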
pub async fn search(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    let stype = params
        .get("stype")
        .unwrap_or(&"web".to_string())
        .to_string();
    if stype == "image" {
        return image_search(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response();
    }
    let use_javascript = params.get("js").unwrap_or(&"0".to_string()).to_string();
    if use_javascript != *"1" {
        search_nojs(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response()
    } else {
        search_js(jar, Query(params), Extension(nats), Extension(opts))
            .await
            .into_response()
    }
}

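/// Askama template for the javascript search page (search_js.html); unlike
/// `SearchTemplate` it carries no result rows, presumably because the page
/// fetches them client-side.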
#[derive(Template)]
#[template(path = "search_js.html")]
struct SearchTemplateJavascript {
    info: UserInfo,
    error: Option<String>,
    complications: Complications,
    search_query: String,
    websearch_url: String,
    imagesearch_url: String,
    version: String,
    git_commit: String,
    built_on: String,
    year: String,
    alpha: bool,
    theme: Themes,
}

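/// Javascript web search: authenticates the user from the "token" cookie,
/// builds wikipedia/unit-conversion complications (unless the query contains
/// "-complications"), and renders the javascript search template;
/// unauthenticated visitors are redirected to the front page.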
pub async fn search_js(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplateJavascript {
        let theme = info.get_theme();
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
        SearchTemplateJavascript {
            info,
            error: Some(format!(
                "internal server error ({})! report to developers (:",
                error
            )),
            complications: Default::default(),
            search_query: query,
            websearch_url: format!("/ask?{querystr}&js=1"),
            imagesearch_url: format!("/ask?{querystr}&js=1&stype=image"),
            version: VERSION.to_string(),
            git_commit: GIT_COMMIT.to_string(),
            built_on: BUILT_ON.to_string(),
            year: YEAR.to_string(),
            alpha: ALPHA,
            theme,
        }
    }
    if let Some(token) = jar.get("token") {
        let token = token.value().to_string();
        let info = match authenticate_user(nats.clone(), token).await {
            Ok(i) => i,
            Err(e) => {
                return (
                    jar.remove("token"),
                    frontpage_error(e.as_str(), opts.auth_url.clone()),
                )
                    .into_response();
            }
        };
        let mut query = params.get("q").unwrap_or(&"Deez".to_string()).to_string();
        let og_query = query.clone();
        let mut complications = Complications::default();
        // todo: better way of specifying that user doesn't want complications
        if !query.contains("-complications") {
            let mut wikiquery = query.clone().to_lowercase();
            wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
            wikiquery = wikiquery.replace(' ', "%20");
            // todo: proper url escaping
            let wikipedia_comp =
                tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();

            let mut unit_query = query.clone().to_lowercase();
            unit_query = unit_query.replace("metre", "meter");
            let unit_comp = unit_converter::convert_unit(&unit_query);
            complications.unit_converter = unit_comp;
        } else {
            complications.disabled = true;
            query = query.replace("-complications", "");
        }

        let theme = info.get_theme();
        let querystr = url_encoded_data::stringify(&[("q", og_query.as_str())]);
        SearchTemplateJavascript {
            info,
            error: None,
            complications,
            search_query: og_query,
            websearch_url: format!("/ask?{querystr}&js=1"),
            imagesearch_url: format!("/ask?{querystr}&js=1&stype=image"),
            version: VERSION.to_string(),
            git_commit: GIT_COMMIT.to_string(),
            built_on: BUILT_ON.to_string(),
            year: YEAR.to_string(),
            alpha: ALPHA,
            theme,
        }
        .into_response()
    } else {
        Redirect::to("/").into_response()
    }
}

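/// Askama template for the no-javascript search results page (search.html),
/// rendered server-side with the full result list, timings, and blocked urls.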
#[derive(Template)]
#[template(path = "search.html")]
pub struct SearchTemplate {
    pub info: UserInfo,
    pub error: Option<String>,
    pub note: Option<String>,
    pub complications: Complications,
    pub search_query: String,
    pub query_time: f64,
    pub page_rank_time: f64,
    pub max_relevance: String,
    pub search_results: Vec<SearchResult>,
    pub blocked: Vec<(String, String)>,
    pub websearch_url: String,
    pub imagesearch_url: String,
    pub version: String,
    pub git_commit: String,
    pub built_on: String,
    pub year: String,
    pub alpha: bool,
    pub theme: Themes,
}

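/// No-javascript web search: authenticates the user, builds complications,
/// parses the requested engines (default "asklyphe,bing,google"; asklyphe is
/// dropped in emergency mode), and renders the results gathered by
/// `gather_search_results`.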
pub async fn search_nojs(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    fn error_response(query: String, info: UserInfo, error: &str) -> SearchTemplate {
        let theme = info.get_theme();
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
        SearchTemplate {
            info,
            error: Some(format!(
                "internal server error ({})! report to developers (:",
                error
            )),
            note: None,
            complications: Default::default(),
            search_query: query,
            query_time: 0.0,
            page_rank_time: 0.0,
            max_relevance: "".to_string(),
            search_results: vec![],
            blocked: vec![],
            websearch_url: format!("/ask?{querystr}&js=0"),
            imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"),
            version: VERSION.to_string(),
            git_commit: GIT_COMMIT.to_string(),
            built_on: BUILT_ON.to_string(),
            year: YEAR.to_string(),
            alpha: ALPHA,
            theme,
        }
    }
    if let Some(token) = jar.get("token") {
        let token = token.value().to_string();
        let info = match authenticate_user(nats.clone(), token).await {
            Ok(i) => i,
            Err(e) => {
                return (
                    jar.remove("token"),
                    frontpage_error(e.as_str(), opts.auth_url.clone()),
                )
                    .into_response();
            }
        };
        let mut query = params.get("q").unwrap_or(&"Deez".to_string()).to_string();
        let og_query = query.clone();
        let mut complications = Complications::default();
        // todo: better way of specifying that user doesn't want complications
        if !query.contains("-complications") {
            let mut wikiquery = query.clone().to_lowercase();
            wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
            wikiquery = wikiquery.replace(' ', "%20");
            // todo: proper url escaping
            let wikipedia_comp =
                tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
            complications.wikipedia = wikipedia_comp.await.unwrap_or_default();

            let mut unit_query = query.clone().to_lowercase();
            unit_query = unit_query.replace("metre", "meter");
            let unit_comp = unit_converter::convert_unit(&unit_query);
            complications.unit_converter = unit_comp;
        } else {
            complications.disabled = true;
            query = query.replace("-complications", "");
        }

        let engines = params
            .get("engines")
            .unwrap_or(&"asklyphe,bing,google".to_string())
            .to_string();
        let mut engines = engines
            .split(",")
            .map(|v| v.to_string())
            .collect::<Vec<String>>();
        if opts.emergency {
            // disable asklyphe backend
            engines.retain(|v| v != "asklyphe");
        }

        gather_search_results(nats, query.as_str(), info, complications, Some(engines))
            .await
            .into_response()
    } else {
        Redirect::to("/").into_response()
    }
}

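// serialisable response types for the JSON search endpoint below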
#[derive(Serialize)]
struct BlockedResult {
    url: String,
    reason: String,
}

#[derive(Serialize)]
struct SearchJsonInner {
    #[serde(skip_serializing_if = "Option::is_none")]
    note: Option<String>,
    query_time: f64,
    page_rank_time: f64,
    max_relevance: String,
    search_results: Vec<SearchResult>,
    blocked: Vec<BlockedResult>,
}

#[derive(Serialize)]
struct SearchJson {
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    results: Option<SearchJsonInner>,
}

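/// JSON variant of the web search endpoint: returns `{"error": ...}` when the
/// "token" cookie is missing or invalid, otherwise the gathered results
/// serialised as `SearchJson` (complications are skipped here).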
pub async fn search_json(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> String {
    fn json(input: SearchJson) -> String {
        serde_json::to_string(&input).expect("failed to serialise!")
    }

    fn error_response(error: &str) -> String {
        json(SearchJson {
            error: Some(error.to_string()),
            results: None,
        })
    }
    if let Some(token) = jar.get("token") {
        let token = token.value().to_string();
        let info = match authenticate_user(nats.clone(), token).await {
            Ok(i) => i,
            Err(_) => {
                return error_response("not authenticated");
            }
        };
        let query = params.get("q").unwrap_or(&"Deez".to_string()).to_string();
        let engines = params
            .get("engines")
            .unwrap_or(&"asklyphe,bing,google".to_string())
            .to_string();
        let mut engines = engines
            .split(",")
            .map(|v| v.to_string())
            .collect::<Vec<String>>();
        if opts.emergency {
            // disable asklyphe backend
            engines.retain(|v| v != "asklyphe");
        }

        let result = gather_search_results(
            nats,
            query.as_str(),
            info,
            Complications::default(),
            Some(engines),
        )
        .await;

        json(SearchJson {
            error: None,
            results: Some(SearchJsonInner {
                note: result.note,
                query_time: result.query_time,
                page_rank_time: result.page_rank_time,
                max_relevance: result.max_relevance,
                search_results: result.search_results,
                blocked: result
                    .blocked
                    .into_iter()
                    .map(|(url, reason)| BlockedResult { url, reason })
                    .collect(),
            }),
        })
    } else {
        error_response("not authenticated")
    }
}

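/// Askama template for the image search results page (image_search.html).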
#[derive(Template)]
#[template(path = "image_search.html")]
pub struct ImageSearchTemplate {
    pub info: UserInfo,
    pub error: Option<String>,
    pub note: Option<String>,
    pub search_query: String,
    pub search_results: Vec<ImageSearchResult>,
    pub blocked: Vec<(String, String)>,
    pub websearch_url: String,
    pub imagesearch_url: String,
    pub version: String,
    pub git_commit: String,
    pub built_on: String,
    pub year: String,
    pub alpha: bool,
    pub theme: Themes,
}

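/// Image search: authenticates the user, parses the requested engines
/// (default "bing,google"), and renders the results gathered by
/// `gather_image_results`; the `js` flag is forwarded as a bool.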
pub async fn image_search(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    fn error_response(query: String, info: UserInfo, error: &str) -> ImageSearchTemplate {
        let theme = info.get_theme();
        let querystr = url_encoded_data::stringify(&[("q", query.as_str())]);
        ImageSearchTemplate {
            info,
            error: Some(format!(
                "internal server error ({})! report to developers (:",
                error
            )),
            note: None,
            search_query: query,
            search_results: vec![],
            blocked: vec![],
            websearch_url: format!("/ask?{querystr}&js=0"),
            imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"),
            version: VERSION.to_string(),
            git_commit: GIT_COMMIT.to_string(),
            built_on: BUILT_ON.to_string(),
            year: YEAR.to_string(),
            alpha: ALPHA,
            theme,
        }
    }
    if let Some(token) = jar.get("token") {
        let token = token.value().to_string();
        let info = match authenticate_user(nats.clone(), token).await {
            Ok(i) => i,
            Err(e) => {
                return (
                    jar.remove("token"),
                    frontpage_error(e.as_str(), opts.auth_url.clone()),
                )
                    .into_response();
            }
        };
        let query = params.get("q").unwrap_or(&"Deez".to_string()).to_string();
        let js = params.get("js").unwrap_or(&"0".to_string()).to_string();
        let engines = params
            .get("engines")
            .unwrap_or(&"bing,google".to_string())
            .to_string();
        let engines = engines
            .split(",")
            .map(|v| v.to_string())
            .collect::<Vec<String>>();

        gather_image_results(nats, &query, info, Some(engines), js == "1")
            .await
            .into_response()
    } else {
        Redirect::to("/").into_response()
    }
}

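/// Proxies remote images for the image search page so the client never fetches
/// third-party urls directly. Successful auth tokens and fetched images are
/// cached in-process with a short TTL, a prefix blocklist rejects private and
/// loopback hosts, and upstream fetches go over IPv4 with a 10s timeout and a
/// redirect limit of 6.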
pub async fn image_proxy(
    jar: CookieJar,
    Query(params): Query<HashMap<String, String>>,
    Extension(nats): Extension<Arc<jetstream::Context>>,
    Extension(opts): Extension<Opts>,
) -> impl IntoResponse {
    static PROXY_TOKEN_CACHE: RwLock<BTreeSet<String>> = RwLock::const_new(BTreeSet::new());
    static IMAGE_CACHE: RwLock<BTreeMap<String, Vec<u8>>> = RwLock::const_new(BTreeMap::new());
    const TOKEN_CACHE_TTL_MINS: u64 = 10;
    const IMAGE_CACHE_TTL_MINS: u64 = 10;
    if let Some(token) = jar.get("token") {
        let token = token.value().to_string();
        let mut authenticated = false;
        {
            let cache = PROXY_TOKEN_CACHE.read().await;
            authenticated = cache.contains(&token);
        }
        if !authenticated {
            let _info = match authenticate_user(nats.clone(), token.clone()).await {
                Ok(i) => i,
                Err(_) => {
                    return StatusCode::UNAUTHORIZED.into_response();
                }
            };
            {
                let mut cache = PROXY_TOKEN_CACHE.write().await;
                cache.insert(token.clone());
                tokio::spawn(async move {
                    tokio::time::sleep(Duration::from_secs(TOKEN_CACHE_TTL_MINS * 60)).await;
                    let mut cache = PROXY_TOKEN_CACHE.write().await;
                    cache.remove(&token);
                });
            }
            authenticated = true;
        }
        if !authenticated {
            return StatusCode::UNAUTHORIZED.into_response();
        }
        use isahc::prelude::*;
        let url = params.get("url");
        if url.is_none() {
            return StatusCode::BAD_REQUEST.into_response();
        }
        let mut url = url.unwrap().to_string();

        let mut host = url.split("://");
        let host = host.nth(1).unwrap_or(&url);
        // fixme: we really need a better solution
        const BLOCKED_HOSTS: &[&str] = &[
            "0",
            "10",
            "100.6",
            "100.7",
            "100.8",
            "100.9",
            "100.10",
            "100.11",
            "100.12",
            "127",
            "169.254",
            "172.1",
            "172.2",
            "172.30",
            "172.31",
            "192.168",
            "198.18",
            "198.19",
            "localhost",
        ];
        for blocked in BLOCKED_HOSTS {
            if host.starts_with(blocked) {
                return StatusCode::NO_CONTENT.into_response();
            }
        }

        if !(url.starts_with("http://") || url.starts_with("https://")) {
            url.insert_str(0, "http://");
        }
        // fixme: replace with actual smart encoding system
        url = url.replace(" ", "%20");

        {
            let cache = IMAGE_CACHE.read().await;
            if let Some(cached_image) = cache.get(&url).cloned() {
                return cached_image.into_response();
            }
        }

        let response = isahc::Request::get(url.clone())
            .ip_version(IpVersion::V4)
            .header("user-agent", "AskLyphe Image Proxy (+https://asklyphe.com)")
            .timeout(Duration::from_secs(10))
            .redirect_policy(RedirectPolicy::Limit(6))
            .body(())
            .unwrap()
            .send_async()
            .await;
        if response.is_err() {
            return StatusCode::NO_CONTENT.into_response();
        }
        let mut response = response.unwrap();
        let data = response.bytes().await;
        if data.is_err() {
            return StatusCode::NO_CONTENT.into_response();
        }
        let data = data.unwrap();
        const MAX_IMAGE_SIZE_IN_CACHE_MB: usize = 512;
        if !data.is_empty() && data.len() < MAX_IMAGE_SIZE_IN_CACHE_MB * 1024 * 1024 {
            let mut cache = IMAGE_CACHE.write().await;
            const MAX_CACHE_SIZE_GB: usize = 10;
            // 10 GB expressed in MB (1 GB = 1024 MB)
            const MAX_CACHE_SIZE_MB: usize = MAX_CACHE_SIZE_GB * 1024;
            const MAX_CACHE_ENTRIES: usize = MAX_CACHE_SIZE_MB / MAX_IMAGE_SIZE_IN_CACHE_MB;
            if cache.len() < MAX_CACHE_ENTRIES {
                cache.insert(url.clone(), data.clone());
                tokio::spawn(async move {
                    tokio::time::sleep(Duration::from_secs(IMAGE_CACHE_TTL_MINS * 60)).await;
                    let mut cache = IMAGE_CACHE.write().await;
                    cache.remove(&url);
                });
            }
        }
        data.into_response()
    } else {
        StatusCode::UNAUTHORIZED.into_response()
    }
}