asklyphe/asklyphe-frontend/src/searchbot.rs

509 lines
22 KiB
Rust

/*
* asklyphe-frontend searchbot.rs
* - commonly used functions for querying the searchservice
*
* Copyright (C) 2025 Real Microsoft, LLC
*
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::{BTreeMap, BTreeSet};
use std::ops::Deref;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use asklyphe_common::nats;
use asklyphe_common::nats::bingservice::{BingServiceQuery, BingServiceRequest, BingServiceResponse};
use asklyphe_common::nats::comms;
use asklyphe_common::nats::searchservice::{SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse};
use async_nats::jetstream;
use async_nats::jetstream::consumer::PullConsumer;
use async_nats::jetstream::stream::RetentionPolicy;
use futures::StreamExt;
use tokio::sync::Mutex;
use tracing::log::error;
use tracing::warn;
use ulid::Ulid;
use crate::routes::search::{Complications, ImageSearchResult, ImageSearchTemplate, SearchResult, SearchTemplate};
use crate::routes::UserInfo;
use crate::{BUILT_ON, GIT_COMMIT, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
/// Asks the search service for its site count and publishes it in `WEBSITE_COUNT`.
///
/// The call is best-effort: a failed query, or a reply that does not come from the
/// search service, leaves the counter untouched without logging anything. Replies from
/// the search service that are not a site count are reported through `warn!`.
pub async fn update_website_counter(nats: Arc<jetstream::Context>) {
    let count_query = comms::Query::SearchService(SearchSrvcQuery {
        request: SearchSrvcRequest::SiteCountRequest,
        replyto: "".to_string(),
    });
    let reply = comms::query_service(count_query, nats.deref(), false).await;

    match reply {
        Ok(comms::ServiceResponse::SearchService(SearchSrvcResponse::OtherError(e))) => {
            warn!("received error while asking for website count {e}");
        }
        Ok(comms::ServiceResponse::SearchService(SearchSrvcResponse::SiteCountResponse(count))) => {
            WEBSITE_COUNT.store(count.count, Ordering::Relaxed);
        }
        Ok(comms::ServiceResponse::SearchService(_)) => {
            warn!("received invalid response while asking for website count");
        }
        // Transport errors and replies from other services are ignored on purpose.
        _ => {}
    }
}
pub async fn gather_search_results(nats: Arc<jetstream::Context>, query: &str, user_info: UserInfo, complications: Complications, engines: Option<Vec<String>>) -> SearchTemplate {
let mut search_results = vec![];
let mut note = None;
let asklyphe = if let Some(engines) = &engines { engines.contains(&"asklyphe".to_string()) } else { true };
let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true };
let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true };
// bing
if bing {
let result = comms::query_service(comms::Query::BingService(BingServiceQuery {
request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest {
query: query.to_lowercase(),
}),
replyto: "".to_string(),
}), nats.deref(), true).await;
if let Ok(comms::ServiceResponse::BingService(result)) = result {
match result {
BingServiceResponse::InvalidRequest => {
note = Some("invalid request! report to developers!".to_string());
}
BingServiceResponse::OtherError(e) => {
error!("bing service gave unknown error {e}!!");
note = Some("internal server error! report to developers!".to_string());
}
BingServiceResponse::SearchResponse(results) => {
let result_count = results.results.len();
search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| {
const MAX_LENGTH: usize = 800;
const MAX_URL_LENGTH: usize = 100;
SearchResult {
url: v.url,
title: v.title.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_URL_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_URL_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
description: v.description.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0),
value: format!("{}", i),
asklyphe: false,
bing: true,
google: false,
}
}));
}
BingServiceResponse::ImageResponse(_) => {
error!("bing service gave image response to search request!!");
note = Some("internal server error! report to developers!".to_string());
}
}
}
}
// google
if google {
let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery {
request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest {
query: query.to_lowercase(),
}),
replyto: "".to_string(),
}), nats.deref(), true).await;
if let Ok(comms::ServiceResponse::BingService(result)) = result {
match result {
BingServiceResponse::InvalidRequest => {
note = Some("invalid request! report to developers!".to_string());
}
BingServiceResponse::OtherError(e) => {
error!("google service gave unknown error {e}!!");
note = Some("internal server error! report to developers!".to_string());
}
BingServiceResponse::SearchResponse(results) => {
let result_count = results.results.len();
search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| {
const MAX_LENGTH: usize = 800;
const MAX_URL_LENGTH: usize = 100;
SearchResult {
url: v.url,
title: v.title.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_URL_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_URL_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
description: v.description.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0),
value: format!("{}", i),
asklyphe: false,
bing: false,
google: true,
}
}));
}
BingServiceResponse::ImageResponse(_) => {
error!("google service gave image response to search request!!");
note = Some("internal server error! report to developers!".to_string());
}
}
}
}
// raw lyphe
let mut query_time = 0.0;
let mut page_rank_time = 0.0;
let mut max_relevance = 0.0;
let mut blocked = vec![];
if asklyphe {
let result = comms::query_service(comms::Query::SearchService(SearchSrvcQuery {
request: SearchSrvcRequest::SearchRequest(nats::searchservice::SearchRequest {
query: query.to_lowercase(),
}),
replyto: "".to_string(),
}), nats.deref(), true).await;
if let Ok(comms::ServiceResponse::SearchService(result)) = result {
match result {
SearchSrvcResponse::InvalidRequest => {
note = Some("invalid request! report to developers!".to_string());
}
SearchSrvcResponse::OtherError(e) => {
error!("search service gave unknown error {e}!!");
note = Some("unknown error! report to developers!".to_string());
}
SearchSrvcResponse::SearchResponse(results) => {
if note.is_none() {
note = if !results.exact_phrase_found {
Some("didn't find exact phrase, returning sites containing requested words".to_string())
} else {
None
};
}
query_time = results.total_query_seconds;
page_rank_time = results.pagerank_time_seconds;
max_relevance = results.max_relevance;
blocked = results.blocked;
search_results.extend(results.results.into_iter().map(|v| {
const MAX_LENGTH: usize = 800;
const MAX_URL_LENGTH: usize = 100;
SearchResult {
title: v.title.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_URL_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_URL_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
description: v.description.map(|v| {
let initial = html_escape::decode_html_entities(&v).to_string();
let mut shortened = String::new();
if initial.len() > MAX_LENGTH {
for (i, c) in initial.chars().enumerate() {
shortened.push(c);
if i > MAX_LENGTH {
break;
}
}
shortened.push_str("...");
} else {
shortened = initial;
}
shortened
}),
url: v.url,
percentage: format!("{:.2}", (v.relevance / results.max_relevance) * 100.0),
value: format!("{:.2}", v.relevance),
asklyphe: true,
bing: false,
google: false,
}
}));
}
_ => {
note = Some("bad response! report to developers!".to_string());
}
}
} else {
note = Some("unknown! report to developers!".to_string());
}
}
search_results.sort_by(|a, b| {
b.percentage.parse::<f64>().unwrap().total_cmp(&a.percentage.parse::<f64>().unwrap())
});
let mut already_included = BTreeMap::new();
let mut remove = vec![];
let mut add_bing = vec![];
let mut add_google = vec![];
for (i, result) in search_results.iter().enumerate() {
let mut trimmed_url = result.url.clone();
trimmed_url = trimmed_url.trim_end_matches('/').to_string();
if already_included.contains_key(&trimmed_url) && !result.asklyphe {
remove.push(i);
let main = already_included.get(&trimmed_url).unwrap();
if result.bing {
add_bing.push(*main);
}
if result.google {
add_google.push(*main);
}
}
already_included.insert(trimmed_url, i);
}
for i in add_bing {
search_results[i].bing = true;
}
for i in add_google {
search_results[i].google = true;
}
for (i, rm) in remove.into_iter().enumerate() {
search_results.remove(rm - i);
}
let theme = user_info.theme.clone();
let querystr = url_encoded_data::stringify(&[("q", query)]);
SearchTemplate {
info: user_info,
error: None,
note,
complications,
search_query: query.to_string(),
query_time,
page_rank_time,
max_relevance: format!("{:.2}", max_relevance),
search_results,
blocked,
websearch_url: format!("/ask?{querystr}&js=0"),
imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"),
version: VERSION.to_string(),
git_commit: GIT_COMMIT.to_string(),
built_on: BUILT_ON.to_string(),
year: YEAR.to_string(),
alpha: ALPHA,
theme,
}
}
pub async fn gather_image_results(nats: Arc<jetstream::Context>, query: &str, user_info: UserInfo, engines: Option<Vec<String>>, js: bool) -> ImageSearchTemplate {
let mut search_results = vec![];
let mut note = None;
let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true };
let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true };
// google
if google {
let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery {
request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest {
query: query.to_lowercase(),
}),
replyto: "".to_string(),
}), nats.deref(), true).await;
if let Ok(comms::ServiceResponse::BingService(result)) = result {
match result {
BingServiceResponse::InvalidRequest => {
note = Some("invalid request! report to developers!".to_string());
}
BingServiceResponse::OtherError(e) => {
error!("google service gave unknown error {e}!!");
note = Some("internal server error! report to developers!".to_string());
}
BingServiceResponse::ImageResponse(results) => {
search_results.extend(results.into_iter().enumerate().map(|(i, v)| {
const MAX_LENGTH: usize = 800;
const MAX_URL_LENGTH: usize = 100;
ImageSearchResult {
src: v.url.clone(),
url: v.url,
bing: false,
google: true,
}
}));
}
BingServiceResponse::SearchResponse(_) => {
error!("google service gave search response for image response!!");
note = Some("internal server error! report to developers!".to_string());
}
}
}
}
// bing
if bing {
let result = comms::query_service(comms::Query::BingService(BingServiceQuery {
request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest {
query: query.to_lowercase(),
}),
replyto: "".to_string(),
}), nats.deref(), true).await;
if let Ok(comms::ServiceResponse::BingService(result)) = result {
match result {
BingServiceResponse::InvalidRequest => {
note = Some("invalid request! report to developers!".to_string());
}
BingServiceResponse::OtherError(e) => {
error!("bing service gave unknown error {e}!!");
note = Some("internal server error! report to developers!".to_string());
}
BingServiceResponse::ImageResponse(results) => {
search_results.extend(results.into_iter().enumerate().map(|(i, v)| {
const MAX_LENGTH: usize = 800;
const MAX_URL_LENGTH: usize = 100;
ImageSearchResult {
src: v.url.clone(),
url: v.url,
bing: true,
google: false,
}
}));
}
BingServiceResponse::SearchResponse(_) => {
error!("bing service gave search response for image response!!");
note = Some("internal server error! report to developers!".to_string());
}
}
}
}
let mut already_included = BTreeMap::new();
let mut remove = vec![];
let mut add_bing = vec![];
let mut add_google = vec![];
for (i, result) in search_results.iter().enumerate() {
let mut trimmed_url = result.url.clone();
trimmed_url = trimmed_url.trim_end_matches('/').to_string();
if already_included.contains_key(&trimmed_url) {
remove.push(i);
let main = already_included.get(&trimmed_url).unwrap();
if result.bing {
add_bing.push(*main);
}
if result.google {
add_google.push(*main);
}
}
already_included.insert(trimmed_url, i);
}
for i in add_bing {
search_results[i].bing = true;
}
for i in add_google {
search_results[i].google = true;
}
for (i, rm) in remove.into_iter().enumerate() {
search_results.remove(rm - i);
}
for result in &mut search_results {
let url = url_encoded_data::stringify(&[("url", &result.url)]);
result.src = format!("/imgproxy?{}", url);
}
let theme = user_info.theme.clone();
ImageSearchTemplate {
info: user_info,
error: None,
note,
search_query: query.to_string(),
search_results,
blocked: vec![],
websearch_url: format!("/ask?q={query}&js={}", if js { 1 } else { 0 }),
imagesearch_url: format!("/ask?q={query}&js={}&stype=image", if js { 1 } else { 0 }),
version: VERSION.to_string(),
git_commit: GIT_COMMIT.to_string(),
built_on: BUILT_ON.to_string(),
year: YEAR.to_string(),
alpha: ALPHA,
theme,
}
}