forked from asklyphe-public/asklyphe
509 lines
22 KiB
Rust
509 lines
22 KiB
Rust
/*
 * asklyphe-frontend searchbot.rs
 * - commonly used functions for querying the searchservice
 *
 * Copyright (C) 2025 Real Microsoft, LLC
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
use std::collections::{BTreeMap, BTreeSet};
|
|
use std::ops::Deref;
|
|
use std::sync::Arc;
|
|
use std::sync::atomic::Ordering;
|
|
use asklyphe_common::nats;
|
|
use asklyphe_common::nats::bingservice::{BingServiceQuery, BingServiceRequest, BingServiceResponse};
|
|
use asklyphe_common::nats::comms;
|
|
use asklyphe_common::nats::searchservice::{SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse};
|
|
use async_nats::jetstream;
|
|
use async_nats::jetstream::consumer::PullConsumer;
|
|
use async_nats::jetstream::stream::RetentionPolicy;
|
|
use futures::StreamExt;
|
|
use tokio::sync::Mutex;
|
|
use tracing::log::error;
|
|
use tracing::warn;
|
|
use ulid::Ulid;
|
|
use crate::routes::search::{Complications, ImageSearchResult, ImageSearchTemplate, SearchResult, SearchTemplate};
|
|
use crate::routes::UserInfo;
|
|
use crate::{BUILT_ON, GIT_COMMIT, ALPHA, VERSION, WEBSITE_COUNT, YEAR};
|
|
|
|
pub async fn update_website_counter(nats: Arc<jetstream::Context>) {
|
|
let result = comms::query_service(comms::Query::SearchService(SearchSrvcQuery {
|
|
request: SearchSrvcRequest::SiteCountRequest,
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), false).await;
|
|
|
|
if let Ok(comms::ServiceResponse::SearchService(result)) = result {
|
|
match result {
|
|
SearchSrvcResponse::OtherError(e) => {
|
|
warn!("received error while asking for website count {e}");
|
|
}
|
|
SearchSrvcResponse::SiteCountResponse(count) => {
|
|
WEBSITE_COUNT.store(count.count, Ordering::Relaxed);
|
|
}
|
|
_ => {
|
|
warn!("received invalid response while asking for website count");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub async fn gather_search_results(nats: Arc<jetstream::Context>, query: &str, user_info: UserInfo, complications: Complications, engines: Option<Vec<String>>) -> SearchTemplate {
|
|
let mut search_results = vec![];
|
|
let mut note = None;
|
|
|
|
let asklyphe = if let Some(engines) = &engines { engines.contains(&"asklyphe".to_string()) } else { true };
|
|
let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true };
|
|
let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true };
|
|
|
|
// bing
|
|
|
|
if bing {
|
|
let result = comms::query_service(comms::Query::BingService(BingServiceQuery {
|
|
request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest {
|
|
query: query.to_lowercase(),
|
|
}),
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), true).await;
|
|
|
|
if let Ok(comms::ServiceResponse::BingService(result)) = result {
|
|
match result {
|
|
BingServiceResponse::InvalidRequest => {
|
|
note = Some("invalid request! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::OtherError(e) => {
|
|
error!("bing service gave unknown error {e}!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::SearchResponse(results) => {
|
|
let result_count = results.results.len();
|
|
|
|
search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| {
|
|
const MAX_LENGTH: usize = 800;
|
|
const MAX_URL_LENGTH: usize = 100;
|
|
SearchResult {
|
|
url: v.url,
|
|
title: v.title.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_URL_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_URL_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
description: v.description.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0),
|
|
value: format!("{}", i),
|
|
asklyphe: false,
|
|
bing: true,
|
|
google: false,
|
|
}
|
|
}));
|
|
}
|
|
BingServiceResponse::ImageResponse(_) => {
|
|
error!("bing service gave image response to search request!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// google
|
|
|
|
if google {
|
|
let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery {
|
|
request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest {
|
|
query: query.to_lowercase(),
|
|
}),
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), true).await;
|
|
|
|
if let Ok(comms::ServiceResponse::BingService(result)) = result {
|
|
match result {
|
|
BingServiceResponse::InvalidRequest => {
|
|
note = Some("invalid request! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::OtherError(e) => {
|
|
error!("google service gave unknown error {e}!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::SearchResponse(results) => {
|
|
let result_count = results.results.len();
|
|
|
|
search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| {
|
|
const MAX_LENGTH: usize = 800;
|
|
const MAX_URL_LENGTH: usize = 100;
|
|
SearchResult {
|
|
url: v.url,
|
|
title: v.title.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_URL_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_URL_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
description: v.description.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0),
|
|
value: format!("{}", i),
|
|
asklyphe: false,
|
|
bing: false,
|
|
google: true,
|
|
}
|
|
}));
|
|
}
|
|
BingServiceResponse::ImageResponse(_) => {
|
|
error!("google service gave image response to search request!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// raw lyphe
|
|
let mut query_time = 0.0;
|
|
let mut page_rank_time = 0.0;
|
|
let mut max_relevance = 0.0;
|
|
let mut blocked = vec![];
|
|
|
|
if asklyphe {
|
|
let result = comms::query_service(comms::Query::SearchService(SearchSrvcQuery {
|
|
request: SearchSrvcRequest::SearchRequest(nats::searchservice::SearchRequest {
|
|
query: query.to_lowercase(),
|
|
}),
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), true).await;
|
|
|
|
|
|
if let Ok(comms::ServiceResponse::SearchService(result)) = result {
|
|
match result {
|
|
SearchSrvcResponse::InvalidRequest => {
|
|
note = Some("invalid request! report to developers!".to_string());
|
|
}
|
|
SearchSrvcResponse::OtherError(e) => {
|
|
error!("search service gave unknown error {e}!!");
|
|
note = Some("unknown error! report to developers!".to_string());
|
|
}
|
|
SearchSrvcResponse::SearchResponse(results) => {
|
|
if note.is_none() {
|
|
note = if !results.exact_phrase_found {
|
|
Some("didn't find exact phrase, returning sites containing requested words".to_string())
|
|
} else {
|
|
None
|
|
};
|
|
}
|
|
|
|
query_time = results.total_query_seconds;
|
|
page_rank_time = results.pagerank_time_seconds;
|
|
max_relevance = results.max_relevance;
|
|
|
|
blocked = results.blocked;
|
|
|
|
search_results.extend(results.results.into_iter().map(|v| {
|
|
const MAX_LENGTH: usize = 800;
|
|
const MAX_URL_LENGTH: usize = 100;
|
|
SearchResult {
|
|
title: v.title.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_URL_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_URL_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
description: v.description.map(|v| {
|
|
let initial = html_escape::decode_html_entities(&v).to_string();
|
|
let mut shortened = String::new();
|
|
if initial.len() > MAX_LENGTH {
|
|
for (i, c) in initial.chars().enumerate() {
|
|
shortened.push(c);
|
|
if i > MAX_LENGTH {
|
|
break;
|
|
}
|
|
}
|
|
shortened.push_str("...");
|
|
} else {
|
|
shortened = initial;
|
|
}
|
|
shortened
|
|
}),
|
|
url: v.url,
|
|
percentage: format!("{:.2}", (v.relevance / results.max_relevance) * 100.0),
|
|
value: format!("{:.2}", v.relevance),
|
|
asklyphe: true,
|
|
bing: false,
|
|
google: false,
|
|
}
|
|
}));
|
|
}
|
|
_ => {
|
|
note = Some("bad response! report to developers!".to_string());
|
|
}
|
|
}
|
|
} else {
|
|
note = Some("unknown! report to developers!".to_string());
|
|
}
|
|
}
|
|
|
|
search_results.sort_by(|a, b| {
|
|
b.percentage.parse::<f64>().unwrap().total_cmp(&a.percentage.parse::<f64>().unwrap())
|
|
});
|
|
|
|
let mut already_included = BTreeMap::new();
|
|
|
|
let mut remove = vec![];
|
|
|
|
let mut add_bing = vec![];
|
|
let mut add_google = vec![];
|
|
|
|
for (i, result) in search_results.iter().enumerate() {
|
|
let mut trimmed_url = result.url.clone();
|
|
trimmed_url = trimmed_url.trim_end_matches('/').to_string();
|
|
if already_included.contains_key(&trimmed_url) && !result.asklyphe {
|
|
remove.push(i);
|
|
let main = already_included.get(&trimmed_url).unwrap();
|
|
if result.bing {
|
|
add_bing.push(*main);
|
|
}
|
|
if result.google {
|
|
add_google.push(*main);
|
|
}
|
|
}
|
|
already_included.insert(trimmed_url, i);
|
|
}
|
|
|
|
for i in add_bing {
|
|
search_results[i].bing = true;
|
|
}
|
|
for i in add_google {
|
|
search_results[i].google = true;
|
|
}
|
|
|
|
for (i, rm) in remove.into_iter().enumerate() {
|
|
search_results.remove(rm - i);
|
|
}
|
|
|
|
let theme = user_info.theme.clone();
|
|
let querystr = url_encoded_data::stringify(&[("q", query)]);
|
|
SearchTemplate {
|
|
info: user_info,
|
|
error: None,
|
|
note,
|
|
complications,
|
|
search_query: query.to_string(),
|
|
query_time,
|
|
page_rank_time,
|
|
max_relevance: format!("{:.2}", max_relevance),
|
|
search_results,
|
|
blocked,
|
|
websearch_url: format!("/ask?{querystr}&js=0"),
|
|
imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"),
|
|
version: VERSION.to_string(),
|
|
git_commit: GIT_COMMIT.to_string(),
|
|
built_on: BUILT_ON.to_string(),
|
|
year: YEAR.to_string(),
|
|
alpha: ALPHA,
|
|
theme,
|
|
}
|
|
}
|
|
|
|
pub async fn gather_image_results(nats: Arc<jetstream::Context>, query: &str, user_info: UserInfo, engines: Option<Vec<String>>, js: bool) -> ImageSearchTemplate {
|
|
let mut search_results = vec![];
|
|
let mut note = None;
|
|
|
|
let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true };
|
|
let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true };
|
|
|
|
// google
|
|
|
|
if google {
|
|
let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery {
|
|
request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest {
|
|
query: query.to_lowercase(),
|
|
}),
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), true).await;
|
|
|
|
if let Ok(comms::ServiceResponse::BingService(result)) = result {
|
|
match result {
|
|
BingServiceResponse::InvalidRequest => {
|
|
note = Some("invalid request! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::OtherError(e) => {
|
|
error!("google service gave unknown error {e}!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::ImageResponse(results) => {
|
|
search_results.extend(results.into_iter().enumerate().map(|(i, v)| {
|
|
const MAX_LENGTH: usize = 800;
|
|
const MAX_URL_LENGTH: usize = 100;
|
|
ImageSearchResult {
|
|
src: v.url.clone(),
|
|
url: v.url,
|
|
bing: false,
|
|
google: true,
|
|
}
|
|
}));
|
|
}
|
|
BingServiceResponse::SearchResponse(_) => {
|
|
error!("google service gave search response for image response!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// bing
|
|
|
|
if bing {
|
|
let result = comms::query_service(comms::Query::BingService(BingServiceQuery {
|
|
request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest {
|
|
query: query.to_lowercase(),
|
|
}),
|
|
replyto: "".to_string(),
|
|
}), nats.deref(), true).await;
|
|
|
|
if let Ok(comms::ServiceResponse::BingService(result)) = result {
|
|
match result {
|
|
BingServiceResponse::InvalidRequest => {
|
|
note = Some("invalid request! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::OtherError(e) => {
|
|
error!("bing service gave unknown error {e}!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
BingServiceResponse::ImageResponse(results) => {
|
|
search_results.extend(results.into_iter().enumerate().map(|(i, v)| {
|
|
const MAX_LENGTH: usize = 800;
|
|
const MAX_URL_LENGTH: usize = 100;
|
|
ImageSearchResult {
|
|
src: v.url.clone(),
|
|
url: v.url,
|
|
bing: true,
|
|
google: false,
|
|
}
|
|
}));
|
|
}
|
|
BingServiceResponse::SearchResponse(_) => {
|
|
error!("bing service gave search response for image response!!");
|
|
note = Some("internal server error! report to developers!".to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut already_included = BTreeMap::new();
|
|
|
|
let mut remove = vec![];
|
|
|
|
let mut add_bing = vec![];
|
|
let mut add_google = vec![];
|
|
|
|
for (i, result) in search_results.iter().enumerate() {
|
|
let mut trimmed_url = result.url.clone();
|
|
trimmed_url = trimmed_url.trim_end_matches('/').to_string();
|
|
if already_included.contains_key(&trimmed_url) {
|
|
remove.push(i);
|
|
let main = already_included.get(&trimmed_url).unwrap();
|
|
if result.bing {
|
|
add_bing.push(*main);
|
|
}
|
|
if result.google {
|
|
add_google.push(*main);
|
|
}
|
|
}
|
|
already_included.insert(trimmed_url, i);
|
|
}
|
|
|
|
for i in add_bing {
|
|
search_results[i].bing = true;
|
|
}
|
|
for i in add_google {
|
|
search_results[i].google = true;
|
|
}
|
|
|
|
for (i, rm) in remove.into_iter().enumerate() {
|
|
search_results.remove(rm - i);
|
|
}
|
|
|
|
for result in &mut search_results {
|
|
let url = url_encoded_data::stringify(&[("url", &result.url)]);
|
|
result.src = format!("/imgproxy?{}", url);
|
|
}
|
|
|
|
let theme = user_info.theme.clone();
|
|
ImageSearchTemplate {
|
|
info: user_info,
|
|
error: None,
|
|
note,
|
|
search_query: query.to_string(),
|
|
search_results,
|
|
blocked: vec![],
|
|
websearch_url: format!("/ask?q={query}&js={}", if js { 1 } else { 0 }),
|
|
imagesearch_url: format!("/ask?q={query}&js={}&stype=image", if js { 1 } else { 0 }),
|
|
version: VERSION.to_string(),
|
|
git_commit: GIT_COMMIT.to_string(),
|
|
built_on: BUILT_ON.to_string(),
|
|
year: YEAR.to_string(),
|
|
alpha: ALPHA,
|
|
theme,
|
|
}
|
|
}
|