/* * asklyphe-frontend searchbot.rs * - commonly used functions for querying the searchservice * * Copyright (C) 2025 Real Microsoft, LLC * * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ use std::collections::{BTreeMap, BTreeSet}; use std::ops::Deref; use std::sync::Arc; use std::sync::atomic::Ordering; use asklyphe_common::nats; use asklyphe_common::nats::bingservice::{BingServiceQuery, BingServiceRequest, BingServiceResponse}; use asklyphe_common::nats::comms; use asklyphe_common::nats::searchservice::{SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse}; use async_nats::jetstream; use async_nats::jetstream::consumer::PullConsumer; use async_nats::jetstream::stream::RetentionPolicy; use futures::StreamExt; use tokio::sync::Mutex; use tracing::log::error; use tracing::warn; use ulid::Ulid; use crate::routes::search::{Complications, ImageSearchResult, ImageSearchTemplate, SearchResult, SearchTemplate}; use crate::routes::UserInfo; use crate::{BUILT_ON, GIT_COMMIT, ALPHA, VERSION, WEBSITE_COUNT, YEAR}; pub async fn update_website_counter(nats: Arc) { let result = comms::query_service(comms::Query::SearchService(SearchSrvcQuery { request: SearchSrvcRequest::SiteCountRequest, replyto: "".to_string(), }), nats.deref(), false).await; if let Ok(comms::ServiceResponse::SearchService(result)) = result { match result { SearchSrvcResponse::OtherError(e) => { warn!("received error while asking for website count {e}"); } SearchSrvcResponse::SiteCountResponse(count) => { WEBSITE_COUNT.store(count.count, Ordering::Relaxed); } _ => { warn!("received invalid response while asking for website count"); } } } } pub async fn gather_search_results(nats: Arc, query: &str, user_info: UserInfo, complications: Complications, engines: Option>) -> SearchTemplate { let mut search_results = vec![]; let mut note = None; let asklyphe = if let Some(engines) = &engines { engines.contains(&"asklyphe".to_string()) } else { true }; let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true }; let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true }; // bing if bing { let result = comms::query_service(comms::Query::BingService(BingServiceQuery { request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest { query: query.to_lowercase(), }), replyto: "".to_string(), }), nats.deref(), true).await; if let Ok(comms::ServiceResponse::BingService(result)) = result { match result { BingServiceResponse::InvalidRequest => { note = Some("invalid request! report to developers!".to_string()); } BingServiceResponse::OtherError(e) => { error!("bing service gave unknown error {e}!!"); note = Some("internal server error! report to developers!".to_string()); } BingServiceResponse::SearchResponse(results) => { let result_count = results.results.len(); search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| { const MAX_LENGTH: usize = 800; const MAX_URL_LENGTH: usize = 100; SearchResult { url: v.url, title: v.title.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_URL_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_URL_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), description: v.description.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0), value: format!("{}", i), asklyphe: false, bing: true, google: false, } })); } BingServiceResponse::ImageResponse(_) => { error!("bing service gave image response to search request!!"); note = Some("internal server error! report to developers!".to_string()); } } } } // google if google { let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery { request: BingServiceRequest::SearchRequest(nats::bingservice::BingSearchRequest { query: query.to_lowercase(), }), replyto: "".to_string(), }), nats.deref(), true).await; if let Ok(comms::ServiceResponse::BingService(result)) = result { match result { BingServiceResponse::InvalidRequest => { note = Some("invalid request! report to developers!".to_string()); } BingServiceResponse::OtherError(e) => { error!("google service gave unknown error {e}!!"); note = Some("internal server error! report to developers!".to_string()); } BingServiceResponse::SearchResponse(results) => { let result_count = results.results.len(); search_results.extend(results.results.into_iter().enumerate().map(|(i, v)| { const MAX_LENGTH: usize = 800; const MAX_URL_LENGTH: usize = 100; SearchResult { url: v.url, title: v.title.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_URL_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_URL_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), description: v.description.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), percentage: format!("{:.2}", ((1.0 - (i as f64 / result_count as f64)) * 50.0) + 40.0), value: format!("{}", i), asklyphe: false, bing: false, google: true, } })); } BingServiceResponse::ImageResponse(_) => { error!("google service gave image response to search request!!"); note = Some("internal server error! report to developers!".to_string()); } } } } // raw lyphe let mut query_time = 0.0; let mut page_rank_time = 0.0; let mut max_relevance = 0.0; let mut blocked = vec![]; if asklyphe { let result = comms::query_service(comms::Query::SearchService(SearchSrvcQuery { request: SearchSrvcRequest::SearchRequest(nats::searchservice::SearchRequest { query: query.to_lowercase(), }), replyto: "".to_string(), }), nats.deref(), true).await; if let Ok(comms::ServiceResponse::SearchService(result)) = result { match result { SearchSrvcResponse::InvalidRequest => { note = Some("invalid request! report to developers!".to_string()); } SearchSrvcResponse::OtherError(e) => { error!("search service gave unknown error {e}!!"); note = Some("unknown error! report to developers!".to_string()); } SearchSrvcResponse::SearchResponse(results) => { if note.is_none() { note = if !results.exact_phrase_found { Some("didn't find exact phrase, returning sites containing requested words".to_string()) } else { None }; } query_time = results.total_query_seconds; page_rank_time = results.pagerank_time_seconds; max_relevance = results.max_relevance; blocked = results.blocked; search_results.extend(results.results.into_iter().map(|v| { const MAX_LENGTH: usize = 800; const MAX_URL_LENGTH: usize = 100; SearchResult { title: v.title.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_URL_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_URL_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), description: v.description.map(|v| { let initial = html_escape::decode_html_entities(&v).to_string(); let mut shortened = String::new(); if initial.len() > MAX_LENGTH { for (i, c) in initial.chars().enumerate() { shortened.push(c); if i > MAX_LENGTH { break; } } shortened.push_str("..."); } else { shortened = initial; } shortened }), url: v.url, percentage: format!("{:.2}", (v.relevance / results.max_relevance) * 100.0), value: format!("{:.2}", v.relevance), asklyphe: true, bing: false, google: false, } })); } _ => { note = Some("bad response! report to developers!".to_string()); } } } else { note = Some("unknown! report to developers!".to_string()); } } search_results.sort_by(|a, b| { b.percentage.parse::().unwrap().total_cmp(&a.percentage.parse::().unwrap()) }); let mut already_included = BTreeMap::new(); let mut remove = vec![]; let mut add_bing = vec![]; let mut add_google = vec![]; for (i, result) in search_results.iter().enumerate() { let mut trimmed_url = result.url.clone(); trimmed_url = trimmed_url.trim_end_matches('/').to_string(); if already_included.contains_key(&trimmed_url) && !result.asklyphe { remove.push(i); let main = already_included.get(&trimmed_url).unwrap(); if result.bing { add_bing.push(*main); } if result.google { add_google.push(*main); } } already_included.insert(trimmed_url, i); } for i in add_bing { search_results[i].bing = true; } for i in add_google { search_results[i].google = true; } for (i, rm) in remove.into_iter().enumerate() { search_results.remove(rm - i); } let theme = user_info.theme.clone(); let querystr = url_encoded_data::stringify(&[("q", query)]); SearchTemplate { info: user_info, error: None, note, complications, search_query: query.to_string(), query_time, page_rank_time, max_relevance: format!("{:.2}", max_relevance), search_results, blocked, websearch_url: format!("/ask?{querystr}&js=0"), imagesearch_url: format!("/ask?{querystr}&js=0&stype=image"), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } } pub async fn gather_image_results(nats: Arc, query: &str, user_info: UserInfo, engines: Option>, js: bool) -> ImageSearchTemplate { let mut search_results = vec![]; let mut note = None; let bing = if let Some(engines) = &engines { engines.contains(&"bing".to_string()) } else { true }; let google = if let Some(engines) = &engines { engines.contains(&"google".to_string()) } else { true }; // google if google { let result = comms::query_service(comms::Query::GoogleService(BingServiceQuery { request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest { query: query.to_lowercase(), }), replyto: "".to_string(), }), nats.deref(), true).await; if let Ok(comms::ServiceResponse::BingService(result)) = result { match result { BingServiceResponse::InvalidRequest => { note = Some("invalid request! report to developers!".to_string()); } BingServiceResponse::OtherError(e) => { error!("google service gave unknown error {e}!!"); note = Some("internal server error! report to developers!".to_string()); } BingServiceResponse::ImageResponse(results) => { search_results.extend(results.into_iter().enumerate().map(|(i, v)| { const MAX_LENGTH: usize = 800; const MAX_URL_LENGTH: usize = 100; ImageSearchResult { src: v.url.clone(), url: v.url, bing: false, google: true, } })); } BingServiceResponse::SearchResponse(_) => { error!("google service gave search response for image response!!"); note = Some("internal server error! report to developers!".to_string()); } } } } // bing if bing { let result = comms::query_service(comms::Query::BingService(BingServiceQuery { request: BingServiceRequest::ImageRequest(nats::bingservice::BingImageRequest { query: query.to_lowercase(), }), replyto: "".to_string(), }), nats.deref(), true).await; if let Ok(comms::ServiceResponse::BingService(result)) = result { match result { BingServiceResponse::InvalidRequest => { note = Some("invalid request! report to developers!".to_string()); } BingServiceResponse::OtherError(e) => { error!("bing service gave unknown error {e}!!"); note = Some("internal server error! report to developers!".to_string()); } BingServiceResponse::ImageResponse(results) => { search_results.extend(results.into_iter().enumerate().map(|(i, v)| { const MAX_LENGTH: usize = 800; const MAX_URL_LENGTH: usize = 100; ImageSearchResult { src: v.url.clone(), url: v.url, bing: true, google: false, } })); } BingServiceResponse::SearchResponse(_) => { error!("bing service gave search response for image response!!"); note = Some("internal server error! report to developers!".to_string()); } } } } let mut already_included = BTreeMap::new(); let mut remove = vec![]; let mut add_bing = vec![]; let mut add_google = vec![]; for (i, result) in search_results.iter().enumerate() { let mut trimmed_url = result.url.clone(); trimmed_url = trimmed_url.trim_end_matches('/').to_string(); if already_included.contains_key(&trimmed_url) { remove.push(i); let main = already_included.get(&trimmed_url).unwrap(); if result.bing { add_bing.push(*main); } if result.google { add_google.push(*main); } } already_included.insert(trimmed_url, i); } for i in add_bing { search_results[i].bing = true; } for i in add_google { search_results[i].google = true; } for (i, rm) in remove.into_iter().enumerate() { search_results.remove(rm - i); } for result in &mut search_results { let url = url_encoded_data::stringify(&[("url", &result.url)]); result.src = format!("/imgproxy?{}", url); } let theme = user_info.theme.clone(); ImageSearchTemplate { info: user_info, error: None, note, search_query: query.to_string(), search_results, blocked: vec![], websearch_url: format!("/ask?q={query}&js={}", if js { 1 } else { 0 }), imagesearch_url: format!("/ask?q={query}&js={}&stype=image", if js { 1 } else { 0 }), version: VERSION.to_string(), git_commit: GIT_COMMIT.to_string(), built_on: BUILT_ON.to_string(), year: YEAR.to_string(), alpha: ALPHA, theme, } }