forked from asklyphe-public/asklyphe
118 lines
No EOL
4.4 KiB
Rust
118 lines
No EOL
4.4 KiB
Rust
/*
 * searchservice process.rs
 * - route incoming nats requests to their specific functions
 *
 * Copyright (C) 2025 Real Microsoft, LLC
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
use std::sync::Arc;
|
|
use async_nats::{jetstream, Subject};
|
|
use log::{debug, error, warn};
|
|
use tokio::sync::Mutex;
|
|
use futures::StreamExt;
|
|
use asklyphe_common::db::tables::{INFO_TABLE, WEBSITE_TABLE};
|
|
use asklyphe_common::foundationdb::{Database, KeySelector, RangeOption};
|
|
use asklyphe_common::foundationdb::options::{StreamingMode, TransactionOption};
|
|
use asklyphe_common::foundationdb::tuple::{pack, Subspace};
|
|
use asklyphe_common::nats::searchservice;
|
|
use asklyphe_common::nats::searchservice::{SearchSrvcQuery, SearchSrvcRequest, SearchSrvcResponse, SearchRequest, SiteCountResponse, SearchResponse};
|
|
use crate::{algorithm, hacks};
|
|
|
|
pub async fn process(query: SearchSrvcQuery, db: Database) -> SearchSrvcResponse {
|
|
// a future is used so that the whole program doesn't die if an algorithm panics
|
|
let response = tokio::spawn(async move {
|
|
match query.request {
|
|
SearchSrvcRequest::SearchRequest(req) => {
|
|
search_request(req, &db).await
|
|
}
|
|
SearchSrvcRequest::SiteCountRequest => {
|
|
count_websites(&db).await
|
|
}
|
|
}}).await;
|
|
|
|
if let Ok(response) = response {
|
|
response
|
|
} else if let Err(e) = response {
|
|
SearchSrvcResponse::OtherError(e.to_string())
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
|
|
pub async fn search_request(req: SearchRequest, db: &Database) -> SearchSrvcResponse {
|
|
let words_initial: Vec<String> = req.query.split_whitespace().map(|s| s.to_string()).collect();
|
|
let mut words = vec![];
|
|
let mut no_separator_flag = false;
|
|
for word in words_initial {
|
|
if hacks::is_from_language_that_doesnt_use_word_separators(&word) {
|
|
words.extend(word.chars().map(|c| c.to_string()).collect::<Vec<String>>());
|
|
no_separator_flag = true;
|
|
} else {
|
|
words.push(word);
|
|
}
|
|
}
|
|
|
|
match algorithm::search(&db, words, no_separator_flag).await {
|
|
Some(results) => {
|
|
SearchSrvcResponse::SearchResponse(results)
|
|
}
|
|
None => {
|
|
SearchSrvcResponse::SearchResponse(SearchResponse {
|
|
results: vec![],
|
|
blocked: vec![],
|
|
pagerank_time_seconds: 0.0,
|
|
total_query_seconds: 0.0,
|
|
max_relevance: 0.0,
|
|
exact_phrase_found: false,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
pub async fn count_websites(db: &Database) -> SearchSrvcResponse {
|
|
let mut counter: u64 = 0;
|
|
let subspace = Subspace::from(WEBSITE_TABLE);
|
|
let mut last_key = subspace.range().0;
|
|
let final_key = subspace.range().1;
|
|
|
|
for _failsafe in 0..10000 {
|
|
let trx = db.create_trx();
|
|
if let Err(e) = trx {
|
|
error!("DATABASE ERROR page_links_exiting_count {e}");
|
|
} else {
|
|
let trx = trx.unwrap();
|
|
// link -> from -> *
|
|
let mut range = RangeOption::from((last_key.clone(), final_key.clone()));
|
|
range.mode = StreamingMode::Iterator;
|
|
range.limit = Some(10096);
|
|
|
|
let mut stream = trx.get_ranges_keyvalues(range, true);
|
|
|
|
let mut this_time = 0;
|
|
|
|
while let Some(kv) = stream.next().await {
|
|
if let Ok(kv) = kv {
|
|
counter += 1;
|
|
this_time += 1;
|
|
last_key = kv.key().to_vec();
|
|
} else if let Err(e) = kv {
|
|
eprintln!("err while counting {e}");
|
|
}
|
|
}
|
|
|
|
if this_time <= 10 {
|
|
return SearchSrvcResponse::SiteCountResponse(SiteCountResponse {
|
|
count: counter / 8,
|
|
});
|
|
}
|
|
}
|
|
|
|
}
|
|
SearchSrvcResponse::OtherError("couldn't retrieve count ):".to_string())
|
|
} |