forked from asklyphe-public/asklyphe
initial spellcheck implementation
This commit is contained in:
parent
0c10b15447
commit
b4112c311c
3 changed files with 119 additions and 2 deletions
|
@ -14,6 +14,7 @@
|
||||||
pub mod searchbot;
|
pub mod searchbot;
|
||||||
pub mod wikipedia;
|
pub mod wikipedia;
|
||||||
pub mod unit_converter;
|
pub mod unit_converter;
|
||||||
|
pub mod spellcheck;
|
||||||
pub mod routes;
|
pub mod routes;
|
||||||
|
|
||||||
use std::{env, process};
|
use std::{env, process};
|
||||||
|
|
|
@ -18,6 +18,7 @@ use crate::unit_converter;
|
||||||
use crate::unit_converter::UnitConversion;
|
use crate::unit_converter::UnitConversion;
|
||||||
use crate::wikipedia::WikipediaSummary;
|
use crate::wikipedia::WikipediaSummary;
|
||||||
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
|
use crate::{wikipedia, Opts, ALPHA, BUILT_ON, GIT_COMMIT, VERSION, YEAR};
|
||||||
|
use crate::spellcheck;
|
||||||
use askama::Template;
|
use askama::Template;
|
||||||
use asklyphe_common::nats;
|
use asklyphe_common::nats;
|
||||||
use asklyphe_common::nats::bingservice::{
|
use asklyphe_common::nats::bingservice::{
|
||||||
|
@ -158,18 +159,20 @@ pub async fn search_js(
|
||||||
let mut complications = Complications::default();
|
let mut complications = Complications::default();
|
||||||
// todo: better way of specifying that user doesn't want complications
|
// todo: better way of specifying that user doesn't want complications
|
||||||
if !query.contains("-complications") {
|
if !query.contains("-complications") {
|
||||||
let mut wikiquery = query.clone().to_lowercase();
|
/*let mut wikiquery = query.clone().to_lowercase();
|
||||||
wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
|
wikiquery.retain(|c| c.is_alphanumeric() || c.is_ascii_whitespace());
|
||||||
wikiquery = wikiquery.replace(' ', "%20");
|
wikiquery = wikiquery.replace(' ', "%20");
|
||||||
// todo: proper url escaping
|
// todo: proper url escaping
|
||||||
let wikipedia_comp =
|
let wikipedia_comp =
|
||||||
tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
|
tokio::spawn(async move { wikipedia::get_wikipedia_page(&wikiquery, 20).await });
|
||||||
complications.wikipedia = wikipedia_comp.await.unwrap_or_default();
|
complications.wikipedia = wikipedia_comp.await.unwrap_or_default();*/
|
||||||
|
|
||||||
let mut unit_query = query.clone().to_lowercase();
|
let mut unit_query = query.clone().to_lowercase();
|
||||||
unit_query = unit_query.replace("metre", "meter");
|
unit_query = unit_query.replace("metre", "meter");
|
||||||
let unit_comp = unit_converter::convert_unit(&unit_query);
|
let unit_comp = unit_converter::convert_unit(&unit_query);
|
||||||
complications.unit_converter = unit_comp;
|
complications.unit_converter = unit_comp;
|
||||||
|
|
||||||
|
let corrections = spellcheck::check(&query);
|
||||||
} else {
|
} else {
|
||||||
complications.disabled = true;
|
complications.disabled = true;
|
||||||
query = query.replace("-complications", "");
|
query = query.replace("-complications", "");
|
||||||
|
|
113
asklyphe-frontend/src/spellcheck.rs
Normal file
113
asklyphe-frontend/src/spellcheck.rs
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
use tracing::{debug, error};
|
||||||
|
use std::{cmp, mem};
|
||||||
|
// use tokio::sync::{Mutex, RwLock};
|
||||||
|
|
||||||
|
include!("./google-10000-english-no-swears.txt");
|
||||||
|
|
||||||
|
// maximum edit distance at which a known word is still considered as an alternative; anything farther away is ignored
|
||||||
|
const MAX_DISTANCE: usize = 6;
|
||||||
|
|
||||||
|
pub fn check(query: &String) -> Vec<String> {
|
||||||
|
error!("Query: {}", query);
|
||||||
|
let distances = query.split(" ")
|
||||||
|
.map(|qword| qword.to_lowercase())
|
||||||
|
.map(
|
||||||
|
|qword|
|
||||||
|
KNOWN_WORDS.iter()
|
||||||
|
.map(
|
||||||
|
|kword|
|
||||||
|
(qword.clone(), kword, levenshtein_distance(&qword, kword)))
|
||||||
|
.map(|val| (val.0, val.1, val.2))
|
||||||
|
.min_by(|a, b| a.2.cmp(&b.2)).unwrap()
|
||||||
|
).filter(|(_, _, d)| *d > 0)
|
||||||
|
.for_each(|word| {
|
||||||
|
debug!("instead of '{}' did you mean '{}'? (distance of {})", word.0, word.1, word.2);
|
||||||
|
});
|
||||||
|
// .collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// distances
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the Levenshtein (edit) distance between `a` and `other`.
///
/// Every insertion, deletion and substitution costs 1. The strings are
/// compared `char` by `char`, so input containing multi-byte UTF-8 characters
/// is handled instead of panicking on a slice that is not a char boundary.
///
/// Only two rows of the dynamic-programming table are kept in memory, so the
/// extra space is proportional to the length of `other`.
fn levenshtein_distance(a: &str, other: &str) -> usize {
    let other_chars: Vec<char> = other.chars().collect();
    let width = other_chars.len();

    // `prev[j]` is the distance between the prefix of `a` handled so far and
    // the first `j` chars of `other`; for the empty prefix that is just `j`.
    let mut prev: Vec<usize> = (0..=width).collect();
    let mut curr = vec![0usize; width + 1];

    for (i, a_char) in a.chars().enumerate() {
        // Turning the first `i + 1` chars of `a` into the empty string takes `i + 1` deletions.
        curr[0] = i + 1;

        for (j, &other_char) in other_chars.iter().enumerate() {
            curr[j + 1] = if a_char == other_char {
                // Matching chars: carry the diagonal value over at no cost.
                prev[j]
            } else {
                // Cheapest of insertion (left), deletion (above) and substitution (diagonal).
                1 + cmp::min(curr[j], cmp::min(prev[j + 1], prev[j]))
            };
        }

        // The finished row becomes the previous row of the next iteration.
        mem::swap(&mut curr, &mut prev);
    }

    prev[width]
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue