Weight Levenshtein distance calculation to prefer replacement over addition/removal

This commit is contained in:
Book-reader 2025-05-10 02:28:16 +12:00
parent 26f5196138
commit b91674678e

View file

@ -6,6 +6,7 @@ use std::sync::Mutex;
// how to generate words.txt:
// clone https://github.com/en-wl/wordlist && cd wordlist
// make
// ./scowl wl --deaccent > words0.txt
// filtered with this python script:
// -----------------------------------
@ -195,8 +196,8 @@ fn levenshtein_distance(a: &str, other: &str) -> usize {
// TODO: make addition/removal cost 1 more than replacement, presumably by adding '+ 1' to two of the three terms below
// motivation: "honex" (from the Bee Movie script) is corrected to "hone" instead of "honey"; this weighting will also generally improve results, and it is what Wikipedia says to do (best reason)
dist[j] = 1 + cmp::min(
dist.get(j - 1).unwrap(),
cmp::min(dist_prev.get(j).unwrap(), dist_prev.get(j - 1).unwrap()));
*dist.get(j - 1).unwrap() + 1,
cmp::min(*dist_prev.get(j).unwrap() + 1, *dist_prev.get(j - 1).unwrap()));
}
}
mem::swap(&mut dist, &mut dist_prev);