Weight Levenshtein distance calculation to prefer replacement over addition/removal
This commit is contained in:
		
							parent
							
								
									26f5196138
								
							
						
					
					
						commit
						b91674678e
					
				
					 1 changed files with 3 additions and 2 deletions
				
			
		| 
						 | 
				
			
			@ -6,6 +6,7 @@ use std::sync::Mutex;
 | 
			
		|||
 | 
			
		||||
// how to generate words.txt:
 | 
			
		||||
// clone https://github.com/en-wl/wordlist && cd wordlist
 | 
			
		||||
// make
 | 
			
		||||
// ./scowl wl --deaccent > words0.txt
 | 
			
		||||
// filtered with this python script:
 | 
			
		||||
// -----------------------------------
 | 
			
		||||
| 
						 | 
				
			
			@ -195,8 +196,8 @@ fn levenshtein_distance(a: &str, other: &str) -> usize {
 | 
			
		|||
				// TODO: make addition/subtraction 1 more expensive than replacement, presumably by adding '+ 1' to 2/3 of these
 | 
			
		||||
				// motivation: "honex" from the Bee Movie script is turned into "hone" instead of "honey"; this will also generally improve results and is what Wikipedia says to do (best reason)
 | 
			
		||||
				dist[j] = 1 + cmp::min(
 | 
			
		||||
					dist.get(j - 1).unwrap(),
 | 
			
		||||
					cmp::min(dist_prev.get(j).unwrap(), dist_prev.get(j - 1).unwrap()));
 | 
			
		||||
					*dist.get(j - 1).unwrap() + 1,
 | 
			
		||||
					cmp::min(*dist_prev.get(j).unwrap() + 1, *dist_prev.get(j - 1).unwrap()));
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		mem::swap(&mut dist, &mut dist_prev);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue