forked from asklyphe-public/asklyphe
		
	
		
			
	
	
		
			62 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
		
		
			
		
	
	
			62 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * searchservice hacks.rs
							 | 
						||
| 
								 | 
							
								 * - awful awful solutions to our issues
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * Copyright (C) 2025 Real Microsoft, LLC
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use std::ops::{ RangeInclusive};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Unicode code point ranges for scripts that are normally written without
								// spaces between words. Every range below is listed in `NO_WORD_BOUNDRIES`.

								/// Burmese letters, signs and digits (first part of the Myanmar block).
								const BURMESE_RANGE: RangeInclusive<char> = '\u{1000}'..='\u{104f}';

								/// CJK Unified Ideographs.
								const CHINESE_RANGE1: RangeInclusive<char> = '\u{4e00}'..='\u{9fff}';
								/// CJK Unified Ideographs Extension A.
								const CHINESE_RANGE2: RangeInclusive<char> = '\u{3400}'..='\u{4dbf}';
								/// CJK Unified Ideographs Extension B.
								const CHINESE_RANGE3: RangeInclusive<char> = '\u{20000}'..='\u{2a6df}';
								/// CJK Unified Ideographs Extension C.
								const CHINESE_RANGE4: RangeInclusive<char> = '\u{2A700}'..='\u{2B73F}';
								/// CJK Unified Ideographs Extension D.
								const CHINESE_RANGE5: RangeInclusive<char> = '\u{2B740}'..='\u{2B81F}';
								/// CJK Unified Ideographs Extension E.
								const CHINESE_RANGE6: RangeInclusive<char> = '\u{2B820}'..='\u{2CEAF}';
								/// CJK Unified Ideographs Extension F.
								const CHINESE_RANGE7: RangeInclusive<char> = '\u{2CEB0}'..='\u{2EBEF}';
								/// CJK Unified Ideographs Extension G.
								const CHINESE_RANGE8: RangeInclusive<char> = '\u{30000}'..='\u{3134F}';
								/// CJK Unified Ideographs Extension H.
								const CHINESE_RANGE9: RangeInclusive<char> = '\u{31350}'..='\u{323AF}';
								/// CJK Unified Ideographs Extension I.
								const CHINESE_RANGE10: RangeInclusive<char> = '\u{2EBF0}'..='\u{2EE5F}';
								/// CJK Compatibility Ideographs.
								const CHINESE_RANGE11: RangeInclusive<char> = '\u{F900}'..='\u{FAFF}';

								/// HIRAGANA
								const JAPANESE_RANGE1: RangeInclusive<char> = '\u{3040}'..='\u{309F}';
								/// KATAKANA
								const JAPANESE_RANGE2: RangeInclusive<char> = '\u{30A0}'..='\u{30FF}';

								/// Javanese.
								const JAVANESE_RANGE: RangeInclusive<char> = '\u{A980}'..='\u{A9DF}';
								/// Khmer.
								const KHMER_RANGE1: RangeInclusive<char> = '\u{1780}'..='\u{17FF}';
								/// Khmer Symbols.
								const KHMER_RANGE2: RangeInclusive<char> = '\u{19E0}'..='\u{19FF}';
								/// Lao.
								const LAO_RANGE: RangeInclusive<char> = '\u{0E80}'..='\u{0EFF}';
								/// Phags-pa.
								const PHAGSPA_RANGE: RangeInclusive<char> = '\u{A840}'..='\u{A87F}';
								/// Tai Tham.
								const TAITHAM_RANGE: RangeInclusive<char> = '\u{1A20}'..='\u{1AAF}';
								/// Thai.
								///
								/// The block ends at U+0E7F. The previous end point `\u{E07F}` had its digits
								/// transposed, which stretched this range across Hangul, Latin Extended
								/// Additional, General Punctuation and many other unrelated blocks, so
								/// ordinary space-delimited text (for example anything containing a curly
								/// apostrophe) was matched.
								const THAI_RANGE: RangeInclusive<char> = '\u{0E00}'..='\u{0E7F}';
								/// Tibetan.
								const TIBETAN_RANGE: RangeInclusive<char> = '\u{0F00}'..='\u{0FFF}';

								/// Every code point range treated as belonging to a script without explicit
								/// word separators. A character matches if any one of these ranges contains it.
								const NO_WORD_BOUNDRIES: &[RangeInclusive<char>] = &[
								    BURMESE_RANGE,
								    CHINESE_RANGE1, CHINESE_RANGE2, CHINESE_RANGE3, CHINESE_RANGE4, CHINESE_RANGE5, CHINESE_RANGE6, CHINESE_RANGE7, CHINESE_RANGE8, CHINESE_RANGE9, CHINESE_RANGE10, CHINESE_RANGE11,
								    JAPANESE_RANGE1, JAPANESE_RANGE2,
								    JAVANESE_RANGE,
								    KHMER_RANGE1, KHMER_RANGE2,
								    LAO_RANGE,
								    PHAGSPA_RANGE,
								    TAITHAM_RANGE,
								    THAI_RANGE,
								    TIBETAN_RANGE,
								];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								pub fn is_from_language_that_doesnt_use_word_separators(str: &str) -> bool {
							 | 
						||
| 
								 | 
							
								    for c in str.chars() {
							 | 
						||
| 
								 | 
							
								        for range in NO_WORD_BOUNDRIES {
							 | 
						||
| 
								 | 
							
								            if range.contains(&c) {
							 | 
						||
| 
								 | 
							
								                return true;
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    false
							 | 
						||
| 
								 | 
							
								}
							 |