forked from asklyphe-public/asklyphe
		
	
		
			
				
	
	
		
			62 lines
		
	
	
		
			No EOL
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			62 lines
		
	
	
		
			No EOL
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
/*
 | 
						|
 * searchservice hacks.rs
 | 
						|
 * - awful awful solutions to our issues
 | 
						|
 *
 | 
						|
 * Copyright (C) 2025 Real Microsoft, LLC
 | 
						|
 *
 | 
						|
 * This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.
 | 
						|
 *
 | 
						|
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 | 
						|
*/
 | 
						|
 | 
						|
use std::ops::{ RangeInclusive};
 | 
						|
 | 
						|
// Unicode code point ranges for scripts that are conventionally written
// without spaces between words. Each constant is an inclusive range of
// `char`s; all of them are collected in `NO_WORD_BOUNDRIES` below.

/// Start of the Unicode Myanmar block (U+1000..=U+104F).
const BURMESE_RANGE: RangeInclusive<char> = '\u{1000}'..='\u{104F}';

/// CJK Unified Ideographs.
const CHINESE_RANGE1: RangeInclusive<char> = '\u{4E00}'..='\u{9FFF}';
/// CJK Unified Ideographs Extension A.
const CHINESE_RANGE2: RangeInclusive<char> = '\u{3400}'..='\u{4DBF}';
/// CJK Unified Ideographs Extension B.
const CHINESE_RANGE3: RangeInclusive<char> = '\u{20000}'..='\u{2A6DF}';
/// CJK Unified Ideographs Extension C.
const CHINESE_RANGE4: RangeInclusive<char> = '\u{2A700}'..='\u{2B73F}';
/// CJK Unified Ideographs Extension D.
const CHINESE_RANGE5: RangeInclusive<char> = '\u{2B740}'..='\u{2B81F}';
/// CJK Unified Ideographs Extension E.
const CHINESE_RANGE6: RangeInclusive<char> = '\u{2B820}'..='\u{2CEAF}';
/// CJK Unified Ideographs Extension F.
const CHINESE_RANGE7: RangeInclusive<char> = '\u{2CEB0}'..='\u{2EBEF}';
/// CJK Unified Ideographs Extension G.
const CHINESE_RANGE8: RangeInclusive<char> = '\u{30000}'..='\u{3134F}';
/// CJK Unified Ideographs Extension H.
const CHINESE_RANGE9: RangeInclusive<char> = '\u{31350}'..='\u{323AF}';
/// CJK Unified Ideographs Extension I.
const CHINESE_RANGE10: RangeInclusive<char> = '\u{2EBF0}'..='\u{2EE5F}';
/// CJK Compatibility Ideographs.
const CHINESE_RANGE11: RangeInclusive<char> = '\u{F900}'..='\u{FAFF}';

/// Hiragana.
const JAPANESE_RANGE1: RangeInclusive<char> = '\u{3040}'..='\u{309F}';
/// Katakana.
const JAPANESE_RANGE2: RangeInclusive<char> = '\u{30A0}'..='\u{30FF}';

/// Javanese.
const JAVANESE_RANGE: RangeInclusive<char> = '\u{A980}'..='\u{A9DF}';

/// Khmer.
const KHMER_RANGE1: RangeInclusive<char> = '\u{1780}'..='\u{17FF}';
/// Khmer Symbols.
const KHMER_RANGE2: RangeInclusive<char> = '\u{19E0}'..='\u{19FF}';

/// Lao.
const LAO_RANGE: RangeInclusive<char> = '\u{0E80}'..='\u{0EFF}';

/// Phags-pa.
const PHAGSPA_RANGE: RangeInclusive<char> = '\u{A840}'..='\u{A87F}';

/// Tai Tham.
const TAITHAM_RANGE: RangeInclusive<char> = '\u{1A20}'..='\u{1AAF}';

/// Thai (U+0E00..=U+0E7F).
///
/// The upper bound must be U+0E7F. A bound of U+E07F (Private Use Area)
/// would make this range swallow every code point in between, including
/// Hangul and General Punctuation, so ordinary space-separated text would
/// be misclassified.
const THAI_RANGE: RangeInclusive<char> = '\u{0E00}'..='\u{0E7F}';

/// Tibetan.
const TIBETAN_RANGE: RangeInclusive<char> = '\u{0F00}'..='\u{0FFF}';

/// Every range above, grouped by script. A character that falls in any of
/// these ranges is treated as belonging to a script without word separators.
const NO_WORD_BOUNDRIES: &[RangeInclusive<char>] = &[
    BURMESE_RANGE,
    CHINESE_RANGE1,
    CHINESE_RANGE2,
    CHINESE_RANGE3,
    CHINESE_RANGE4,
    CHINESE_RANGE5,
    CHINESE_RANGE6,
    CHINESE_RANGE7,
    CHINESE_RANGE8,
    CHINESE_RANGE9,
    CHINESE_RANGE10,
    CHINESE_RANGE11,
    JAPANESE_RANGE1,
    JAPANESE_RANGE2,
    JAVANESE_RANGE,
    KHMER_RANGE1,
    KHMER_RANGE2,
    LAO_RANGE,
    PHAGSPA_RANGE,
    TAITHAM_RANGE,
    THAI_RANGE,
    TIBETAN_RANGE,
];
 | 
						|
 | 
						|
pub fn is_from_language_that_doesnt_use_word_separators(str: &str) -> bool {
 | 
						|
    for c in str.chars() {
 | 
						|
        for range in NO_WORD_BOUNDRIES {
 | 
						|
            if range.contains(&c) {
 | 
						|
                return true;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    false
 | 
						|
} |