/*
 * searchservice hacks.rs
 * - awful awful solutions to our issues
 *
 * Copyright (C) 2025 Real Microsoft, LLC
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, version 3.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

use std::ops::RangeInclusive;

/// Burmese: U+1000..=U+104F (the leading part of the Unicode Myanmar block).
const BURMESE_RANGE: RangeInclusive<char> = '\u{1000}'..='\u{104f}';

/// CJK Unified Ideographs.
const CHINESE_RANGE1: RangeInclusive<char> = '\u{4e00}'..='\u{9fff}';
/// CJK Unified Ideographs Extension A.
const CHINESE_RANGE2: RangeInclusive<char> = '\u{3400}'..='\u{4dbf}';
/// CJK Unified Ideographs Extension B.
const CHINESE_RANGE3: RangeInclusive<char> = '\u{20000}'..='\u{2a6df}';
/// CJK Unified Ideographs Extension C.
const CHINESE_RANGE4: RangeInclusive<char> = '\u{2A700}'..='\u{2B73F}';
/// CJK Unified Ideographs Extension D.
const CHINESE_RANGE5: RangeInclusive<char> = '\u{2B740}'..='\u{2B81F}';
/// CJK Unified Ideographs Extension E.
const CHINESE_RANGE6: RangeInclusive<char> = '\u{2B820}'..='\u{2CEAF}';
/// CJK Unified Ideographs Extension F.
const CHINESE_RANGE7: RangeInclusive<char> = '\u{2CEB0}'..='\u{2EBEF}';
/// CJK Unified Ideographs Extension G.
const CHINESE_RANGE8: RangeInclusive<char> = '\u{30000}'..='\u{3134F}';
/// CJK Unified Ideographs Extension H.
const CHINESE_RANGE9: RangeInclusive<char> = '\u{31350}'..='\u{323AF}';
/// CJK Unified Ideographs Extension I.
const CHINESE_RANGE10: RangeInclusive<char> = '\u{2EBF0}'..='\u{2EE5F}';
/// CJK Compatibility Ideographs.
const CHINESE_RANGE11: RangeInclusive<char> = '\u{F900}'..='\u{FAFF}';

/// HIRAGANA
const JAPANESE_RANGE1: RangeInclusive<char> = '\u{3040}'..='\u{309F}';
/// KATAKANA
const JAPANESE_RANGE2: RangeInclusive<char> = '\u{30A0}'..='\u{30FF}';

/// Javanese.
const JAVANESE_RANGE: RangeInclusive<char> = '\u{A980}'..='\u{A9DF}';

/// Khmer.
const KHMER_RANGE1: RangeInclusive<char> = '\u{1780}'..='\u{17FF}';
/// Khmer Symbols.
const KHMER_RANGE2: RangeInclusive<char> = '\u{19E0}'..='\u{19FF}';

/// Lao.
const LAO_RANGE: RangeInclusive<char> = '\u{0E80}'..='\u{0EFF}';

/// Phags-pa.
const PHAGSPA_RANGE: RangeInclusive<char> = '\u{A840}'..='\u{A87F}';

/// Tai Tham.
const TAITHAM_RANGE: RangeInclusive<char> = '\u{1A20}'..='\u{1AAF}';

/// Thai: U+0E00..=U+0E7F.
///
/// The upper bound must be U+0E7F. It was previously written as U+E07F, which
/// stretched this range across most of the Basic Multilingual Plane (Hangul,
/// Latin Extended Additional, General Punctuation, ...) and made the check
/// below return `true` for plenty of ordinary space-delimited text.
const THAI_RANGE: RangeInclusive<char> = '\u{0E00}'..='\u{0E7F}';

/// Tibetan.
const TIBETAN_RANGE: RangeInclusive<char> = '\u{0F00}'..='\u{0FFF}';

/// Every code point range treated as belonging to a script that is written
/// without explicit word separators.
const NO_WORD_BOUNDRIES: &[RangeInclusive<char>] = &[
    BURMESE_RANGE,
    CHINESE_RANGE1,
    CHINESE_RANGE2,
    CHINESE_RANGE3,
    CHINESE_RANGE4,
    CHINESE_RANGE5,
    CHINESE_RANGE6,
    CHINESE_RANGE7,
    CHINESE_RANGE8,
    CHINESE_RANGE9,
    CHINESE_RANGE10,
    CHINESE_RANGE11,
    JAPANESE_RANGE1,
    JAPANESE_RANGE2,
    JAVANESE_RANGE,
    KHMER_RANGE1,
    KHMER_RANGE2,
    LAO_RANGE,
    PHAGSPA_RANGE,
    TAITHAM_RANGE,
    THAI_RANGE,
    TIBETAN_RANGE,
];

/// Returns `true` if at least one character of `str` falls inside any of the
/// ranges listed in `NO_WORD_BOUNDRIES`.
///
/// A single matching character is enough: mixed input such as Latin text with
/// one ideograph in it yields `true`. An empty string yields `false`.
pub fn is_from_language_that_doesnt_use_word_separators(str: &str) -> bool {
    str.chars()
        .any(|c| NO_WORD_BOUNDRIES.iter().any(|range| range.contains(&c)))
}