From 8605cf6e4167ee6a4610b14f63fa6025ac694303 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 13 Jan 2023 12:56:56 +0100 Subject: [PATCH] Base38 decoding --- matter/src/codec/base38.rs | 138 +++++++++++++++++++++++++++++++++++-- 1 file changed, 132 insertions(+), 6 deletions(-) diff --git a/matter/src/codec/base38.rs b/matter/src/codec/base38.rs index 975c5c9..ddaf199 100644 --- a/matter/src/codec/base38.rs +++ b/matter/src/codec/base38.rs @@ -15,14 +15,72 @@ * limitations under the License. */ -//! Base38 encoding functions. +//! Base38 encoding and decoding functions. + +use crate::error::Error; const BASE38_CHARS: [char; 38] = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '.', ]; -/// Encodes a byte array into a base38 string. +const UNUSED: u8 = 255; + +// map of base38 charater to numeric value +// subtract 45 from the character, then index into this array, if possible +const DECODE_BASE38: [u8; 46] = [ + 36, // '-', =45 + 37, // '.', =46 + UNUSED, // '/', =47 + 0, // '0', =48 + 1, // '1', =49 + 2, // '2', =50 + 3, // '3', =51 + 4, // '4', =52 + 5, // '5', =53 + 6, // '6', =54 + 7, // '7', =55 + 8, // '8', =56 + 9, // '9', =57 + UNUSED, // ':', =58 + UNUSED, // ';', =59 + UNUSED, // '<', =50 + UNUSED, // '=', =61 + UNUSED, // '>', =62 + UNUSED, // '?', =63 + UNUSED, // '@', =64 + 10, // 'A', =65 + 11, // 'B', =66 + 12, // 'C', =67 + 13, // 'D', =68 + 14, // 'E', =69 + 15, // 'F', =70 + 16, // 'G', =71 + 17, // 'H', =72 + 18, // 'I', =73 + 19, // 'J', =74 + 20, // 'K', =75 + 21, // 'L', =76 + 22, // 'M', =77 + 23, // 'N', =78 + 24, // 'O', =79 + 25, // 'P', =80 + 26, // 'Q', =81 + 27, // 'R', =82 + 28, // 'S', =83 + 29, // 'T', =84 + 30, // 'U', =85 + 31, // 'V', =86 + 32, // 'W', =87 + 33, // 'X', =88 + 34, // 'Y', =89 + 35, // 'Z', =90 +]; + +const BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK: [u8; 3] = [2, 4, 5]; +const RADIX: u32 = BASE38_CHARS.len() as u32; + +/// Encode a byte array into a base38 string. pub fn encode(bytes: &[u8], length: usize) -> String { let mut offset = 0; let mut result = String::new(); @@ -66,16 +124,84 @@ fn encode_base38(mut value: u32, char_count: u8) -> String { result } +/// Decode a base38-encoded string into a byte slice +pub fn decode(base38_str: &str) -> Result, Error> { + let mut result = Vec::new(); + let mut base38_characters_number: usize = base38_str.len(); + let mut decoded_base38_characters: usize = 0; + + while base38_characters_number > 0 { + let base38_characters_in_chunk: usize; + let bytes_in_decoded_chunk: usize; + + if base38_characters_number >= BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize { + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize; + bytes_in_decoded_chunk = 3; + } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize { + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize; + bytes_in_decoded_chunk = 2; + } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize { + base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize; + bytes_in_decoded_chunk = 1; + } else { + return Err(Error::InvalidData); + } + + let mut value = 0u32; + + for i in (1..=base38_characters_in_chunk).rev() { + let mut base38_chars = base38_str.chars(); + let v = decode_char(base38_chars.nth(decoded_base38_characters + i - 1).unwrap())?; + + value = value * RADIX + v as u32; + } + + decoded_base38_characters += base38_characters_in_chunk; + base38_characters_number -= base38_characters_in_chunk; + + for _i in 0..bytes_in_decoded_chunk { + result.push(value as u8); + value >>= 8; + } + + if value > 0 { + // encoded value is too big to represent a correct chunk of size 1, 2 or 3 bytes + return Err(Error::InvalidArgument); + } + } + + Ok(result) +} + +fn decode_char(c: char) -> Result { + let c = c as u8; + if !(45..=90).contains(&c) { + return Err(Error::InvalidData); + } + + let c = DECODE_BASE38[c as usize - 45]; + if c == UNUSED { + return Err(Error::InvalidData); + } + + Ok(c) +} + #[cfg(test)] mod tests { use super::*; + const ENCODED: &str = "-MOA57ZU02IT2L2BJ00"; + const DECODED: [u8; 11] = [ + 0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02, + ]; #[test] fn can_base38_encode() { - const ENCODED: &str = "-MOA57ZU02IT2L2BJ00"; - const DECODED: [u8; 11] = [ - 0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02, - ]; assert_eq!(encode(&DECODED, 11), ENCODED); } + + #[test] + fn can_base38_decode() { + assert_eq!(decode(ENCODED).expect("can not decode base38"), DECODED); + } }