Base38 decoding

2023-01-13 12:56:56 +01:00 · 2023-01-13 12:56:56 +01:00 · 8605cf6e41
commit 8605cf6e41
parent 563e0cbd9b
1 changed files with 132 additions and 6 deletions
--- a/matter/src/codec/base38.rs
+++ b/matter/src/codec/base38.rs
@ -15,14 +15,72 @@
 *    limitations under the License.
 */

-//! Base38 encoding functions.
+//! Base38 encoding and decoding functions.
+
+use crate::error::Error;

 const BASE38_CHARS: [char; 38] = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
    'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '.',
 ];

-/// Encodes a byte array into a base38 string.
+const UNUSED: u8 = 255;
+
+// map of base38 charater to numeric value
+// subtract 45 from the character, then index into this array, if possible
+const DECODE_BASE38: [u8; 46] = [
+    36,     // '-', =45
+    37,     // '.', =46
+    UNUSED, // '/', =47
+    0,      // '0', =48
+    1,      // '1', =49
+    2,      // '2', =50
+    3,      // '3', =51
+    4,      // '4', =52
+    5,      // '5', =53
+    6,      // '6', =54
+    7,      // '7', =55
+    8,      // '8', =56
+    9,      // '9', =57
+    UNUSED, // ':', =58
+    UNUSED, // ';', =59
+    UNUSED, // '<', =50
+    UNUSED, // '=', =61
+    UNUSED, // '>', =62
+    UNUSED, // '?', =63
+    UNUSED, // '@', =64
+    10,     // 'A', =65
+    11,     // 'B', =66
+    12,     // 'C', =67
+    13,     // 'D', =68
+    14,     // 'E', =69
+    15,     // 'F', =70
+    16,     // 'G', =71
+    17,     // 'H', =72
+    18,     // 'I', =73
+    19,     // 'J', =74
+    20,     // 'K', =75
+    21,     // 'L', =76
+    22,     // 'M', =77
+    23,     // 'N', =78
+    24,     // 'O', =79
+    25,     // 'P', =80
+    26,     // 'Q', =81
+    27,     // 'R', =82
+    28,     // 'S', =83
+    29,     // 'T', =84
+    30,     // 'U', =85
+    31,     // 'V', =86
+    32,     // 'W', =87
+    33,     // 'X', =88
+    34,     // 'Y', =89
+    35,     // 'Z', =90
+];
+
+const BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK: [u8; 3] = [2, 4, 5];
+const RADIX: u32 = BASE38_CHARS.len() as u32;
+
+/// Encode a byte array into a base38 string.
 pub fn encode(bytes: &[u8], length: usize) -> String {
    let mut offset = 0;
    let mut result = String::new();
@ -66,16 +124,84 @@ fn encode_base38(mut value: u32, char_count: u8) -> String {
    result
 }

+/// Decode a base38-encoded string into a byte slice
+pub fn decode(base38_str: &str) -> Result<Vec<u8>, Error> {
+    let mut result = Vec::new();
+    let mut base38_characters_number: usize = base38_str.len();
+    let mut decoded_base38_characters: usize = 0;
+
+    while base38_characters_number > 0 {
+        let base38_characters_in_chunk: usize;
+        let bytes_in_decoded_chunk: usize;
+
+        if base38_characters_number >= BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize {
+            base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize;
+            bytes_in_decoded_chunk = 3;
+        } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize {
+            base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize;
+            bytes_in_decoded_chunk = 2;
+        } else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize {
+            base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize;
+            bytes_in_decoded_chunk = 1;
+        } else {
+            return Err(Error::InvalidData);
+        }
+
+        let mut value = 0u32;
+
+        for i in (1..=base38_characters_in_chunk).rev() {
+            let mut base38_chars = base38_str.chars();
+            let v = decode_char(base38_chars.nth(decoded_base38_characters + i - 1).unwrap())?;
+
+            value = value * RADIX + v as u32;
+        }
+
+        decoded_base38_characters += base38_characters_in_chunk;
+        base38_characters_number -= base38_characters_in_chunk;
+
+        for _i in 0..bytes_in_decoded_chunk {
+            result.push(value as u8);
+            value >>= 8;
+        }
+
+        if value > 0 {
+            // encoded value is too big to represent a correct chunk of size 1, 2 or 3 bytes
+            return Err(Error::InvalidArgument);
+        }
+    }
+
+    Ok(result)
+}
+
+fn decode_char(c: char) -> Result<u8, Error> {
+    let c = c as u8;
+    if !(45..=90).contains(&c) {
+        return Err(Error::InvalidData);
+    }
+
+    let c = DECODE_BASE38[c as usize - 45];
+    if c == UNUSED {
+        return Err(Error::InvalidData);
+    }
+
+    Ok(c)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
+    const ENCODED: &str = "-MOA57ZU02IT2L2BJ00";
+    const DECODED: [u8; 11] = [
+        0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02,
+    ];

    #[test]
    fn can_base38_encode() {
-        const ENCODED: &str = "-MOA57ZU02IT2L2BJ00";
-        const DECODED: [u8; 11] = [
-            0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02,
-        ];
        assert_eq!(encode(&DECODED, 11), ENCODED);
    }
+
+    #[test]
+    fn can_base38_decode() {
+        assert_eq!(decode(ENCODED).expect("can not decode base38"), DECODED);
+    }
 }