Base38 decoding

This commit is contained in:
Marcel 2023-01-13 12:56:56 +01:00
parent 563e0cbd9b
commit 8605cf6e41

View file

@ -15,14 +15,72 @@
* limitations under the License.
*/
//! Base38 encoding functions.
//! Base38 encoding and decoding functions.
use crate::error::Error;
const BASE38_CHARS: [char; 38] = [
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '.',
];
/// Encodes a byte array into a base38 string.
const UNUSED: u8 = 255;
// map of base38 charater to numeric value
// subtract 45 from the character, then index into this array, if possible
const DECODE_BASE38: [u8; 46] = [
36, // '-', =45
37, // '.', =46
UNUSED, // '/', =47
0, // '0', =48
1, // '1', =49
2, // '2', =50
3, // '3', =51
4, // '4', =52
5, // '5', =53
6, // '6', =54
7, // '7', =55
8, // '8', =56
9, // '9', =57
UNUSED, // ':', =58
UNUSED, // ';', =59
UNUSED, // '<', =50
UNUSED, // '=', =61
UNUSED, // '>', =62
UNUSED, // '?', =63
UNUSED, // '@', =64
10, // 'A', =65
11, // 'B', =66
12, // 'C', =67
13, // 'D', =68
14, // 'E', =69
15, // 'F', =70
16, // 'G', =71
17, // 'H', =72
18, // 'I', =73
19, // 'J', =74
20, // 'K', =75
21, // 'L', =76
22, // 'M', =77
23, // 'N', =78
24, // 'O', =79
25, // 'P', =80
26, // 'Q', =81
27, // 'R', =82
28, // 'S', =83
29, // 'T', =84
30, // 'U', =85
31, // 'V', =86
32, // 'W', =87
33, // 'X', =88
34, // 'Y', =89
35, // 'Z', =90
];
const BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK: [u8; 3] = [2, 4, 5];
const RADIX: u32 = BASE38_CHARS.len() as u32;
/// Encode a byte array into a base38 string.
pub fn encode(bytes: &[u8], length: usize) -> String {
let mut offset = 0;
let mut result = String::new();
@ -66,16 +124,84 @@ fn encode_base38(mut value: u32, char_count: u8) -> String {
result
}
/// Decode a base38-encoded string into a byte slice
pub fn decode(base38_str: &str) -> Result<Vec<u8>, Error> {
let mut result = Vec::new();
let mut base38_characters_number: usize = base38_str.len();
let mut decoded_base38_characters: usize = 0;
while base38_characters_number > 0 {
let base38_characters_in_chunk: usize;
let bytes_in_decoded_chunk: usize;
if base38_characters_number >= BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize {
base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[2] as usize;
bytes_in_decoded_chunk = 3;
} else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize {
base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[1] as usize;
bytes_in_decoded_chunk = 2;
} else if base38_characters_number == BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize {
base38_characters_in_chunk = BASE38_CHARACTERS_NEEDED_IN_NBYTES_CHUNK[0] as usize;
bytes_in_decoded_chunk = 1;
} else {
return Err(Error::InvalidData);
}
let mut value = 0u32;
for i in (1..=base38_characters_in_chunk).rev() {
let mut base38_chars = base38_str.chars();
let v = decode_char(base38_chars.nth(decoded_base38_characters + i - 1).unwrap())?;
value = value * RADIX + v as u32;
}
decoded_base38_characters += base38_characters_in_chunk;
base38_characters_number -= base38_characters_in_chunk;
for _i in 0..bytes_in_decoded_chunk {
result.push(value as u8);
value >>= 8;
}
if value > 0 {
// encoded value is too big to represent a correct chunk of size 1, 2 or 3 bytes
return Err(Error::InvalidArgument);
}
}
Ok(result)
}
fn decode_char(c: char) -> Result<u8, Error> {
let c = c as u8;
if !(45..=90).contains(&c) {
return Err(Error::InvalidData);
}
let c = DECODE_BASE38[c as usize - 45];
if c == UNUSED {
return Err(Error::InvalidData);
}
Ok(c)
}
#[cfg(test)]
mod tests {
use super::*;
const ENCODED: &str = "-MOA57ZU02IT2L2BJ00";
const DECODED: [u8; 11] = [
0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02,
];
#[test]
fn can_base38_encode() {
const ENCODED: &str = "-MOA57ZU02IT2L2BJ00";
const DECODED: [u8; 11] = [
0x88, 0xff, 0xa7, 0x91, 0x50, 0x40, 0x00, 0x47, 0x51, 0xdd, 0x02,
];
assert_eq!(encode(&DECODED, 11), ENCODED);
}
#[test]
fn can_base38_decode() {
assert_eq!(decode(ENCODED).expect("can not decode base38"), DECODED);
}
}