From 51eff105bd5003b9975d90484ff11110351a8128 Mon Sep 17 00:00:00 2001
From: husky
Date: Sun, 10 Sep 2023 00:03:26 -0700
Subject: [PATCH] initial work & implementation

---
 .gitignore |   2 +
 Cargo.toml |   9 ++
 src/lib.rs | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 467 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 src/lib.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..cf60a84
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "ondisk_btree"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+vapfs = { path = "../fs/vapfs" }
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..a94ca7a
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,456 @@
+//! A B-tree keyed by `u32` with a big-endian byte encoding.
+//!
+//! Nodes are stored in [`BTree::nodes`] and refer to each other by index.
+#![no_std]
+
+extern crate alloc;
+
+use alloc::vec;
+use alloc::vec::Vec;
+
+/// Serializes a value into an owned byte vector.
+pub trait ToBytes {
+    fn to_bytes(&self) -> Vec<u8>;
+}
+
+/// Reconstructs a value from the bytes produced by [`ToBytes::to_bytes`].
+pub trait FromBytes {
+    fn from_bytes(bytes: &[u8]) -> Self;
+}
+
+/// Byte length of a value's serialized form.
+///
+/// [`BTreeNode`] writes this as the length prefix of every stored value, so it
+/// must match the length of [`ToBytes::to_bytes`] for the tree to round-trip.
+pub trait SizeOf {
+    fn size_of(&self) -> u32;
+}
+
+/// Encoded as a one-byte tag (`0` = `None`, `1` = `Some`) followed by the
+/// payload when present, so `None` is a single byte.
+impl<T: ToBytes> ToBytes for Option<T> {
+    fn to_bytes(&self) -> Vec<u8> {
+        match self {
+            Some(x) => {
+                let mut result = vec![1];
+                result.extend_from_slice(&x.to_bytes());
+                result
+            }
+            None => vec![0],
+        }
+    }
+}
+
+/// A zero tag byte decodes to `None`; any other tag decodes the rest as `Some`.
+impl<T: FromBytes> FromBytes for Option<T> {
+    fn from_bytes(bytes: &[u8]) -> Self {
+        if bytes[0] == 0 {
+            None
+        } else {
+            Some(T::from_bytes(&bytes[1..]))
+        }
+    }
+}
+
+impl ToBytes for u32 {
+    fn to_bytes(&self) -> Vec<u8> {
+        self.to_be_bytes().to_vec()
+    }
+}
+
+impl FromBytes for u32 {
+    fn from_bytes(bytes: &[u8]) -> Self {
+        u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
+    }
+}
+
+impl SizeOf for u32 {
+    fn size_of(&self) -> u32 {
+        4
+    }
+}
+
+/// One node of a [`BTree`].
+///
+/// `keys` and `children` are allocated at full size up front; only the first
+/// `n` keys (and, for internal nodes, the first `n + 1` children) are in use.
+#[derive(Clone)]
+pub struct BTreeNode<T> {
+    pub leaf: bool,
+    pub keys: Vec<(u32, T)>,
+    /// indices into [`BTree::nodes`]
+    pub children: Vec<usize>,
+    /// number of keys in use
+    pub n: u32,
+    /// minimum degree this node was created with
+    pub m: u32,
+}
+
+/// A B-tree of minimum degree `m`; a node is full at `2 * m - 1` keys.
+#[derive(Clone)]
+pub struct BTree<T> {
+    pub nodes: Vec<BTreeNode<T>>,
+    /// index of the root in `nodes`, `None` while the tree is empty
+    pub root: Option<usize>,
+    pub m: u32,
+}
+
+impl<T: Default + Clone> BTreeNode<T> {
+    /// creates an empty node with `2 * m - 1` key slots and `2 * m` child slots;
+    /// `m` must be at least 1
+    pub fn new(m: u32, leaf: bool) -> Self {
+        BTreeNode {
+            leaf,
+            keys: vec![(0, T::default()); (2 * m - 1) as usize],
+            children: vec![0; 2 * m as usize],
+            n: 0,
+            m,
+        }
+    }
+}
+
+// u8: m (low byte only)
+// u8: leaf
+// u16: keys length
+// u16: children length
+// u32: n
+// [keys_length; (u32, sizeof(T) as u32, T)]: keys
+// [children_length; u32]: children
+
+impl<T: ToBytes + SizeOf> ToBytes for BTreeNode<T> {
+    fn to_bytes(&self) -> Vec<u8> {
+        let mut result = vec![];
+        // only the low byte of `m` is stored per node; the tree algorithms use
+        // `BTree::m`, which `BTree::to_bytes` stores in full
+        result.push(self.m as u8);
+        result.push(self.leaf as u8);
+        result.extend_from_slice(&(self.keys.len() as u16).to_be_bytes());
+        result.extend_from_slice(&(self.children.len() as u16).to_be_bytes());
+        result.extend_from_slice(&self.n.to_be_bytes());
+        for key in &self.keys {
+            result.extend_from_slice(&key.0.to_be_bytes());
+            result.extend_from_slice(&key.1.size_of().to_be_bytes());
+            result.extend_from_slice(&key.1.to_bytes());
+        }
+        for child in &self.children {
+            result.extend_from_slice(&(*child as u32).to_be_bytes());
+        }
+        result
+    }
+}
+
+impl<T: FromBytes + Default + Clone> FromBytes for BTreeNode<T> {
+    /// panics if `bytes` is shorter than the lengths it declares
+    fn from_bytes(bytes: &[u8]) -> Self {
+        let m = bytes[0] as u32;
+        let leaf = bytes[1] != 0;
+        let keys_length = u16::from_be_bytes([bytes[2], bytes[3]]) as usize;
+        let children_length = u16::from_be_bytes([bytes[4], bytes[5]]) as usize;
+        let n = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
+
+        // same slot counts as `BTreeNode::new`, but never fewer than were
+        // serialized: the stored `m` is a single byte, so it can be 0 or
+        // disagree with the stored lengths
+        let key_slots = (2 * m as usize).saturating_sub(1).max(keys_length);
+        let child_slots = (2 * m as usize).max(children_length);
+        let mut result = BTreeNode {
+            leaf,
+            keys: vec![(0, T::default()); key_slots],
+            children: vec![0; child_slots],
+            n,
+            m,
+        };
+
+        let mut i = 10;
+        for j in 0..keys_length {
+            let key = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+            i += 4;
+            let size = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+            i += 4;
+            let value = T::from_bytes(&bytes[i..i + size as usize]);
+            i += size as usize;
+            result.keys[j] = (key, value);
+        }
+        for j in 0..children_length {
+            let child = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+            i += 4;
+            result.children[j] = child as usize;
+        }
+        result
+    }
+}
+
+impl<T: Default + Clone> BTree<T> {
+    /// creates an empty tree of minimum degree `m`
+    pub fn new(m: u32) -> Self {
+        BTree {
+            nodes: vec![],
+            root: None,
+            m,
+        }
+    }
+
+    /// finds the node with the given key, or None if not found
+    pub fn search(&mut self, key: u32) -> Option<&mut BTreeNode<T>> {
+        let mut index = self.root?;
+        loop {
+            let mut i = 0;
+            while i < self.nodes[index].n as usize && self.nodes[index].keys[i].0 < key {
+                i += 1;
+            }
+            if i < self.nodes[index].n as usize && self.nodes[index].keys[i].0 == key {
+                return Some(&mut self.nodes[index]);
+            }
+            if self.nodes[index].leaf {
+                return None;
+            }
+            index = self.nodes[index].children[i];
+        }
+    }
+
+    /// splits the full child `y`, which sits at child position `i` of `node`
+    ///
+    /// the upper `m - 1` keys (and `m` children) of `y` move to a new node, and
+    /// the middle key of `y` moves up into `node`, which must not be full
+    pub fn split_child(&mut self, i: usize, y: usize, node: usize) {
+        let mut z = BTreeNode::new(self.m, self.nodes[y].leaf);
+        z.n = self.m - 1;
+
+        // copy the last (m-1) keys of y into z
+        for j in 0..self.m - 1 {
+            z.keys[j as usize] = self.nodes[y].keys[(j + self.m) as usize].clone();
+        }
+
+        // copy the last m children of y into z
+        if !self.nodes[y].leaf {
+            for j in 0..self.m {
+                z.children[j as usize] = self.nodes[y].children[(j + self.m) as usize];
+            }
+        }
+
+        self.nodes[y].n = self.m - 1;
+
+        // store z in `nodes`; from here on `z` is its index
+        self.nodes.push(z);
+        let z = self.nodes.len() - 1;
+
+        // shift the children of node right to make room for z
+        let mut j = self.nodes[node].n;
+        while j >= (i + 1) as u32 {
+            self.nodes[node].children[j as usize + 1] = self.nodes[node].children[j as usize];
+            j -= 1;
+        }
+
+        self.nodes[node].children[i + 1] = z;
+
+        // a key of y will move to this node, so we need to make room for it
+        let mut j = self.nodes[node].n as i32 - 1;
+        while j >= i as i32 {
+            self.nodes[node].keys[j as usize + 1] = self.nodes[node].keys[j as usize].clone();
+            j -= 1;
+        }
+
+        // copy the middle key of y into node
+        self.nodes[node].keys[i] = self.nodes[y].keys[(self.m - 1) as usize].clone();
+
+        self.nodes[node].n += 1;
+    }
+
+    /// inserts the given key into the tree, assuming that the containing node is not full
+    pub fn insert_assuming_not_full(&mut self, key: u32, value: T, node: usize) {
+        let mut node = node;
+
+        loop {
+            // cast before subtracting so a node with no keys gives -1 instead of
+            // underflowing the u32
+            let mut i = self.nodes[node].n as isize - 1;
+            if self.nodes[node].leaf {
+                while i >= 0 && self.nodes[node].keys[i as usize].0 > key {
+                    self.nodes[node].keys[i as usize + 1] = self.nodes[node].keys[i as usize].clone();
+                    i -= 1;
+                }
+
+                self.nodes[node].keys[(i + 1) as usize] = (key, value);
+                self.nodes[node].n += 1;
+                return;
+            } else {
+                // find the child which is going to contain the key
+                while i >= 0 && self.nodes[node].keys[i as usize].0 > key {
+                    i -= 1;
+                }
+
+                // check if the child is full
+                if self.nodes[self.nodes[node].children[(i + 1) as usize]].n == (2 * self.m as usize - 1) as u32 {
+                    self.split_child((i + 1) as usize, self.nodes[node].children[(i + 1) as usize], node);
+
+                    if self.nodes[node].keys[(i + 1) as usize].0 < key {
+                        i += 1;
+                    }
+                }
+                node = self.nodes[node].children[(i + 1) as usize];
+            }
+        }
+    }
+
+    /// inserts the given key into the tree
+    ///
+    /// keys are not deduplicated: inserting an existing key adds a second entry
+    pub fn insert(&mut self, key: u32, value: T) {
+        if self.root.is_none() {
+            self.root = Some(0);
+            self.nodes.clear();
+            let mut new = BTreeNode::new(self.m, true);
+            new.keys[0] = (key, value);
+            new.n = 1;
+            self.nodes.push(new);
+        } else if self.nodes[self.root.unwrap()].n >= (2 * self.m - 1) {
+            let mut new = BTreeNode::new(self.m, false);
+            let root = self.root.unwrap();
+            new.children[0] = root;
+            let new_index = self.nodes.len();
+            self.nodes.push(new);
+            self.split_child(0, root, new_index);
+
+            // new root has two children now, decide which of the two will contain the key
+            let mut i = 0;
+            if self.nodes[new_index].keys[0].0 < key {
+                i += 1;
+            }
+            self.insert_assuming_not_full(key, value, self.nodes[new_index].children[i]);
+
+            self.root = Some(new_index);
+        } else {
+            self.insert_assuming_not_full(key, value, self.root.unwrap());
+        }
+    }
+
+    /// traverses the subtree rooted at `node` in order and returns a vector of all its keys
+    pub fn traverse_in_order(&self, node: usize) -> Vec<u32> {
+        let mut result = vec![];
+        for i in 0..self.nodes[node].n {
+            if !self.nodes[node].leaf {
+                result.append(&mut self.traverse_in_order(self.nodes[node].children[i as usize]));
+            }
+            result.push(self.nodes[node].keys[i as usize].0);
+        }
+
+        if !self.nodes[node].leaf {
+            result.append(&mut self.traverse_in_order(self.nodes[node].children[self.nodes[node].n as usize]));
+        }
+
+        result
+    }
+}
+
+// Option<u32>: root (1 byte when None, 5 bytes when Some)
+// u32: m
+// u32: nodes length
+// [nodes_length; (u32 byte length, BTreeNode)]: nodes
+
+impl<T: ToBytes + SizeOf> ToBytes for BTree<T> {
+    fn to_bytes(&self) -> Vec<u8> {
+        let mut result = vec![];
+        result.extend_from_slice(&self.root.map(|u| u as u32).to_bytes());
+        result.extend_from_slice(&self.m.to_be_bytes());
+        result.extend_from_slice(&(self.nodes.len() as u32).to_be_bytes());
+        for node in &self.nodes {
+            let bytes = node.to_bytes();
+            result.extend_from_slice(&(bytes.len() as u32).to_be_bytes());
+            result.extend_from_slice(&bytes);
+        }
+        result
+    }
+}
+
+impl<T: FromBytes + Default + Clone> FromBytes for BTree<T> {
+    /// panics if `bytes` is shorter than the lengths it declares
+    fn from_bytes(bytes: &[u8]) -> Self {
+        let root = Option::<u32>::from_bytes(bytes).map(|u| u as usize);
+        // the root field is variable-length: a lone tag byte for an empty tree,
+        // tag + u32 otherwise, so the remaining fields start at 1 or 5
+        let mut i = if root.is_some() { 5 } else { 1 };
+        let m = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+        i += 4;
+        let nodes_length = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+        i += 4;
+        let mut nodes = vec![];
+        for _ in 0..nodes_length {
+            let node_length = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
+            i += 4;
+            nodes.push(BTreeNode::<T>::from_bytes(&bytes[i..i + node_length as usize]));
+            i += node_length as usize;
+        }
+        BTree {
+            nodes,
+            root,
+            m,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_works() {
+        impl ToBytes for i32 {
+            fn to_bytes(&self) -> Vec<u8> {
+                self.to_be_bytes().to_vec()
+            }
+        }
+
+        impl FromBytes for i32 {
+            fn from_bytes(bytes: &[u8]) -> Self {
+                i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
+            }
+        }
+
+        impl SizeOf for i32 {
+            fn size_of(&self) -> u32 {
+                4
+            }
+        }
+
+        let mut tree = BTree::new(3);
+        tree.insert(10, 0);
+        tree.insert(20, 0);
+        tree.insert(5, 0);
+        tree.insert(6, 1);
+        tree.insert(12, 1);
+        tree.insert(30, 0);
+        tree.insert(7, 0);
+        tree.insert(17, 0);
+        tree.insert(11, 0);
+        tree.insert(3, 0);
+        tree.insert(2, 0);
+        tree.insert(1, 0);
+
+        fn tree_good(mut tree: BTree<i32>) {
+            // check the in-order traversal
+            assert_eq!(tree.traverse_in_order(tree.root.unwrap()), vec![1, 2, 3, 5, 6, 7, 10, 11, 12, 17, 20, 30]);
+            // get values of 6, 12, 17
+            assert_eq!(tree.search(6).map(|x| x.keys.iter().filter(|x| x.0 == 6).map(|x| x.1).collect::<Vec<_>>()), Some(vec![1]));
+            assert_eq!(tree.search(12).map(|x| x.keys.iter().filter(|x| x.0 == 12).map(|x| x.1).collect::<Vec<_>>()), Some(vec![1]));
+            assert_eq!(tree.search(17).map(|x| x.keys.iter().filter(|x| x.0 == 17).map(|x| x.1).collect::<Vec<_>>()), Some(vec![0]));
+        }
+
+        tree_good(tree.clone());
+
+        // serialize and deserialize
+        let bytes = tree.to_bytes();
+        let tree2 = BTree::<i32>::from_bytes(&bytes);
+
+        tree_good(tree2.clone());
+    }
+
+    /// an empty tree serializes its root as a single tag byte
+    #[test]
+    fn empty_tree_round_trips() {
+        let tree = BTree::<u32>::new(3);
+        let restored = BTree::<u32>::from_bytes(&tree.to_bytes());
+        assert!(restored.root.is_none());
+        assert_eq!(restored.m, 3);
+        assert!(restored.nodes.is_empty());
+    }
+}