initial work & implementation

This commit is contained in:
husky 2023-09-10 00:03:26 -07:00
parent c5be3963e3
commit 51eff105bd
No known key found for this signature in database
GPG key ID: 6B3D8CB511646891
3 changed files with 400 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
/Cargo.lock

9
Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "ondisk_btree"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
vapfs = { path = "../fs/vapfs" }

389
src/lib.rs Normal file
View file

@ -0,0 +1,389 @@
#![no_std]
extern crate alloc;
use alloc::vec;
use alloc::vec::Vec;
/// Serialization of a value into an owned byte buffer.
pub trait ToBytes {
    /// Encodes `self` and returns the resulting bytes in a new `Vec`.
    fn to_bytes(&self) -> Vec<u8>;
}
/// Deserialization of a value from a byte slice.
pub trait FromBytes {
    /// Decodes a value of the implementing type from `bytes`.
    ///
    /// The signature has no error channel, so implementations decide for
    /// themselves what happens on malformed or too-short input.
    fn from_bytes(bytes: &[u8]) -> Self;
}
/// Reports how many bytes a value occupies when serialized.
///
/// The node serializer in this file writes this number as the length prefix
/// in front of the value's `to_bytes()` output, and the node deserializer
/// uses it to slice the value back out, so it should equal that output's
/// length.
pub trait SizeOf {
    /// Returns the size of `self` in bytes.
    fn size_of(&self) -> u32;
}
/// Encodes an optional value as a one-byte presence tag followed by the
/// payload: `[0]` for `None`, `[1, payload...]` for `Some`.
impl<T: ToBytes> ToBytes for Option<T> {
    fn to_bytes(&self) -> Vec<u8> {
        let mut encoded = Vec::new();
        if let Some(inner) = self {
            // Tag first, then whatever the wrapped value serializes to.
            encoded.push(1);
            encoded.extend(inner.to_bytes());
        } else {
            encoded.push(0);
        }
        encoded
    }
}
/// Decodes an optional value: a zero tag byte means `None`, any other tag
/// means the remaining bytes hold the payload.
///
/// Panics if `bytes` is empty, because the tag byte is read unconditionally.
impl<T: FromBytes> FromBytes for Option<T> {
    fn from_bytes(bytes: &[u8]) -> Self {
        match bytes[0] {
            0 => None,
            _ => Some(T::from_bytes(&bytes[1..])),
        }
    }
}
/// Serializes a `u32` as four big-endian bytes.
impl ToBytes for u32 {
    fn to_bytes(&self) -> Vec<u8> {
        let big_endian = self.to_be_bytes();
        Vec::from(big_endian)
    }
}
/// Reads a `u32` from the first four bytes of the slice, big-endian.
///
/// Any bytes after the fourth are ignored; fewer than four bytes panics.
impl FromBytes for u32 {
    fn from_bytes(bytes: &[u8]) -> Self {
        bytes[..4]
            .iter()
            .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
    }
}
/// A `u32` always serializes to four bytes.
impl SizeOf for u32 {
    fn size_of(&self) -> u32 {
        // 32 bits / 8 bits per byte.
        u32::BITS / 8
    }
}
/// One node of the B-tree, stored by value inside `BTree::nodes`.
///
/// `BTreeNode::new` preallocates `keys` with `2 * m - 1` slots and `children`
/// with `2 * m` slots; only the first `n` keys are in use at any time.
#[derive(Clone)]
pub struct BTreeNode<T: SizeOf + Clone + Default + ToBytes + FromBytes> {
/// `true` when this node has no children to descend into.
pub leaf: bool,
/// `(key, value)` slots; entries at index `n` and beyond are placeholders.
pub keys: Vec<(u32, T)>,
/// Child links, expressed as indices into the owning tree's `nodes` vector.
pub children: Vec<usize>,
/// Number of keys currently in use.
pub n: u32,
/// Degree parameter this node was created with (determines slot counts).
pub m: u32,
}
/// A B-tree keyed by `u32` whose nodes live in one flat vector and refer to
/// each other by index rather than by pointer.
#[derive(Clone)]
pub struct BTree<T: SizeOf + Clone + Default + ToBytes + FromBytes> {
/// Backing storage for every node; child links are indices into this vector.
pub nodes: Vec<BTreeNode<T>>,
/// Index of the root node in `nodes`, or `None` while the tree is empty.
pub root: Option<usize>,
/// Degree parameter: a node is treated as full once it holds `2 * m - 1` keys.
pub m: u32,
}
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> BTreeNode<T> {
    /// Creates an empty node for a tree of degree `m`.
    ///
    /// All `2 * m - 1` key slots are filled with `(0, T::default())` and all
    /// `2 * m` child slots with `0`; `n` starts at zero, so none of those
    /// placeholders count as stored data.
    pub fn new(m: u32, leaf: bool) -> Self {
        let key_slots = (2 * m - 1) as usize;
        let child_slots = 2 * m as usize;
        let placeholder = (0, T::default());
        BTreeNode {
            leaf,
            keys: vec![placeholder; key_slots],
            children: vec![0; child_slots],
            n: 0,
            m,
        }
    }
}
// Serialized node layout (all integers big-endian):
// u8: m (only the low byte of `m` is stored)
// u8: leaf
// u16: keys length
// u16: children length
// u32: n
// [keys_length; (u32, sizeof(T) as u32, T)]: keys
// [children_length; u32]: children
/// Serializes a node as: `m` (low byte), `leaf`, key-slot count (`u16`),
/// child-slot count (`u16`), `n` (`u32`), then every key slot as
/// `(key, value size, value bytes)`, then every child index as `u32`.
/// All multi-byte integers are big-endian.
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> ToBytes for BTreeNode<T> {
    fn to_bytes(&self) -> Vec<u8> {
        let key_slots = self.keys.len() as u16;
        let child_slots = self.children.len() as u16;

        // Fixed-size header.
        let mut out = vec![self.m as u8, self.leaf as u8];
        out.extend_from_slice(&key_slots.to_be_bytes());
        out.extend_from_slice(&child_slots.to_be_bytes());
        out.extend_from_slice(&self.n.to_be_bytes());

        // Every key slot is written, including placeholders beyond `n`.
        for (key, value) in &self.keys {
            out.extend_from_slice(&key.to_be_bytes());
            out.extend_from_slice(&value.size_of().to_be_bytes());
            out.extend(value.to_bytes());
        }

        // Child indices are narrowed to 32 bits on disk.
        for &child in &self.children {
            out.extend_from_slice(&(child as u32).to_be_bytes());
        }
        out
    }
}
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> FromBytes for BTreeNode<T> {
fn from_bytes(bytes: &[u8]) -> Self {
let m = bytes[0] as u32;
let mut result = BTreeNode::new(m, false);
result.leaf = bytes[1] != 0;
let keys_length = u16::from_be_bytes([bytes[2], bytes[3]]);
let children_length = u16::from_be_bytes([bytes[4], bytes[5]]);
result.n = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
let mut i = 10;
for j in 0..keys_length {
let key = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
i += 4;
let size = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
i += 4;
let value = T::from_bytes(&bytes[i..i + size as usize]);
i += size as usize;
result.keys[j as usize] = (key, value);
}
for j in 0..children_length {
let child = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
i += 4;
result.children[j as usize] = child as usize;
}
result
}
}
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> BTree<T> {
    /// Creates an empty tree of degree `m`.
    ///
    /// Nodes hold at most `2 * m - 1` keys and `2 * m` children. No node is
    /// allocated until the first insertion.
    pub fn new(m: u32) -> Self {
        BTree {
            nodes: Vec::new(),
            root: None,
            m,
        }
    }

    /// Finds the node that contains `key`, or `None` if the key is absent
    /// (or the tree is empty).
    ///
    /// The whole node is returned, not the value; callers look the key up
    /// among the node's first `n` key slots.
    pub fn search(&mut self, key: u32) -> Option<&mut BTreeNode<T>> {
        let mut index = self.root?;
        loop {
            let current = &self.nodes[index];
            let used = current.n as usize;

            // First slot whose key is not smaller than the one we want.
            let mut slot = 0;
            while slot < used && current.keys[slot].0 < key {
                slot += 1;
            }
            if slot < used && current.keys[slot].0 == key {
                return Some(&mut self.nodes[index]);
            }
            if current.leaf {
                return None;
            }
            index = current.children[slot];
        }
    }

    /// Splits the full child `y`, which sits at position `i` among the
    /// children of `node`.
    ///
    /// The upper `m - 1` keys (and, for inner nodes, the upper `m` children)
    /// of `y` move into a new node appended to `self.nodes`; the median key
    /// of `y` moves up into `node` at position `i`, and the new node becomes
    /// child `i + 1` of `node`. `y` must hold `2 * m - 1` keys and `node`
    /// must have room for one more key.
    pub fn split_child(&mut self, i: usize, y: usize, node: usize) {
        let m = self.m as usize;
        let mut z = BTreeNode::new(self.m, self.nodes[y].leaf);
        z.n = self.m - 1;
        {
            let full = &self.nodes[y];
            // Upper half of the keys goes to the new sibling.
            for j in 0..m - 1 {
                z.keys[j] = full.keys[j + m].clone();
            }
            // Inner nodes also hand over the matching upper half of children.
            if !full.leaf {
                for j in 0..m {
                    z.children[j] = full.children[j + m];
                }
            }
        }
        let median = self.nodes[y].keys[m - 1].clone();
        self.nodes[y].n = self.m - 1;

        // The sibling gets its index by being appended to the node storage.
        self.nodes.push(z);
        let z = self.nodes.len() - 1;

        let parent = &mut self.nodes[node];
        let used = parent.n as usize;
        // Open a gap at child position i + 1 for the new sibling.
        for j in (i + 1..=used).rev() {
            parent.children[j + 1] = parent.children[j];
        }
        parent.children[i + 1] = z;
        // Open a gap at key position i for the median. Swapping pushes the
        // unused slot at index `used` down to `i` without cloning any value.
        for j in (i..used).rev() {
            parent.keys.swap(j, j + 1);
        }
        parent.keys[i] = median;
        parent.n += 1;
    }

    /// Inserts `key`/`value` into the subtree rooted at `node`, which must
    /// not be full.
    ///
    /// Descends iteratively, splitting any full child before stepping into
    /// it, so the node finally written to always has a free slot.
    pub fn insert_assuming_not_full(&mut self, key: u32, value: T, node: usize) {
        let mut current = node;
        loop {
            // Index of the last used key, or -1 when the node holds no keys.
            // Computed in `isize` so that an empty node cannot underflow the
            // unsigned key count.
            let mut i = self.nodes[current].n as isize - 1;

            if self.nodes[current].leaf {
                let leaf = &mut self.nodes[current];
                // Shift larger keys one slot to the right; the swap moves the
                // free slot leftwards instead of cloning each entry.
                while i >= 0 && leaf.keys[i as usize].0 > key {
                    leaf.keys.swap(i as usize, i as usize + 1);
                    i -= 1;
                }
                leaf.keys[(i + 1) as usize] = (key, value);
                leaf.n += 1;
                return;
            }

            // Find the child whose key range contains `key`.
            while i >= 0 && self.nodes[current].keys[i as usize].0 > key {
                i -= 1;
            }
            let mut slot = (i + 1) as usize;
            let child = self.nodes[current].children[slot];
            if self.nodes[child].n == (2 * self.m as usize - 1) as u32 {
                self.split_child(slot, child, current);
                // The median moved up into `slot`; pick the correct half.
                if self.nodes[current].keys[slot].0 < key {
                    slot += 1;
                }
            }
            current = self.nodes[current].children[slot];
        }
    }

    /// Inserts `key` with `value` into the tree.
    ///
    /// The first insertion creates the root leaf. A full root is split under
    /// a freshly allocated root before descending. Existing keys are not
    /// checked for, so inserting the same key twice stores two entries.
    pub fn insert(&mut self, key: u32, value: T) {
        let root = match self.root {
            Some(root) => root,
            None => {
                // Empty tree: start over with a single leaf holding the entry.
                self.nodes.clear();
                let mut first = BTreeNode::new(self.m, true);
                first.keys[0] = (key, value);
                first.n = 1;
                self.nodes.push(first);
                self.root = Some(0);
                return;
            }
        };

        if self.nodes[root].n >= (2 * self.m - 1) {
            // Grow the tree by one level: the old root becomes child 0 of a
            // new, empty root and is split immediately.
            let mut grown = BTreeNode::new(self.m, false);
            grown.children[0] = root;
            let new_index = self.nodes.len();
            self.nodes.push(grown);
            self.split_child(0, root, new_index);

            // The new root has exactly one key; choose the side for `key`.
            let side = if self.nodes[new_index].keys[0].0 < key { 1 } else { 0 };
            let target = self.nodes[new_index].children[side];
            self.insert_assuming_not_full(key, value, target);
            self.root = Some(new_index);
        } else {
            self.insert_assuming_not_full(key, value, root);
        }
    }

    /// Returns every key in the subtree rooted at `node`, in ascending order.
    pub fn traverse_in_order(&self, node: usize) -> Vec<u32> {
        let mut result = Vec::new();
        self.collect_in_order(node, &mut result);
        result
    }

    /// Appends the keys of the subtree rooted at `node` to `out` in order.
    fn collect_in_order(&self, node: usize, out: &mut Vec<u32>) {
        let current = &self.nodes[node];
        let used = current.n as usize;
        for slot in 0..used {
            // Child `slot` holds everything smaller than key `slot`.
            if !current.leaf {
                self.collect_in_order(current.children[slot], out);
            }
            out.push(current.keys[slot].0);
        }
        // The last child holds everything larger than the last key.
        if !current.leaf {
            self.collect_in_order(current.children[used], out);
        }
    }
}
/// Serializes the tree as: root field (5 bytes), `m` (`u32`), node count
/// (`u32`), then each node as a `u32` byte length followed by the node's
/// own serialization. All integers are big-endian.
///
/// The root field is always five bytes wide: a presence tag (`1` or `0`)
/// followed by the root index as `u32`, zero-filled when there is no root.
/// The deserializer reads `m` at a fixed offset of 5, so an empty tree must
/// occupy the same width as a non-empty one to be readable again. Output for
/// trees that have a root is unchanged.
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> ToBytes for BTree<T> {
    fn to_bytes(&self) -> Vec<u8> {
        let mut result = Vec::new();

        // Fixed-width root field: tag byte + 32-bit index.
        match self.root {
            Some(index) => {
                result.push(1);
                result.extend_from_slice(&(index as u32).to_be_bytes());
            }
            None => result.extend_from_slice(&[0u8; 5]),
        }

        result.extend_from_slice(&self.m.to_be_bytes());
        result.extend_from_slice(&(self.nodes.len() as u32).to_be_bytes());

        // Each node is length-prefixed so the reader can slice it out.
        for node in &self.nodes {
            let bytes = node.to_bytes();
            result.extend_from_slice(&(bytes.len() as u32).to_be_bytes());
            result.extend_from_slice(&bytes);
        }
        result
    }
}
/// Rebuilds a tree from: a 5-byte root field (presence tag + `u32` index),
/// `m` (`u32`), node count (`u32`), then that many length-prefixed nodes.
/// All integers are big-endian.
///
/// Panics if `bytes` is shorter than the layout it describes.
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> FromBytes for BTree<T> {
    fn from_bytes(bytes: &[u8]) -> Self {
        let be_u32 = |at: usize| {
            u32::from_be_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]])
        };

        // Header: root field at 0..5, then `m`, then the node count.
        let root = Option::<u32>::from_bytes(&bytes[0..5]).map(|index| index as usize);
        let m = be_u32(5);
        let node_count = be_u32(9);

        // Body: each node is preceded by its byte length.
        let mut cursor = 13;
        let mut nodes = vec![];
        for _ in 0..node_count {
            let length = be_u32(cursor) as usize;
            let start = cursor + 4;
            nodes.push(BTreeNode::<T>::from_bytes(&bytes[start..start + length]));
            cursor = start + length;
        }

        BTree { nodes, root, m }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // `i32` is the value type used by the test tree, so it needs the three
    // serialization traits.
    impl ToBytes for i32 {
        fn to_bytes(&self) -> Vec<u8> {
            self.to_be_bytes().to_vec()
        }
    }

    impl FromBytes for i32 {
        fn from_bytes(bytes: &[u8]) -> Self {
            i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
        }
    }

    impl SizeOf for i32 {
        fn size_of(&self) -> u32 {
            4
        }
    }

    /// Values stored under `key` in the node that `search` returns, or
    /// `None` when the key is not found.
    fn values_for(tree: &mut BTree<i32>, key: u32) -> Option<Vec<i32>> {
        tree.search(key).map(|node| {
            node.keys
                .iter()
                .filter(|entry| entry.0 == key)
                .map(|entry| entry.1)
                .collect::<Vec<_>>()
        })
    }

    /// Checks key order and a few stored values of the sample tree.
    fn tree_good(mut tree: BTree<i32>) {
        // In-order traversal must yield the keys sorted.
        let root = tree.root.unwrap();
        assert_eq!(
            tree.traverse_in_order(root),
            vec![1, 2, 3, 5, 6, 7, 10, 11, 12, 17, 20, 30]
        );
        // Values of keys 6, 12 and 17.
        assert_eq!(values_for(&mut tree, 6), Some(vec![1]));
        assert_eq!(values_for(&mut tree, 12), Some(vec![1]));
        assert_eq!(values_for(&mut tree, 17), Some(vec![0]));
    }

    #[test]
    fn it_works() {
        let mut tree = BTree::new(3);
        let entries: [(u32, i32); 12] = [
            (10, 0),
            (20, 0),
            (5, 0),
            (6, 1),
            (12, 1),
            (30, 0),
            (7, 0),
            (17, 0),
            (11, 0),
            (3, 0),
            (2, 0),
            (1, 0),
        ];
        for &(key, value) in entries.iter() {
            tree.insert(key, value);
        }
        tree_good(tree.clone());

        // Serialize and deserialize, then run the same checks again.
        let bytes = tree.to_bytes();
        let tree2 = BTree::<i32>::from_bytes(&bytes);
        tree_good(tree2.clone());
    }
}