initial work & implementation
This commit is contained in:
parent
c5be3963e3
commit
51eff105bd
3 changed files with 400 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
/Cargo.lock
|
9
Cargo.toml
Normal file
9
Cargo.toml
Normal file
|
@ -0,0 +1,9 @@
|
|||
[package]
|
||||
name = "ondisk_btree"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
vapfs = { path = "../fs/vapfs" }
|
389
src/lib.rs
Normal file
389
src/lib.rs
Normal file
|
@ -0,0 +1,389 @@
|
|||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::vec;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Serialization of a value into an owned byte buffer.
pub trait ToBytes {
    /// Encodes `self` and returns the encoded bytes.
    fn to_bytes(&self) -> Vec<u8>;
}
|
||||
|
||||
/// Deserialization of a value from a byte slice.
///
/// The signature has no way to report failure, so implementations in this
/// file index into `bytes` directly and panic when the slice is too short.
pub trait FromBytes {
    /// Decodes a value from `bytes`.
    fn from_bytes(bytes: &[u8]) -> Self;
}
|
||||
|
||||
/// Reports how many bytes a value occupies once serialized.
///
/// `BTreeNode` serialization writes this number as a length prefix in front
/// of each value's `to_bytes()` output and reads exactly that many bytes back,
/// so the two must agree.
pub trait SizeOf {
    /// Returns the encoded size of `self` in bytes.
    fn size_of(&self) -> u32;
}
|
||||
|
||||
/// Encodes an optional value as a one-byte tag followed by the payload.
///
/// `None` is the single byte `0`; `Some(x)` is the byte `1` followed by
/// `x.to_bytes()`. The encoded length therefore differs between the two cases.
impl<T: ToBytes> ToBytes for Option<T> {
    fn to_bytes(&self) -> Vec<u8> {
        if let Some(inner) = self {
            let mut encoded = vec![1];
            encoded.extend(inner.to_bytes());
            encoded
        } else {
            vec![0]
        }
    }
}
|
||||
|
||||
/// Decodes an optional value from a one-byte tag followed by the payload.
///
/// A tag of `0` yields `None`; any other tag yields `Some`, decoded from the
/// bytes after the tag. Panics if `bytes` is empty, because the tag is read
/// by direct indexing.
impl<T: FromBytes> FromBytes for Option<T> {
    fn from_bytes(bytes: &[u8]) -> Self {
        match bytes[0] {
            0 => None,
            _ => Some(T::from_bytes(&bytes[1..])),
        }
    }
}
|
||||
|
||||
impl ToBytes for u32 {
|
||||
fn to_bytes(&self) -> Vec<u8> {
|
||||
self.to_be_bytes().to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
impl FromBytes for u32 {
|
||||
fn from_bytes(bytes: &[u8]) -> Self {
|
||||
u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
|
||||
}
|
||||
}
|
||||
|
||||
impl SizeOf for u32 {
|
||||
fn size_of(&self) -> u32 {
|
||||
4
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BTreeNode<T: SizeOf + Clone + Default + ToBytes + FromBytes> {
|
||||
pub leaf: bool,
|
||||
pub keys: Vec<(u32, T)>,
|
||||
pub children: Vec<usize>,
|
||||
pub n: u32,
|
||||
pub m: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BTree<T: SizeOf + Clone + Default + ToBytes + FromBytes> {
|
||||
pub nodes: Vec<BTreeNode<T>>,
|
||||
pub root: Option<usize>,
|
||||
pub m: u32,
|
||||
}
|
||||
|
||||
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> BTreeNode<T> {
    /// Creates an empty node with every slot pre-allocated.
    ///
    /// The node gets `2 * m - 1` key slots filled with `(0, T::default())`
    /// and `2 * m` child slots filled with `0`; `n` starts at zero, so none
    /// of them counts as used yet.
    pub fn new(m: u32, leaf: bool) -> Self {
        let key_slots = (2 * m - 1) as usize;
        let child_slots = 2 * m as usize;
        Self {
            leaf,
            keys: vec![(0, T::default()); key_slots],
            children: vec![0; child_slots],
            n: 0,
            m,
        }
    }
}
|
||||
|
||||
// Serialized layout of a `BTreeNode` (all integers big-endian):
// u8: m
// u8: leaf
// u16: keys length
// u16: children length
// u32: n
// [keys_length; (u32, sizeof(T) as u32, T)]: keys
// [children_length; u32]: children
|
||||
|
||||
/// Serializes a node: a fixed header, then every key slot, then every child slot.
///
/// Header fields are `m` (one byte), `leaf` (one byte), the key-slot count and
/// child-slot count (`u16` each) and `n` (`u32`). Each key slot is written as
/// the `u32` key, the value's `size_of()` as `u32`, then the value bytes.
/// Each child index is written as `u32`. All integers are big-endian.
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> ToBytes for BTreeNode<T> {
    fn to_bytes(&self) -> Vec<u8> {
        let mut out = Vec::new();

        // NOTE(review): `m`, the slot counts and the child indices are
        // narrowed with `as`, so values beyond the target width are truncated.
        out.push(self.m as u8);
        out.push(self.leaf as u8);
        out.extend_from_slice(&(self.keys.len() as u16).to_be_bytes());
        out.extend_from_slice(&(self.children.len() as u16).to_be_bytes());
        out.extend_from_slice(&self.n.to_be_bytes());

        // Every slot is written, including the unused ones past `n`.
        for (key, value) in &self.keys {
            out.extend_from_slice(&key.to_be_bytes());
            out.extend_from_slice(&value.size_of().to_be_bytes());
            out.extend_from_slice(&value.to_bytes());
        }

        for &child in &self.children {
            out.extend_from_slice(&(child as u32).to_be_bytes());
        }

        out
    }
}
|
||||
|
||||
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> FromBytes for BTreeNode<T> {
|
||||
fn from_bytes(bytes: &[u8]) -> Self {
|
||||
let m = bytes[0] as u32;
|
||||
let mut result = BTreeNode::new(m, false);
|
||||
result.leaf = bytes[1] != 0;
|
||||
let keys_length = u16::from_be_bytes([bytes[2], bytes[3]]);
|
||||
let children_length = u16::from_be_bytes([bytes[4], bytes[5]]);
|
||||
result.n = u32::from_be_bytes([bytes[6], bytes[7], bytes[8], bytes[9]]);
|
||||
let mut i = 10;
|
||||
for j in 0..keys_length {
|
||||
let key = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
|
||||
i += 4;
|
||||
let size = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
|
||||
i += 4;
|
||||
let value = T::from_bytes(&bytes[i..i + size as usize]);
|
||||
i += size as usize;
|
||||
result.keys[j as usize] = (key, value);
|
||||
}
|
||||
for j in 0..children_length {
|
||||
let child = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
|
||||
i += 4;
|
||||
result.children[j as usize] = child as usize;
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> BTree<T> {
|
||||
pub fn new(m: u32) -> Self {
|
||||
BTree {
|
||||
nodes: vec![],
|
||||
root: None,
|
||||
m,
|
||||
}
|
||||
}
|
||||
|
||||
/// finds the node with the given key, or None if not found
|
||||
pub fn search(&mut self, key: u32) -> Option<&mut BTreeNode<T>> {
|
||||
let mut index = self.root?;
|
||||
loop {
|
||||
let mut i = 0;
|
||||
while i < self.nodes[index].n as usize && self.nodes[index].keys[i].0 < key {
|
||||
i += 1;
|
||||
}
|
||||
if i < self.nodes[index].n as usize && self.nodes[index].keys[i].0 == key {
|
||||
return Some(&mut self.nodes[index]);
|
||||
}
|
||||
if self.nodes[index].leaf {
|
||||
return None;
|
||||
}
|
||||
index = self.nodes[index].children[i];
|
||||
}
|
||||
}
|
||||
|
||||
/// splits the child `y` of node `node`, assumed that the node must be full
|
||||
pub fn split_child(&mut self, i: usize, y: usize, node: usize) {
|
||||
let mut z = BTreeNode::new(self.m, self.nodes[y].leaf);
|
||||
z.n = self.m - 1;
|
||||
|
||||
// copy the last (m-1) keys of y into new_node
|
||||
for j in 0..self.m - 1 {
|
||||
z.keys[j as usize] = self.nodes[y].keys[(j + self.m) as usize].clone();
|
||||
}
|
||||
|
||||
// copy the last m children of y into new_node
|
||||
if !self.nodes[y].leaf {
|
||||
for j in 0..self.m {
|
||||
z.children[j as usize] = self.nodes[y].children[(j + self.m) as usize];
|
||||
}
|
||||
}
|
||||
|
||||
self.nodes[y].n = self.m - 1;
|
||||
|
||||
// canonicalize node
|
||||
self.nodes.push(z);
|
||||
let z = self.nodes.len() - 1;
|
||||
|
||||
// insert new_node into node
|
||||
let mut j = self.nodes[node].n;
|
||||
while j >= (i + 1) as u32 {
|
||||
self.nodes[node].children[j as usize + 1] = self.nodes[node].children[j as usize];
|
||||
j -= 1;
|
||||
}
|
||||
|
||||
self.nodes[node].children[i + 1] = z;
|
||||
|
||||
// a key of y will move to this node, so we need to make room for it
|
||||
let mut j = self.nodes[node].n as i32 - 1;
|
||||
while j >= i as i32 {
|
||||
self.nodes[node].keys[j as usize + 1] = self.nodes[node].keys[j as usize].clone();
|
||||
j -= 1;
|
||||
}
|
||||
|
||||
// copy the middle key of y into node
|
||||
self.nodes[node].keys[i] = self.nodes[y].keys[(self.m - 1) as usize].clone();
|
||||
|
||||
self.nodes[node].n += 1;
|
||||
}
|
||||
|
||||
/// inserts the given key into the tree, assuming that the containing node is not full
|
||||
pub fn insert_assuming_not_full(&mut self, key: u32, value: T, node: usize) {
|
||||
let mut node = node;
|
||||
|
||||
loop {
|
||||
let mut i = (self.nodes[node].n - 1) as isize;
|
||||
if self.nodes[node].leaf {
|
||||
while i >= 0 && self.nodes[node].keys[i as usize].0 > key {
|
||||
self.nodes[node].keys[i as usize + 1] = self.nodes[node].keys[i as usize].clone();
|
||||
i -= 1;
|
||||
}
|
||||
|
||||
self.nodes[node].keys[(i + 1) as usize] = (key, value);
|
||||
self.nodes[node].n += 1;
|
||||
return;
|
||||
} else {
|
||||
// find the child which is going to contain the key
|
||||
while i >= 0 && self.nodes[node].keys[i as usize].0 > key {
|
||||
i -= 1;
|
||||
}
|
||||
|
||||
// check if the child is full
|
||||
if self.nodes[self.nodes[node].children[(i + 1) as usize]].n == (2 * self.m as usize - 1) as u32 {
|
||||
self.split_child((i + 1) as usize, self.nodes[node].children[(i + 1) as usize], node);
|
||||
|
||||
if self.nodes[node].keys[(i + 1) as usize].0 < key {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
node = self.nodes[node].children[(i + 1) as usize];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// inserts the given key into the tree
|
||||
pub fn insert(&mut self, key: u32, value: T) {
|
||||
if self.root.is_none() {
|
||||
self.root = Some(0);
|
||||
self.nodes.clear();
|
||||
let mut new = BTreeNode::new(self.m, true);
|
||||
new.keys[0] = (key, value);
|
||||
new.n = 1;
|
||||
self.nodes.push(new);
|
||||
} else if self.nodes[self.root.unwrap()].n >= (2 * self.m - 1) {
|
||||
let mut new = BTreeNode::new(self.m, false);
|
||||
let root = self.root.unwrap();
|
||||
new.children[0] = root;
|
||||
let new_index = self.nodes.len();
|
||||
self.nodes.push(new);
|
||||
self.split_child(0, root, new_index);
|
||||
|
||||
// new root has two children now, decide which of the two will contain the key
|
||||
let mut i = 0;
|
||||
if self.nodes[new_index].keys[0].0 < key {
|
||||
i += 1;
|
||||
}
|
||||
self.insert_assuming_not_full(key, value, self.nodes[new_index].children[i]);
|
||||
|
||||
self.root = Some(new_index);
|
||||
} else {
|
||||
self.insert_assuming_not_full(key, value, self.root.unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
/// traverses the tree in order and returns a vector of all keys
|
||||
pub fn traverse_in_order(&self, node: usize) -> Vec<u32> {
|
||||
let mut result = vec![];
|
||||
for i in 0..self.nodes[node].n {
|
||||
if !self.nodes[node].leaf {
|
||||
result.append(&mut self.traverse_in_order(self.nodes[node].children[i as usize]));
|
||||
}
|
||||
result.push(self.nodes[node].keys[i as usize].0);
|
||||
}
|
||||
|
||||
if !self.nodes[node].leaf {
|
||||
result.append(&mut self.traverse_in_order(self.nodes[node].children[self.nodes[node].n as usize]));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes the whole tree.
///
/// Layout: the root index as `Option<u32>` (one byte when `None`, five bytes
/// when `Some`), then `m` and the node count as big-endian `u32`, then every
/// node as a `u32` byte length followed by the node's own bytes.
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> ToBytes for BTree<T> {
    fn to_bytes(&self) -> Vec<u8> {
        let root_index: Option<u32> = self.root.map(|index| index as u32);

        let mut out = root_index.to_bytes();
        out.extend_from_slice(&self.m.to_be_bytes());
        out.extend_from_slice(&(self.nodes.len() as u32).to_be_bytes());

        // Each node is length-prefixed so the reader can slice it out.
        for encoded in self.nodes.iter().map(|node| node.to_bytes()) {
            out.extend_from_slice(&(encoded.len() as u32).to_be_bytes());
            out.extend_from_slice(&encoded);
        }

        out
    }
}
|
||||
|
||||
impl<T: SizeOf + Clone + Default + ToBytes + FromBytes> FromBytes for BTree<T> {
|
||||
fn from_bytes(bytes: &[u8]) -> Self {
|
||||
let root = Option::<u32>::from_bytes(&bytes[0..5]).map(|u| u as usize);
|
||||
let m = u32::from_be_bytes([bytes[5], bytes[6], bytes[7], bytes[8]]);
|
||||
let nodes_length = u32::from_be_bytes([bytes[9], bytes[10], bytes[11], bytes[12]]);
|
||||
let mut nodes = vec![];
|
||||
let mut i = 13;
|
||||
for _ in 0..nodes_length {
|
||||
let node_length = u32::from_be_bytes([bytes[i], bytes[i + 1], bytes[i + 2], bytes[i + 3]]);
|
||||
i += 4;
|
||||
nodes.push(BTreeNode::<T>::from_bytes(&bytes[i..i + node_length as usize]));
|
||||
i += node_length as usize;
|
||||
}
|
||||
BTree {
|
||||
nodes,
|
||||
root,
|
||||
m,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fills a tree, checks key order and a few lookups, then repeats the
    /// same checks on a copy that went through serialization.
    #[test]
    fn it_works() {
        impl ToBytes for i32 {
            fn to_bytes(&self) -> Vec<u8> {
                Vec::from(self.to_be_bytes())
            }
        }

        impl FromBytes for i32 {
            fn from_bytes(bytes: &[u8]) -> Self {
                let big_endian = [bytes[0], bytes[1], bytes[2], bytes[3]];
                i32::from_be_bytes(big_endian)
            }
        }

        impl SizeOf for i32 {
            fn size_of(&self) -> u32 {
                4
            }
        }

        /// Values stored under `key` in the node that `search` returns.
        fn values_of(tree: &mut BTree<i32>, key: u32) -> Option<Vec<i32>> {
            tree.search(key).map(|node| {
                node.keys
                    .iter()
                    .filter(|entry| entry.0 == key)
                    .map(|entry| entry.1)
                    .collect::<Vec<_>>()
            })
        }

        fn tree_good(mut tree: BTree<i32>) {
            // keys come back in ascending order
            let root = tree.root.unwrap();
            assert_eq!(
                tree.traverse_in_order(root),
                vec![1, 2, 3, 5, 6, 7, 10, 11, 12, 17, 20, 30]
            );
            // get values of 6, 12, 17
            assert_eq!(values_of(&mut tree, 6), Some(vec![1]));
            assert_eq!(values_of(&mut tree, 12), Some(vec![1]));
            assert_eq!(values_of(&mut tree, 17), Some(vec![0]));
        }

        let entries: [(u32, i32); 12] = [
            (10, 0),
            (20, 0),
            (5, 0),
            (6, 1),
            (12, 1),
            (30, 0),
            (7, 0),
            (17, 0),
            (11, 0),
            (3, 0),
            (2, 0),
            (1, 0),
        ];

        let mut tree = BTree::new(3);
        for &(key, value) in entries.iter() {
            tree.insert(key, value);
        }

        tree_good(tree.clone());

        // serialize and deserialize
        let bytes = tree.to_bytes();
        let tree2 = BTree::<i32>::from_bytes(&bytes);

        tree_good(tree2.clone());
    }
}
|
Loading…
Add table
Reference in a new issue