WIP: Pratt parsing

This commit is contained in:
Book-reader 2025-09-05 16:45:57 +12:00
parent 7a57c7bf1e
commit c2f210c32c

View file

@ -31,6 +31,16 @@ enum Op {
Func(Func), // A function is an Op that takes whatever the next thing is and binds it, either the next number or whatever is in parens Func(Func), // A function is an Op that takes whatever the next thing is and binds it, either the next number or whatever is in parens
} }
impl Op {
    /// Left binding power of this operator.
    ///
    /// `Parser::parse_expr` compares this against its `min_bp` argument and
    /// stops extending the current expression when it is smaller.
    ///
    /// Not implemented yet: calling this currently panics through `todo!()`.
    fn get_lbp(&self) -> f64 {
        todo!()
    }

    /// Right binding power of this operator.
    ///
    /// `Parser::parse_expr` passes this as `min_bp` when it parses the operand
    /// on the operator's right-hand side.
    ///
    /// Not implemented yet: calling this currently panics through `todo!()`.
    fn get_rbp(&self) -> f64 {
        todo!()
    }
}
#[derive(Debug)] #[derive(Debug)]
enum BinOp { enum BinOp {
Add, Add,
@ -71,7 +81,7 @@ enum Const {
} }
#[derive(Debug)] #[derive(Debug)]
enum LexErr { enum ParseErr {
Eof, Eof,
Invalid, Invalid,
} }
@ -89,7 +99,7 @@ impl Lexer<'_> {
fn new(data: &str) -> Lexer<'_> { Lexer {data, data_ptr: data, idx: 0} } fn new(data: &str) -> Lexer<'_> { Lexer {data, data_ptr: data, idx: 0} }
fn next(&mut self) -> Result<Token, LexErr> { fn next(&mut self) -> Result<Token, ParseErr> {
match self.data.chars().nth(self.idx) { match self.data.chars().nth(self.idx) {
Some(val) => { Some(val) => {
debug!("lexing char '{}' at idx {}", val, self.idx); debug!("lexing char '{}' at idx {}", val, self.idx);
@ -114,16 +124,16 @@ impl Lexer<'_> {
match self.data[start..self.idx].parse() { match self.data[start..self.idx].parse() {
Ok(val) => Ok(Token::Atom(Atom::Number(val))), Ok(val) => Ok(Token::Atom(Atom::Number(val))),
Err(e) => Err(LexErr::Invalid), Err(e) => Err(ParseErr::Invalid),
} }
}, },
_ => { _ => {
debug!("got invalid char '{}'", val); debug!("got invalid char '{}'", val);
Err(LexErr::Invalid) Err(ParseErr::Invalid)
} }
} }
} }
None => Err(LexErr::Eof), None => Err(ParseErr::Eof),
} }
} }
@ -132,8 +142,8 @@ impl Lexer<'_> {
let mut tokens: Vec<Token> = vec![]; let mut tokens: Vec<Token> = vec![];
loop { loop {
match self.next() { match self.next() {
Err(LexErr::Eof) => return Some(tokens), Err(ParseErr::Eof) => return Some(tokens),
Err(LexErr::Invalid) => return None, Err(ParseErr::Invalid) => return None,
Ok(tok) => tokens.push(tok), Ok(tok) => tokens.push(tok),
} }
// debug!("tokens: {:?}", tokens); // debug!("tokens: {:?}", tokens);
@ -145,14 +155,6 @@ struct Parser<'a> {
lex: Lexer<'a>, lex: Lexer<'a>,
} }
#[derive(Debug)]
enum ParseErr {
Eof,
Invalid,
// TODO, add more or maybe just use an option instead
}
impl Parser<'_> { impl Parser<'_> {
fn new(lex: Lexer) -> Parser { Parser {lex} } fn new(lex: Lexer) -> Parser { Parser {lex} }
@ -161,11 +163,41 @@ impl Parser<'_> {
} }
fn parse_expr(&mut self, min_bp: f64) -> Result<Expr, ParseErr> { fn parse_expr(&mut self, min_bp: f64) -> Result<Expr, ParseErr> {
while let Ok(val) = self.lex.next() {debug!("token: {:?}", val)} /*while let Ok(val) = self.lex.next() {debug!("token: {:?}", val)}
match self.lex.next().err() { match self.lex.next().err() {
_ => return Err(ParseErr::Invalid), _ => return Err(ParseErr::Invalid),
}*/
let mut lhs: Expr = match self.lex.next() {
Ok(val) => match val {
Token::Atom(val) => Ok(Expr::Atom(val)),
Token::Op(op) => match op {
Op::BinOp(BinOp::LParen) => self.parse_expr(op.get_lbp()),
_ => Err(ParseErr::Invalid),
},
},
Err(err) => Err(err),
}.map_err(|err| { debug!("Unexpected error at start of expr: {:?}", err); err })?;
loop {
let op: Op = match self.lex.next() {
Err(ParseErr::Eof) => break,
Err(e) => return Err(e),
Ok(tok) => match tok {
Token::Op(op) => match op {
Op::BinOp(op) => match op {
BinOp::RParen => break,
_ => Ok(Op::BinOp(op)),
},
Op::Func(f) => Ok(Op::Func(f)),
}
_ => Err(ParseErr::Invalid),
}
}.map_err(|err| { debug!("Unexpected error inside expr: {:?}", err); err })?;
if (op.get_lbp() < min_bp) { break; }
let rhs: Expr = self.parse_expr(op.get_rbp())?;
lhs = Expr::Node(op, vec![lhs, rhs]);
} }
Ok(lhs)
} }
} }
@ -181,6 +213,6 @@ enum Op {
// TODO: look at that parser video again // TODO: look at that parser video again
#[derive(Debug)] #[derive(Debug)]
enum Expr { enum Expr {
Atom(Token), Atom(Atom),
Node(Op, Vec<Expr>), Node(Op, Vec<Expr>),
} }