Parsing seems to work now!

This commit is contained in:
Book-reader 2025-09-05 19:35:18 +12:00
parent c2f210c32c
commit 849393fbd1

View file

@ -17,7 +17,7 @@ pub fn calculate(query: &str) -> Option<Calculation> {
}
// TODO: put into own crate with dependency astro-float = "0.9.2" so I can use more than f64
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum Token {
Op(Op),
Atom(Atom),
@ -25,7 +25,7 @@ enum Token {
Func(Func),*/
}
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum Op {
BinOp(BinOp),
Func(Func), // A function is an Op that takes whatever the next thing is and binds it, either the next number or whatever is in parens
@ -33,15 +33,38 @@ enum Op {
impl Op {
fn get_lbp(&self) -> f64 {
todo!();
match self {
Op::BinOp(op) => match op {
BinOp::LParen => 0.0,
BinOp::RParen => 0.0,
BinOp::Add => 1.0,
BinOp::Subtract => 1.0,
BinOp::Multiply => 2.0,
BinOp::Divide => 2.0,
BinOp::Exponent => 3.0,
},
Op::Func(_) => 2.9, // TODO: decide if this is a good LBP
}
}
fn get_rbp(&self) -> f64 {
todo!();
match self {
Op::BinOp(op) => match op {
BinOp::LParen => 0.0,
BinOp::RParen => 0.0,
BinOp::Add => 1.1,
BinOp::Subtract => 1.1,
BinOp::Multiply => 2.1,
BinOp::Divide => 2.1,
BinOp::Exponent => 3.1,
},
Op::Func(_) => 4.0, // TODO: decide if this is a good RBP
}
}
}
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum BinOp {
Add,
Subtract,
@ -52,13 +75,13 @@ enum BinOp {
RParen,
}
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum Atom {
Number(f64), // TODO: use the unlimited precision floats library instead
Number(f64), // TODO: use the high precision floats library instead
Const(Const),
}
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum Func {
Sine,
Cosine,
@ -74,13 +97,13 @@ enum Func {
SquareRoot,
}
/// Named constants that can appear as an atom in an expression.
#[derive(Debug, Copy, Clone)]
enum Const {
    Pi,
    E,
}
#[derive(Debug)]
#[derive(Debug, Copy, Clone)]
enum ParseErr {
Eof,
Invalid,
@ -92,14 +115,20 @@ struct Lexer<'a> {
data: &'a str,
data_ptr: &'a str,
idx: usize,
next_tok: Result<Token, ParseErr>,
}
// TODO: refactor into an iterator that returns Option<Token>, where Eof is one of the token variants (or use an enum with Token(Token) and Eof variants, or just Option<Option<Token>>)
impl Lexer<'_> {
/// Creates a lexer over `data` with its one-token lookahead already filled.
///
/// `next_tok` starts as an `Err(ParseErr::Eof)` placeholder; the single
/// `next()` call below replaces it with the first lexed result, so that
/// `peek()`/`next()` on the returned lexer report the first token.
fn new(data: &str) -> Lexer<'_> {
    let mut lexer = Lexer {
        data,
        data_ptr: data,
        idx: 0,
        next_tok: Err(ParseErr::Eof),
    };
    // Prime the lookahead. The value handed back is only the placeholder,
    // so it is discarded on purpose.
    let _ = lexer.next();
    debug!("New finished!");
    lexer
}
fn next(&mut self) -> Result<Token, ParseErr> {
fn _next(&mut self) -> Result<Token, ParseErr> {
match self.data.chars().nth(self.idx) {
Some(val) => {
debug!("lexing char '{}' at idx {}", val, self.idx);
@ -115,7 +144,7 @@ impl Lexer<'_> {
'^' => Ok(Token::Op(Op::BinOp(BinOp::Exponent))),
'(' => Ok(Token::Op(Op::BinOp(BinOp::LParen))),
')' => Ok(Token::Op(Op::BinOp(BinOp::RParen))),
_ if val.is_whitespace() => self.next(),
_ if val.is_whitespace() => self._next(),
// TODO: maybe parse '-' as part of number so I can do '1 + -1' and similar
_ if val.is_digit(10) => {
let start = self.idx - 1;
@ -128,6 +157,22 @@ impl Lexer<'_> {
}
},
_ => {
let mut l: usize;
l = matches(&self.data[self.idx - 1..], "sin");
if l != 0 {
self.idx += l;
return Ok(Token::Op(Op::Func(Func::Sine)));
}
l = matches(&self.data[self.idx - 1..], "cos");
if l != 0 {
self.idx += l;
return Ok(Token::Op(Op::Func(Func::Cosine)));
}
l = matches(&self.data[self.idx - 1..], "tan");
if l != 0 {
self.idx += l;
return Ok(Token::Op(Op::Func(Func::Tangent)));
}
debug!("got invalid char '{}'", val);
Err(ParseErr::Invalid)
}
@ -137,6 +182,16 @@ impl Lexer<'_> {
}
}
/// Consumes the buffered lookahead token and returns it, refilling the
/// buffer with the next result produced by `_next()`.
fn next(&mut self) -> Result<Token, ParseErr> {
    let upcoming = self._next();
    // Swap the freshly lexed result in and hand the previous one out.
    std::mem::replace(&mut self.next_tok, upcoming)
}
/// Returns the buffered lookahead token without consuming it.
///
/// Only reads `next_tok` (a `Copy` value), so a shared borrow is enough.
fn peek(&self) -> Result<Token, ParseErr> {
    self.next_tok
}
// TODO: replace with iterator so I can do parser.parse(lexer.iter()) and parse does lex_iter.next() & such
fn lex_all(&mut self) -> Option<Vec<Token>> {
let mut tokens: Vec<Token> = vec![];
@ -151,6 +206,16 @@ impl Lexer<'_> {
}
}
/// Checks whether `s` begins with the keyword `check` as a whole word and
/// reports how far the caller should advance.
///
/// Returns:
/// - the char count of `check` when `s` starts with `check` and the very next
///   char is whitespace;
/// - the char count of `check` minus one when `s` is exactly `check`
///   (saturating at 0, so an empty `check` no longer underflows);
/// - `0` when there is no match.
///
/// NOTE(review): only whitespace counts as a terminator, so input such as
/// `"sin(1)"` is reported as no match — confirm this is intended.
fn matches(s: &str, check: &str) -> usize {
    // `strip_prefix` inspects only the first `check.len()` bytes instead of
    // counting every char of the remaining input.
    let rest = match s.strip_prefix(check) {
        Some(rest) => rest,
        None => return 0,
    };
    let c_len = check.chars().count();
    match rest.chars().next() {
        // `s` is exactly `check`.
        None => c_len.saturating_sub(1),
        Some(c) if c.is_whitespace() => c_len,
        Some(_) => 0,
    }
}
/// Expression parser that owns the `Lexer` it reads tokens from.
struct Parser<'a> {
// Token source; the parsing code reads from it via `peek()` and `next()`.
lex: Lexer<'a>,
}
@ -173,27 +238,33 @@ impl Parser<'_> {
Token::Atom(val) => Ok(Expr::Atom(val)),
Token::Op(op) => match op {
Op::BinOp(BinOp::LParen) => self.parse_expr(op.get_lbp()),
Op::Func(f) => Ok(Expr::Node(Op::Func(f), vec![self.parse_expr(op.get_lbp())?])),
_ => Err(ParseErr::Invalid),
},
},
Err(err) => Err(err),
}.map_err(|err| { debug!("Unexpected error at start of expr: {:?}", err); err })?;
debug!("lhs of expression is {:?}", lhs);
loop {
let op: Op = match self.lex.next() {
let op: Op = match self.lex.peek() {
Err(ParseErr::Eof) => break,
Err(e) => return Err(e),
Err(e) => { debug!("In expr got err {:?}", e); Err(e) },
Ok(tok) => match tok {
Token::Op(op) => match op {
Op::BinOp(op) => match op {
BinOp::RParen => break,
_ => Ok(Op::BinOp(op)),
},
Op::Func(f) => Ok(Op::Func(f)),
Op::Func(f) => {
lhs = Expr::Node(Op::Func(f), vec![self.parse_expr(Op::Func(f).get_lbp())?]);
continue;
},
}
_ => Err(ParseErr::Invalid),
v => { debug!("Got unexpected token {:?}", v); Err(ParseErr::Invalid) },
}
}.map_err(|err| { debug!("Unexpected error inside expr: {:?}", err); err })?;
}.map_err(|err| { debug!("Unexpected error inside expr at {:?}", err); err })?;
if (op.get_lbp() < min_bp) { break; }
self.lex.next();
let rhs: Expr = self.parse_expr(op.get_rbp())?;
lhs = Expr::Node(op, vec![lhs, rhs]);
}
@ -201,16 +272,6 @@ impl Parser<'_> {
}
}
/*#[derive(Debug)]
enum Op {
Add(Expr, Expr),
Sub(Expr, Expr),
Mul(Expr, Expr),
Div(Expr, Expr),
Func(Func, Expr),
}*/
// TODO: look at that parser video again
#[derive(Debug)]
enum Expr {
Atom(Atom),