diff --git a/src/ast.rs b/src/ast.rs
index 899e156..c2b97f3 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,5 +1,6 @@
 use crate::token::{LiteralType, Token};
 
+#[derive(Debug, Clone)]
 pub enum Expr {
     Binary {
         left: Box<Expr>,
diff --git a/src/lib.rs b/src/lib.rs
index 921b805..54ac438 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,13 +1,17 @@
 use std::{
     error::Error,
+    fmt::Display,
     fs,
     io::{self, Write},
 };
 
+use parser::{ParseError, Parser};
+use printer::pretty_print;
 use scanner::Scanner;
-use token::Token;
+use token::{Token, TokenType};
 
 mod ast;
+mod parser;
 mod printer;
 mod scanner;
 mod token;
@@ -16,26 +20,29 @@ mod utils;
 
 pub fn run_file(path: &str) -> Result<(), Box<dyn Error>> {
     let file = fs::read_to_string(path)?;
-    run(&file);
+    run(&file)?;
     Ok(())
 }
 
-pub fn run(src: &str) {
+/// Scans and parses `src`, printing the pretty-printed expression to stdout.
+///
+/// A scanner failure is returned as `Err`; a parse error is reported and `Ok(())` is returned.
+pub fn run(src: &str) -> Result<(), Box<dyn Error>> {
     let mut scanner = Scanner::new(src.to_string());
-    let tokens = scanner.scan_tokens();
+    let tokens = scanner.scan_tokens()?;
 
-    match tokens {
-        Err(ref errors) => {
-            for err in errors {
-                report(err.line, "", &err.msg);
-            }
-        }
-        Ok(tokens) => {
-            for token in tokens {
-                println!("{}", token);
-            }
+    let mut parser = Parser::new(tokens);
+
+    let expression = parser.parse();
+
+    match expression {
+        Ok(expr) => println!("{}", pretty_print(&expr)),
+        Err(e) => {
+            error(e.token, &e.msg);
         }
     }
+
+    Ok(())
 }
 
 pub fn run_prompt() -> Result<(), Box<dyn Error>> {
@@ -46,7 +53,10 @@ pub fn run_prompt() -> Result<(), Box<dyn Error>> {
         print!("> ");
         io::stdout().flush()?;
         stdin.read_line(input)?;
-        run(input);
+        // Keep the REPL alive after a failed line, but surface the error instead of dropping it.
+        if let Err(e) = run(input) {
+            eprintln!("{e}");
+        }
     }
 }
 
@@ -59,3 +69,11 @@ pub struct RloxError {
 pub fn report(line: usize, location: &str, message: &str) {
     eprintln!("[line {line}] Error {location}: {message}");
 }
+
+/// Reports `message` at `token`: " at end" for `EOF`, otherwise quoting the token's lexeme.
+fn error(token: Token, message: &str) {
+    match token.t_type {
+        TokenType::EOF => report(token.line, " at end", message),
+        _ => report(token.line, &format!(" at '{}'", token.lexeme), message),
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0265a8c
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,220 @@
+use std::fmt::Display;
+
+use crate::{
+    ast::Expr,
+    token::{LiteralType, Token, TokenType},
+};
+
+/// Recursive-descent expression parser over a borrowed token list.
+pub struct Parser<'a> {
+    tokens: &'a Vec<Token>,
+    /// Index into `tokens` of the next token to consume.
+    current: usize,
+}
+
+/// A syntax error: the token where parsing failed plus a message.
+#[derive(Debug)]
+pub struct ParseError {
+    pub token: Token,
+    pub msg: String,
+}
+
+impl Parser<'_> {
+    /// Creates a parser positioned at the first token.
+    ///
+    /// `tokens` must end with an `EOF` token: `peek` indexes the list
+    /// directly and only `EOF` makes `is_at_end` return true.
+    pub fn new(tokens: &Vec<Token>) -> Parser<'_> {
+        Parser { tokens, current: 0 }
+    }
+
+    /// Parses one expression starting at the current token.
+    pub fn parse(&mut self) -> Result<Expr, ParseError> {
+        self.expression()
+    }
+
+    /// Lowest-precedence rule; delegates to `equality`.
+    fn expression(&mut self) -> Result<Expr, ParseError> {
+        self.equality()
+    }
+
+    /// `comparison` operands joined by `BangEqual` / `EqualEqual`.
+    fn equality(&mut self) -> Result<Expr, ParseError> {
+        use TokenType::*;
+        self.left_association_binary(&[BangEqual, EqualEqual], Self::comparison)
+    }
+
+    /// `term` operands joined by `Greater` / `GreaterEqual` / `Less` / `LessEqual`.
+    fn comparison(&mut self) -> Result<Expr, ParseError> {
+        use TokenType::*;
+        self.left_association_binary(&[Greater, GreaterEqual, Less, LessEqual], Self::term)
+    }
+
+    /// `factor` operands joined by `Minus` / `Plus`.
+    fn term(&mut self) -> Result<Expr, ParseError> {
+        use TokenType::*;
+        self.left_association_binary(&[Minus, Plus], Self::factor)
+    }
+
+    /// `unary` operands joined by `Slash` / `Star`.
+    fn factor(&mut self) -> Result<Expr, ParseError> {
+        use TokenType::*;
+        self.left_association_binary(&[Slash, Star], Self::unary)
+    }
+
+    /// A `Bang` / `Minus` prefix applied to another unary, or else a primary.
+    fn unary(&mut self) -> Result<Expr, ParseError> {
+        use TokenType::*;
+        if self.match_token(&[Bang, Minus]) {
+            let op = self.previous().clone();
+            let right = self.unary()?;
+            return Ok(Expr::Unary {
+                op,
+                right: Box::new(right),
+            });
+        }
+
+        self.primary()
+    }
+
+    /// A literal (`False`, `True`, `Number`, `String`, `Nil`) or a parenthesized
+    /// expression; anything else is an "Expect expression." error at the current token.
+    fn primary(&mut self) -> Result<Expr, ParseError> {
+        use LiteralType::*;
+        use TokenType::*;
+
+        fn create_literal(l_type: Option<LiteralType>) -> Expr {
+            Expr::Literal { value: l_type }
+        }
+
+        if self.match_token(&[False]) {
+            return Ok(create_literal(Some(Bool(false))));
+        }
+
+        if self.match_token(&[True]) {
+            return Ok(create_literal(Some(Bool(true))));
+        }
+
+        if self.match_token(&[TokenType::Number, TokenType::String]) {
+            return Ok(create_literal(self.previous().literal.clone()));
+        }
+
+        // The enum name is spelled out because `Nil` exists in both LiteralType and TokenType.
+        if self.match_token(&[TokenType::Nil]) {
+            return Ok(create_literal(Some(LiteralType::Nil)));
+        }
+
+        if self.match_token(&[LeftParen]) {
+            let expr = self.expression()?;
+            self.consume(RightParen, "Expect ')' after expression")?;
+            return Ok(Expr::Grouping {
+                expression: Box::new(expr),
+            });
+        }
+
+        Err(ParseError {
+            token: self.peek().clone(),
+            msg: "Expect expression.".to_string(),
+        })
+    }
+
+    /// Consumes the current token if it has type `t_type`; otherwise fails with `err_msg`.
+    fn consume(&mut self, t_type: TokenType, err_msg: &str) -> Result<Token, ParseError> {
+        if self.check(t_type) {
+            return Ok(self.advance().clone());
+        }
+
+        Err(ParseError {
+            token: self.peek().clone(),
+            msg: err_msg.to_string(),
+        })
+    }
+
+    /// Discards tokens until the previous one is a `Semicolon` or the next one is
+    /// `Class`, `Fun`, `Var`, `For`, `If`, `While`, `Print` or `Return`.
+    ///
+    /// Will not be used for the time being (per the book).
+    fn synchronize(&mut self) {
+        use TokenType::*;
+        self.advance();
+
+        while !self.is_at_end() {
+            if self.previous().t_type == TokenType::Semicolon {
+                return;
+            }
+
+            if let Class | Fun | Var | For | If | While | Print | Return = self.peek().t_type {
+                return;
+            }
+
+            // Must stay inside the loop: without it the cursor never moves and the loop spins.
+            self.advance();
+        }
+    }
+
+    /// Parses `operand (op operand)*`, folding to the left, where `op` is any of `types`.
+    fn left_association_binary(
+        &mut self,
+        types: &[TokenType],
+        expr_fn: fn(&mut Self) -> Result<Expr, ParseError>,
+    ) -> Result<Expr, ParseError> {
+        let mut expr = expr_fn(self)?;
+        while self.match_token(types) {
+            let op = self.previous().clone();
+            let right = expr_fn(self)?;
+            expr = Expr::Binary {
+                left: Box::new(expr),
+                op,
+                right: Box::new(right),
+            }
+        }
+
+        Ok(expr)
+    }
+
+    /// Advances past the current token if its type is one of `types`.
+    fn match_token(&mut self, types: &[TokenType]) -> bool {
+        for t_type in types {
+            if self.check(*t_type) {
+                self.advance();
+                return true;
+            }
+        }
+
+        false
+    }
+
+    /// True when not at `EOF` and the current token has type `t_type`.
+    fn check(&self, t_type: TokenType) -> bool {
+        if self.is_at_end() {
+            return false;
+        }
+        self.peek().t_type == t_type
+    }
+
+    /// Moves forward one token unless at `EOF`, returning `previous()`.
+    fn advance(&mut self) -> &Token {
+        if !self.is_at_end() {
+            self.current += 1;
+        }
+        self.previous()
+    }
+
+    /// True when the current token is `EOF`.
+    fn is_at_end(&self) -> bool {
+        matches!(self.peek().t_type, TokenType::EOF)
+    }
+
+    /// The token at the cursor (not yet consumed).
+    fn peek(&self) -> &Token {
+        &self.tokens[self.current]
+    }
+
+    /// The most recently consumed token.
+    ///
+    /// `saturating_sub` keeps this from underflowing when nothing has been consumed yet
+    /// (e.g. `advance` on a stream holding only `EOF`); the first token is returned then.
+    fn previous(&self) -> &Token {
+        &self.tokens[self.current.saturating_sub(1)]
+    }
+}
diff --git a/src/printer.rs b/src/printer.rs
index b744079..deaf8bd 100644
--- a/src/printer.rs
+++ b/src/printer.rs
@@ -7,6 +7,8 @@ pub fn pretty_print(expr: &Expr) -> String {
         Expr::Literal { value } => match value {
             Some(LiteralType::String(v)) => v.to_string(),
             Some(LiteralType::Number(v)) => v.to_string(),
+            Some(LiteralType::Bool(v)) => v.to_string(),
+            Some(LiteralType::Nil) => "Nil".to_string(),
             None => "None".to_string(),
         },
         Expr::Unary { op, right } => parenthesize(&op.lexeme, &[right]),
diff --git a/src/scanner.rs b/src/scanner.rs
index cd4aec6..3c441df 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -1,4 +1,4 @@
-use std::{iter::Peekable, mem, str::Chars};
+use std::{fmt::Display, iter::Peekable, mem, str::Chars};
 
 use crate::{
     token::{LiteralType, Token, TokenType},
@@ -15,6 +15,21 @@ pub struct Scanner {
     line: usize,
 }
 
+/// All errors collected during one `scan_tokens` run.
+#[derive(Debug)]
+pub struct ScannerError {
+    errors: Vec<RloxError>,
+}
+
+/// Renders the collected errors with their `Debug` formatting.
+impl Display for ScannerError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Errors: {:?}", self.errors)
+    }
+}
+
+impl std::error::Error for ScannerError {}
+
 impl Scanner {
     pub fn new(source: String) -> Self {
         // the reason for using unsafe here is to have the ability to use utf-8 symbols
@@ -35,7 +50,7 @@ impl Scanner {
 
     // this is so awful for me to write. This function needs to be not mutable in theory and it
     // could be accomplished. TODO!
-    pub fn scan_tokens(&mut self) -> Result<&Vec<Token>, Vec<RloxError>> {
+    pub fn scan_tokens(&mut self) -> Result<&Vec<Token>, ScannerError> {
         let mut errors = Vec::new();
         while self.peek().is_some() {
             self.start = self.current;
@@ -53,7 +68,7 @@ impl Scanner {
         });
 
         if !errors.is_empty() {
-            return Err(errors);
+            return Err(ScannerError { errors });
         }
 
         Ok(&self.tokens)
@@ -331,6 +346,7 @@ mod tests {
         let tokens = scanner.scan_tokens().expect_err("Should be an error");
 
         let actual_error = tokens
+            .errors
             .iter()
             .find(|e| e.msg == "Unterminated string")
             .expect("Error not found. There should be an error");
diff --git a/src/token.rs b/src/token.rs
index 48ad0cb..92d5751 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -52,19 +52,21 @@ pub enum TokenType {
 pub enum LiteralType {
     String(String),
     Number(f64),
+    Bool(bool),
+    Nil,
 }
 
 impl LiteralType {
     pub fn string_literal(val: &str) -> LiteralType {
-        return LiteralType::String(val.to_string());
+        LiteralType::String(val.to_string())
     }
 
     pub fn number_literal(val: f64) -> LiteralType {
-        return LiteralType::Number(val);
+        LiteralType::Number(val)
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Token {
     pub t_type: TokenType,
     pub lexeme: String,