diff --git a/src/lib.rs b/src/lib.rs index ee01e65..37686ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ pub fn run_prompt() -> Result<(), Box> { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Clone)] pub struct RloxError { msg: String, line: usize, diff --git a/src/scanner.rs b/src/scanner.rs index e2bc28c..50497ce 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,7 +1,7 @@ use std::{iter::Peekable, mem, str::Chars}; use crate::{ - token::{Token, TokenType}, + token::{LiteralType, Token, TokenType}, utils::StringUtils, RloxError, }; @@ -37,14 +37,11 @@ impl Scanner { // could be accomplished. TODO! fn scan_tokens(&mut self) -> Result<&Vec, Vec> { let mut errors = Vec::new(); - while let Some(character) = self.advance() { + while self.peek().is_some() { self.start = self.current; - let result = self.scan_token(character); + let result = self.scan_token(); if let Err(e) = result { - errors.push(RloxError { - msg: e.to_string(), - line: self.line, - }); + errors.push(e); } } @@ -62,11 +59,12 @@ impl Scanner { Ok(&self.tokens) } - //fn is_at_end(&self) -> bool { - // self.current >= self.source.len() - //} + fn is_at_end(&self) -> bool { + self.current >= self.source.len() + } - fn scan_token(&mut self, token: char) -> Result<(), &'static str> { + fn scan_token(&mut self) -> Result<(), RloxError> { + let token = self.advance().unwrap(); let mut error = Ok(()); match token { @@ -88,18 +86,24 @@ impl Scanner { '<' => self.add_token(TokenType::Less), '>' if self.peek_and_match('>') => self.add_token(TokenType::GreaterEqual), '>' => self.add_token(TokenType::Greater), - // checking for comments and just advance the iterator + // checking for comments and just advance the iterator if it's a comment '/' if self.peek_and_match('/') => { while self.peek().is_some_and(|x| x != '\n') { self.advance(); } } '/' => self.add_token(TokenType::Slash), - + '"' => error = self.string(), + '0'..='9' => self.number(), ' ' | '\r' | '\t' => (), '\n' => self.line += 1, - _ => error = Err("Unexpected character"), + _ => { + error = Err(RloxError { + msg: "Unexpected character".to_string(), + line: self.line, + }) + } }; error @@ -114,8 +118,8 @@ impl Scanner { self.add_token_literal(t_type, None) } - fn add_token_literal(&mut self, t_type: TokenType, literal: Option>) { - let text = self.source.substring(self.start, self.current); + fn add_token_literal(&mut self, t_type: TokenType, literal: Option) { + let text = self.source.slice(self.start..self.current); self.tokens.push(Token { t_type, lexeme: text.to_string(), @@ -128,6 +132,12 @@ impl Scanner { self.iter.peek().copied() } + fn peek_double(&mut self) -> Option { + let mut copied_iterator = self.iter.clone(); + copied_iterator.next(); + copied_iterator.peek().copied() + } + fn peek_and_match(&mut self, expected: char) -> bool { let peek = self.peek(); if peek.is_some_and(|x| x == expected) { @@ -137,6 +147,59 @@ impl Scanner { false } + + fn string(&mut self) -> Result<(), RloxError> { + let start_line = self.line; + while self.peek().is_some_and(|x| x != '"') { + if self.peek().is_some_and(|x| x == '\n') { + self.line += 1; + } + self.advance(); + } + + if self.peek().is_none() { + let error = RloxError { + msg: "Unterminated string".to_string(), + line: start_line, + }; + return Err(error); + } + + self.advance(); + + // clean out the quotes and wrap it in a string literal type + let value = LiteralType::String( + self.source + .slice(self.start + 1..self.current - 1) + .to_string(), + ); + + self.add_token_literal(TokenType::String, Some(value)); + + Ok(()) + } + + fn number(&mut self) { + while matches!(self.peek(), Some('0'..='9')) { + self.advance(); + } + + if self.peek().is_some_and(|x| x == '.') && matches!(self.peek_double(), Some('0'..='9')) { + self.advance(); + + while matches!(self.peek_double(), Some('0'..='9')) { + self.advance(); + } + } + + let number: f64 = self + .source + .slice(self.start..=self.current) + .parse() + .expect("There shouldn't be any errors. Please check"); + + self.add_token_literal(TokenType::Number, Some(LiteralType::Number(number))); + } } #[cfg(test)] @@ -173,4 +236,98 @@ mod tests { assert!(do_cols_match(&actual_tokens, &expected_tokens)); } + + #[test] + fn correct_string_scan() { + let value = r#" + // string! + "salam!""#; + + let mut scanner = Scanner::new(value.to_string()); + + let tokens: Vec<&Token> = scanner + .scan_tokens() + .expect("Should not be an error!") + .iter() + .filter(|x| matches!(x.t_type, TokenType::String)) + .collect(); + + let actual = tokens[0]; + + let expected = LiteralType::String("salam!".to_string()); + + assert_eq!(expected, actual.literal.as_ref().unwrap().clone()); + } + + #[test] + fn error_string_scan() { + let value = r#" + // Unterminated string bro wtf + "salam + + (){} {}"# + .to_string(); + + let mut scanner = Scanner::new(value); + + let expected_error = RloxError { + msg: "Unterminated string".to_string(), + line: 3, + }; + + let tokens = scanner.scan_tokens().expect_err("Should be an error"); + + let actual_error = tokens + .iter() + .find(|e| e.msg == "Unterminated string") + .expect("Error not found. There should be an error"); + + assert_eq!(expected_error, actual_error.clone()); + } + + #[test] + fn correct_whole_number_scan() { + let value = r#" + // number test + 123"# + .to_string(); + + let mut scanner = Scanner::new(value); + + let expected_value = LiteralType::Number(123.0); + + let tokens = scanner.scan_tokens().expect("There shouldn't be an error"); + + let token = tokens + .iter() + .find(|t| matches!(t.t_type, TokenType::Number)) + .expect("There should be a number here. Couldn't find it"); + + let actual_value = &token.literal; + + assert_eq!(expected_value, actual_value.as_ref().unwrap().clone()) + } + + #[test] + fn correct_fractional_number_scan() { + let value = r#" + // number test + 123.456"# + .to_string(); + + let mut scanner = Scanner::new(value); + + let expected_value = LiteralType::Number(123.456); + + let tokens = scanner.scan_tokens().expect("There shouldn't be an error"); + + let token = tokens + .iter() + .find(|t| matches!(t.t_type, TokenType::Number)) + .expect("There should be a number here. Couldn't find it"); + + let actual_value = &token.literal; + + assert_eq!(expected_value, actual_value.as_ref().unwrap().clone()) + } } diff --git a/src/token.rs b/src/token.rs index 246f7fa..a610a89 100644 --- a/src/token.rs +++ b/src/token.rs @@ -47,11 +47,18 @@ pub enum TokenType { EOF, } +// i've seen this implementation in the wild +#[derive(Debug, Clone, PartialEq)] +pub enum LiteralType { + String(String), + Number(f64), +} + #[derive(Debug)] pub struct Token { pub t_type: TokenType, pub lexeme: String, - pub literal: Option>, + pub literal: Option, pub line: usize, } diff --git a/src/utils.rs b/src/utils.rs index 2bda673..199cc08 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,5 +1,8 @@ +use std::ops::{Bound, RangeBounds}; + pub trait StringUtils { fn substring(&self, start: usize, end: usize) -> &str; + fn slice(&self, range: impl RangeBounds) -> &str; } impl StringUtils for String { @@ -33,4 +36,17 @@ impl StringUtils for String { } &self[byte_start..byte_end] } + + fn slice(&self, range: impl RangeBounds) -> &str { + let start = match range.start_bound() { + Bound::Included(bound) | Bound::Excluded(bound) => *bound, + Bound::Unbounded => 0, + }; + let len = match range.end_bound() { + Bound::Included(bound) => *bound + 1, + Bound::Excluded(bound) => *bound, + Bound::Unbounded => self.len(), + } - start; + self.substring(start, len) + } }