feat: implement lexing of string and number literals

This commit is contained in:
Seymur Bagirov 2024-11-18 06:16:31 +04:00
parent 59ce325374
commit 02eb6bbc26
4 changed files with 198 additions and 18 deletions

View File

@ -34,7 +34,7 @@ pub fn run_prompt() -> Result<(), Box<dyn Error>> {
} }
} }
#[derive(Debug)] #[derive(Debug, PartialEq, Clone)]
pub struct RloxError { pub struct RloxError {
msg: String, msg: String,
line: usize, line: usize,

View File

@ -1,7 +1,7 @@
use std::{iter::Peekable, mem, str::Chars}; use std::{iter::Peekable, mem, str::Chars};
use crate::{ use crate::{
token::{Token, TokenType}, token::{LiteralType, Token, TokenType},
utils::StringUtils, utils::StringUtils,
RloxError, RloxError,
}; };
@ -37,14 +37,11 @@ impl Scanner {
// could be accomplished. TODO! // could be accomplished. TODO!
fn scan_tokens(&mut self) -> Result<&Vec<Token>, Vec<RloxError>> { fn scan_tokens(&mut self) -> Result<&Vec<Token>, Vec<RloxError>> {
let mut errors = Vec::new(); let mut errors = Vec::new();
while let Some(character) = self.advance() { while self.peek().is_some() {
self.start = self.current; self.start = self.current;
let result = self.scan_token(character); let result = self.scan_token();
if let Err(e) = result { if let Err(e) = result {
errors.push(RloxError { errors.push(e);
msg: e.to_string(),
line: self.line,
});
} }
} }
@ -62,11 +59,12 @@ impl Scanner {
Ok(&self.tokens) Ok(&self.tokens)
} }
//fn is_at_end(&self) -> bool { fn is_at_end(&self) -> bool {
// self.current >= self.source.len() self.current >= self.source.len()
//} }
fn scan_token(&mut self, token: char) -> Result<(), &'static str> { fn scan_token(&mut self) -> Result<(), RloxError> {
let token = self.advance().unwrap();
let mut error = Ok(()); let mut error = Ok(());
match token { match token {
@ -88,18 +86,24 @@ impl Scanner {
'<' => self.add_token(TokenType::Less), '<' => self.add_token(TokenType::Less),
'>' if self.peek_and_match('>') => self.add_token(TokenType::GreaterEqual), '>' if self.peek_and_match('>') => self.add_token(TokenType::GreaterEqual),
'>' => self.add_token(TokenType::Greater), '>' => self.add_token(TokenType::Greater),
// checking for comments and just advance the iterator // checking for comments and just advance the iterator if it's a comment
'/' if self.peek_and_match('/') => { '/' if self.peek_and_match('/') => {
while self.peek().is_some_and(|x| x != '\n') { while self.peek().is_some_and(|x| x != '\n') {
self.advance(); self.advance();
} }
} }
'/' => self.add_token(TokenType::Slash), '/' => self.add_token(TokenType::Slash),
'"' => error = self.string(),
'0'..='9' => self.number(),
' ' | '\r' | '\t' => (), ' ' | '\r' | '\t' => (),
'\n' => self.line += 1, '\n' => self.line += 1,
_ => error = Err("Unexpected character"), _ => {
error = Err(RloxError {
msg: "Unexpected character".to_string(),
line: self.line,
})
}
}; };
error error
@ -114,8 +118,8 @@ impl Scanner {
self.add_token_literal(t_type, None) self.add_token_literal(t_type, None)
} }
fn add_token_literal(&mut self, t_type: TokenType, literal: Option<Box<dyn std::any::Any>>) { fn add_token_literal(&mut self, t_type: TokenType, literal: Option<LiteralType>) {
let text = self.source.substring(self.start, self.current); let text = self.source.slice(self.start..self.current);
self.tokens.push(Token { self.tokens.push(Token {
t_type, t_type,
lexeme: text.to_string(), lexeme: text.to_string(),
@ -128,6 +132,12 @@ impl Scanner {
self.iter.peek().copied() self.iter.peek().copied()
} }
/// Returns the character two positions ahead of the cursor, if any,
/// without consuming input.
fn peek_double(&mut self) -> Option<char> {
    // The lookahead runs on a throwaway clone, so the scanner's own
    // iterator is left exactly where it was. `nth(1)` skips the next
    // character and yields the one after it.
    self.iter.clone().nth(1)
}
fn peek_and_match(&mut self, expected: char) -> bool { fn peek_and_match(&mut self, expected: char) -> bool {
let peek = self.peek(); let peek = self.peek();
if peek.is_some_and(|x| x == expected) { if peek.is_some_and(|x| x == expected) {
@ -137,6 +147,59 @@ impl Scanner {
false false
} }
/// Lexes a string literal whose opening quote has already been consumed.
///
/// Newlines inside the literal bump `self.line`. On success a
/// `TokenType::String` token is added whose literal is the text between
/// the quotes.
///
/// # Errors
///
/// Returns an "Unterminated string" error, tagged with the line on which
/// the literal started, when input ends before a closing quote is found.
fn string(&mut self) -> Result<(), RloxError> {
    // Remember where the literal began so the error points at its start.
    let opening_line = self.line;

    loop {
        match self.peek() {
            None => {
                return Err(RloxError {
                    msg: "Unterminated string".to_string(),
                    line: opening_line,
                });
            }
            Some('"') => break,
            Some(ch) => {
                if ch == '\n' {
                    self.line += 1;
                }
                self.advance();
            }
        }
    }

    // Step over the closing quote.
    self.advance();

    // Drop the surrounding quotes before storing the literal value.
    let contents = self
        .source
        .slice(self.start + 1..self.current - 1)
        .to_string();
    self.add_token_literal(TokenType::String, Some(LiteralType::String(contents)));
    Ok(())
}
/// Lexes a number literal whose first digit has already been consumed.
///
/// Accepts an integer part optionally followed by `.` and at least one
/// digit; a `.` that is not followed by a digit is left for the next token.
/// On completion a `TokenType::Number` token carrying the parsed `f64` is
/// added.
///
/// # Panics
///
/// Panics if the collected text cannot be parsed as `f64`. The text
/// consists solely of ASCII digits with at most one interior `.`, so this
/// indicates a scanner bug rather than bad input.
fn number(&mut self) {
    // Integer part.
    while matches!(self.peek(), Some('0'..='9')) {
        self.advance();
    }

    // Fractional part: two characters of lookahead are needed only to decide
    // whether the '.' belongs to the number.
    if self.peek() == Some('.') && matches!(self.peek_double(), Some('0'..='9')) {
        self.advance();
        // Consume every fractional digit, including the last one. Testing
        // `peek_double()` here would leave the final digit unconsumed and it
        // would be lexed again as a separate number.
        while matches!(self.peek(), Some('0'..='9')) {
            self.advance();
        }
    }

    // The end bound is exclusive, matching how `add_token_literal` slices the
    // lexeme: an inclusive bound would pull in the character following the
    // number and make the parse fail.
    let number: f64 = self
        .source
        .slice(self.start..self.current)
        .parse()
        .expect("There shouldn't be any errors. Please check");
    self.add_token_literal(TokenType::Number, Some(LiteralType::Number(number)));
}
} }
#[cfg(test)] #[cfg(test)]
@ -173,4 +236,98 @@ mod tests {
assert!(do_cols_match(&actual_tokens, &expected_tokens)); assert!(do_cols_match(&actual_tokens, &expected_tokens));
} }
/// A terminated string literal yields a String token whose literal is the
/// text between the quotes.
#[test]
fn correct_string_scan() {
    let source = r#"
// string!
"salam!""#;
    let mut scanner = Scanner::new(source.to_string());
    let tokens = scanner.scan_tokens().expect("Should not be an error!");

    // Literal of the first String token, or None if there is no such token.
    let literal = tokens
        .iter()
        .find(|tok| matches!(tok.t_type, TokenType::String))
        .and_then(|tok| tok.literal.clone());

    assert_eq!(Some(LiteralType::String("salam!".to_string())), literal);
}
/// A string literal with no closing quote is reported as an error on the
/// line where the literal started.
#[test]
fn error_string_scan() {
    let source = r#"
// Unterminated string bro wtf
"salam
(){} {}"#
        .to_string();
    let mut scanner = Scanner::new(source);
    let errors = scanner.scan_tokens().expect_err("Should be an error");

    let unterminated = errors
        .iter()
        .find(|err| err.msg == "Unterminated string")
        .cloned()
        .expect("Error not found. There should be an error");

    assert_eq!(
        RloxError {
            msg: "Unterminated string".to_string(),
            line: 3,
        },
        unterminated
    );
}
/// An integer literal is scanned into a Number token holding its value.
#[test]
fn correct_whole_number_scan() {
    let source = r#"
// number test
123"#
        .to_string();
    let mut scanner = Scanner::new(source);
    let tokens = scanner.scan_tokens().expect("There shouldn't be an error");

    let number_token = tokens
        .iter()
        .find(|tok| matches!(tok.t_type, TokenType::Number))
        .expect("There should be a number here. Couldn't find it");

    assert_eq!(
        Some(&LiteralType::Number(123.0)),
        number_token.literal.as_ref()
    )
}
/// A literal with a fractional part is scanned into a Number token holding
/// its value.
#[test]
fn correct_fractional_number_scan() {
    let source = r#"
// number test
123.456"#
        .to_string();
    let mut scanner = Scanner::new(source);
    let tokens = scanner.scan_tokens().expect("There shouldn't be an error");

    // Only the first Number token is inspected.
    let number_token = tokens
        .iter()
        .find(|tok| matches!(tok.t_type, TokenType::Number))
        .expect("There should be a number here. Couldn't find it");

    assert_eq!(
        Some(&LiteralType::Number(123.456)),
        number_token.literal.as_ref()
    )
}
} }

View File

@ -47,11 +47,18 @@ pub enum TokenType {
EOF, EOF,
} }
// I've seen this implementation in the wild: a small closed enum of literal kinds.
/// Value carried by a literal token.
///
/// The derives make literal values printable, clonable and comparable
/// with `==`.
#[derive(Debug, Clone, PartialEq)]
pub enum LiteralType {
/// Owned text of a string literal.
String(String),
/// Numeric literal, stored as `f64`.
Number(f64),
}
#[derive(Debug)] #[derive(Debug)]
pub struct Token { pub struct Token {
pub t_type: TokenType, pub t_type: TokenType,
pub lexeme: String, pub lexeme: String,
pub literal: Option<Box<dyn std::any::Any>>, pub literal: Option<LiteralType>,
pub line: usize, pub line: usize,
} }

View File

@ -1,5 +1,8 @@
use std::ops::{Bound, RangeBounds};
pub trait StringUtils { pub trait StringUtils {
fn substring(&self, start: usize, end: usize) -> &str; fn substring(&self, start: usize, end: usize) -> &str;
fn slice(&self, range: impl RangeBounds<usize>) -> &str;
} }
impl StringUtils for String { impl StringUtils for String {
@ -33,4 +36,17 @@ impl StringUtils for String {
} }
&self[byte_start..byte_end] &self[byte_start..byte_end]
} }
/// Returns the part of the string selected by `range`.
///
/// Any `RangeBounds<usize>` is accepted (`a..b`, `a..=b`, `a..`, `..b`,
/// `..`). The bounds are converted to a start position and a length and
/// handed to `substring`, which does the actual slicing. A range whose end
/// lies before its start selects nothing instead of underflowing.
fn slice(&self, range: impl RangeBounds<usize>) -> &str {
    let start = match range.start_bound() {
        Bound::Included(&bound) => bound,
        // An excluded lower bound means the selection starts one past it.
        Bound::Excluded(&bound) => bound.saturating_add(1),
        Bound::Unbounded => 0,
    };
    let end = match range.end_bound() {
        Bound::Included(&bound) => bound.saturating_add(1),
        Bound::Excluded(&bound) => bound,
        // NOTE(review): `len()` counts bytes; this relies on `substring`
        // tolerating a length that runs past the end of the string - confirm.
        Bound::Unbounded => self.len(),
    };
    // Saturate so that a reversed range yields a zero length rather than
    // panicking in debug builds or wrapping to a huge length in release.
    self.substring(start, end.saturating_sub(start))
}
} }