diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ee01e65
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,87 @@
+//! Core library of the `izanami` crate: entry points for running a script
+//! file or an interactive prompt, plus shared error reporting.
+
+use std::{
+    error::Error,
+    fmt, fs,
+    io::{self, Write},
+};
+
+use token::Token;
+
+mod scanner;
+mod token;
+mod utils;
+
+/// Reads the script at `path` and hands its contents to [`run`].
+///
+/// # Errors
+///
+/// Returns the underlying I/O error if the file cannot be read as UTF-8 text.
+pub fn run_file(path: &str) -> Result<(), Box<dyn Error>> {
+    let source = fs::read_to_string(path)?;
+    run(&source);
+
+    Ok(())
+}
+
+/// Runs a chunk of source text.
+///
+/// Currently a stub: the scanner is not wired in yet, so nothing is executed.
+pub fn run(src: &str) {
+    // TODO: feed `src` through the scanner and process the resulting tokens.
+    let _ = src;
+    let _tokens: Vec<Token> = Vec::new();
+}
+
+/// Runs the interactive prompt, reading one line at a time from stdin.
+///
+/// Returns `Ok(())` once stdin reaches end of input.
+///
+/// # Errors
+///
+/// Returns an error if reading stdin or flushing stdout fails.
+pub fn run_prompt() -> Result<(), Box<dyn Error>> {
+    let stdin = io::stdin();
+    let mut input = String::new();
+    loop {
+        print!("> ");
+        io::stdout().flush()?;
+
+        input.clear();
+        // `read_line` returns `Ok(0)` only at end of input; without this check
+        // the loop would spin forever re-printing the prompt after Ctrl-D.
+        if stdin.read_line(&mut input)? == 0 {
+            return Ok(());
+        }
+
+        run(&input);
+    }
+}
+
+/// An error found while processing source text, tagged with its line number.
+#[derive(Debug)]
+pub struct RloxError {
+    msg: String,
+    line: usize,
+}
+
+impl RloxError {
+    /// Reports `message` for `line` on stderr, with an empty location.
+    pub fn error(line: i32, message: &str) {
+        report(line, "", message);
+    }
+}
+
+impl fmt::Display for RloxError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "[line {}] Error: {}", self.line, self.msg)
+    }
+}
+
+impl Error for RloxError {}
+
+/// Prints a diagnostic to stderr as `[line N] Error <location>: <message>`.
+pub fn report(line: i32, location: &str, message: &str) {
+    eprintln!("[line {line}] Error {location}: {message}");
+}
diff --git a/src/main.rs b/src/main.rs
index f328e4d..5422fa7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1 +1,36 @@
-fn main() {}
+use std::{env::args_os, ffi::OsString, process::ExitCode};
+
+use izanami::{run_file, run_prompt};
+
+/// Entry point: with no argument start the prompt, with one argument run that
+/// script, with more print the usage line and exit with status 64.
+fn main() -> ExitCode {
+    let args: Vec<OsString> = args_os().collect();
+
+    match args.as_slice() {
+        [_, script] => {
+            // `run_file` takes `&str`, so reject a non-UTF-8 path instead of panicking.
+            let Some(path) = script.to_str() else {
+                eprintln!("Couldn't read the file. Reason: path is not valid UTF-8");
+                return ExitCode::from(1);
+            };
+
+            if let Err(err) = run_file(path) {
+                eprintln!("Couldn't read the file. Reason: {err}");
+                return ExitCode::from(1);
+            }
+        }
+        [] | [_] => {
+            if let Err(err) = run_prompt() {
+                eprintln!("Error while processing the repl. Reason: {err}");
+                return ExitCode::from(1);
+            }
+        }
+        _ => {
+            println!("usage: izanami [script]");
+            return ExitCode::from(64);
+        }
+    }
+
+    ExitCode::SUCCESS
+}
diff --git a/src/scanner.rs b/src/scanner.rs
new file mode 100644
index 0000000..c50b85a
--- /dev/null
+++ b/src/scanner.rs
@@ -0,0 +1,220 @@
+use std::{any::Any, iter::Peekable, mem, str::Chars};
+
+use crate::{
+    token::{Token, TokenType},
+    utils::StringUtils,
+    RloxError,
+};
+
+/// Splits source text into a list of [`Token`]s, one character at a time.
+pub struct Scanner {
+    source: String,
+    tokens: Vec<Token>,
+    /// Character iterator over `source`; see the `SAFETY` note in [`Scanner::new`].
+    iter: Peekable<Chars<'static>>,
+    /// Character index where the lexeme currently being scanned begins.
+    start: usize,
+    /// Number of characters consumed from `source` so far.
+    current: usize,
+    /// Line of the character being scanned, starting at 1.
+    line: usize,
+}
+
+impl Scanner {
+    /// Creates a scanner positioned at the start of `source`.
+    fn new(source: String) -> Self {
+        // The iterator lives next to the string it walks so that multi-byte UTF-8
+        // characters can be stepped over without re-decoding from the start. Safe
+        // Rust cannot express that self-reference, hence the lifetime extension.
+        //
+        // SAFETY: `Chars` points into the heap buffer owned by `source`. Moving the
+        // `String` into `Self` relocates only its pointer/length/capacity header,
+        // never the buffer. Nothing in this module mutates or replaces `source`, and
+        // `iter` is never handed out, so it cannot be used after the buffer is freed.
+        let chars = unsafe { mem::transmute::<Chars<'_>, Chars<'static>>(source.chars()) };
+        Self {
+            source,
+            iter: chars.peekable(),
+            tokens: Vec::new(),
+            start: 0,
+            current: 0,
+            line: 1,
+        }
+    }
+
+    /// Scans the whole source and returns the tokens, always ending with `EOF`.
+    ///
+    /// # Errors
+    ///
+    /// Returns every scanning error collected along the way if there was at
+    /// least one; scanning continues past errors.
+    // TODO: this should not need `&mut self`.
+    fn scan_tokens(&mut self) -> Result<&Vec<Token>, Vec<RloxError>> {
+        let mut errors = Vec::new();
+        loop {
+            // Mark the lexeme start *before* consuming its first character so that
+            // `start..current` spans exactly the lexeme.
+            self.start = self.current;
+            let Some(character) = self.advance() else {
+                break;
+            };
+
+            if let Err(e) = self.scan_token(character) {
+                errors.push(RloxError {
+                    msg: e.to_string(),
+                    line: self.line,
+                });
+            }
+        }
+
+        self.tokens.push(Token {
+            t_type: TokenType::EOF,
+            lexeme: String::new(),
+            literal: None,
+            line: self.line,
+        });
+
+        if errors.is_empty() {
+            Ok(&self.tokens)
+        } else {
+            Err(errors)
+        }
+    }
+
+    /// Scans the single lexeme that starts with the already-consumed `token`.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when `token` does not start any known lexeme.
+    fn scan_token(&mut self, token: char) -> Result<(), &'static str> {
+        match token {
+            '(' => self.add_token(TokenType::LeftParen),
+            ')' => self.add_token(TokenType::RightParen),
+            '{' => self.add_token(TokenType::LeftBrace),
+            '}' => self.add_token(TokenType::RightBrace),
+            ',' => self.add_token(TokenType::Comma),
+            '.' => self.add_token(TokenType::Dot),
+            '-' => self.add_token(TokenType::Minus),
+            '+' => self.add_token(TokenType::Plus),
+            ';' => self.add_token(TokenType::Semicolon),
+            '*' => self.add_token(TokenType::Star),
+            '!' if self.peek_and_match('=') => self.add_token(TokenType::BangEqual),
+            '!' => self.add_token(TokenType::Bang),
+            '=' if self.peek_and_match('=') => self.add_token(TokenType::EqualEqual),
+            '=' => self.add_token(TokenType::Equal),
+            '<' if self.peek_and_match('=') => self.add_token(TokenType::LessEqual),
+            '<' => self.add_token(TokenType::Less),
+            // `>=`: the second character must be `=`, not another `>`.
+            '>' if self.peek_and_match('=') => self.add_token(TokenType::GreaterEqual),
+            '>' => self.add_token(TokenType::Greater),
+            // A line comment: skip to the end of the line. The newline itself is left
+            // for the next iteration so the line counter still advances.
+            '/' if self.peek_and_match('/') => {
+                while self.peek().is_some_and(|c| c != '\n') {
+                    self.advance();
+                }
+            }
+            '/' => self.add_token(TokenType::Slash),
+
+            ' ' | '\r' | '\t' => (),
+            '\n' => self.line += 1,
+
+            _ => return Err("Unexpected character"),
+        }
+
+        Ok(())
+    }
+
+    /// Consumes and returns the next character, or `None` at the end of input.
+    fn advance(&mut self) -> Option<char> {
+        let character = self.iter.next()?;
+        // Count only characters that were actually consumed.
+        self.current += 1;
+        Some(character)
+    }
+
+    /// Records a token without a literal value.
+    fn add_token(&mut self, t_type: TokenType) {
+        self.add_token_literal(t_type, None)
+    }
+
+    /// Records a token whose lexeme is the text between `start` and `current`.
+    fn add_token_literal(&mut self, t_type: TokenType, literal: Option<Box<dyn Any>>) {
+        // `substring` takes a start index and a character *count*, not an end index.
+        let lexeme = self
+            .source
+            .substring(self.start, self.current - self.start)
+            .to_string();
+        self.tokens.push(Token {
+            t_type,
+            lexeme,
+            literal,
+            line: self.line,
+        });
+    }
+
+    /// Returns the next character without consuming it.
+    fn peek(&mut self) -> Option<char> {
+        self.iter.peek().copied()
+    }
+
+    /// Consumes the next character only if it equals `expected`.
+    fn peek_and_match(&mut self, expected: char) -> bool {
+        let matched = self.peek() == Some(expected);
+        if matched {
+            self.advance();
+        }
+        matched
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use TokenType::*;
+
+    /// Scans `src` and returns just the token types, panicking on scan errors.
+    fn scan_types(src: &str) -> Vec<TokenType> {
+        let mut scanner = Scanner::new(src.to_string());
+        scanner
+            .scan_tokens()
+            .unwrap()
+            .iter()
+            .map(|token| token.t_type)
+            .collect()
+    }
+
+    #[test]
+    fn should_be_equal() {
+        let value = r#"
+        // this is a comment
+        (( )){} // grouping stuff
+        !*+-/=<> <= == // operators
+        "#;
+
+        let expected_tokens = vec![
+            LeftParen, LeftParen, RightParen, RightParen, LeftBrace, RightBrace, Bang, Star, Plus,
+            Minus, Slash, Equal, Less, Greater, LessEqual, EqualEqual, EOF,
+        ];
+
+        assert_eq!(scan_types(value), expected_tokens);
+    }
+
+    #[test]
+    fn greater_equal_needs_an_equals_sign() {
+        assert_eq!(scan_types(">= >>"), vec![GreaterEqual, Greater, Greater, EOF]);
+    }
+
+    #[test]
+    fn lexemes_match_the_token_text() {
+        let mut scanner = Scanner::new("// é\n( != ;".to_string());
+        let lexemes: Vec<&str> = scanner
+            .scan_tokens()
+            .unwrap()
+            .iter()
+            .map(|token| token.lexeme.as_str())
+            .collect();
+
+        assert_eq!(lexemes, ["(", "!=", ";", ""]);
+    }
+}
diff --git a/src/token.rs b/src/token.rs
new file mode 100644
index 0000000..246f7fa
--- /dev/null
+++ b/src/token.rs
@@ -0,0 +1,72 @@
+use std::{any::Any, fmt::Display};
+
+/// The kind of a scanned token.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum TokenType {
+    // Single-character tokens.
+    LeftParen,
+    RightParen,
+    LeftBrace,
+    RightBrace,
+    Comma,
+    Dot,
+    Minus,
+    Plus,
+    Semicolon,
+    Slash,
+    Star,
+
+    // Operators that are one or two characters long.
+    Bang,
+    BangEqual,
+    Equal,
+    EqualEqual,
+    Greater,
+    GreaterEqual,
+    Less,
+    LessEqual,
+
+    // The kinds below are not produced by the scanner yet.
+    Identifier,
+    String,
+    Number,
+
+    And,
+    Class,
+    Else,
+    False,
+    Fun,
+    For,
+    If,
+    Nil,
+    OR,
+    Print,
+    Return,
+    Super,
+    This,
+    True,
+    Var,
+    While,
+
+    // Appended by the scanner once the input is exhausted.
+    EOF,
+}
+
+/// A single token: its kind, the source text it came from, and its line.
+#[derive(Debug)]
+pub struct Token {
+    pub t_type: TokenType,
+    pub lexeme: String,
+    // NOTE(review): the type parameter here was lost when this patch was
+    // extracted; `Box<dyn Any>` is an assumption -- confirm against the repository.
+    pub literal: Option<Box<dyn Any>>,
+    pub line: usize,
+}
+
+impl Display for Token {
+    /// Formats the token as `<type> <lexeme> <literal>`, using debug output for
+    /// the type and the literal.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?} {} {:?}", self.t_type, self.lexeme, self.literal)
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..2bda673
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,20 @@
+/// Character-based helpers for strings.
+pub trait StringUtils {
+    /// Returns up to `len` characters starting at character index `start`.
+    ///
+    /// Both arguments count characters, not bytes. A range that runs past the
+    /// end of the string is clamped to it.
+    fn substring(&self, start: usize, len: usize) -> &str;
+}
+
+impl StringUtils for String {
+    fn substring(&self, start: usize, len: usize) -> &str {
+        /// Byte offset of the `n`-th character of `s`, or `s.len()` if `s` is shorter.
+        fn byte_offset(s: &str, n: usize) -> usize {
+            s.char_indices().nth(n).map_or(s.len(), |(offset, _)| offset)
+        }
+
+        let tail = &self[byte_offset(self, start)..];
+        &tail[..byte_offset(tail, len)]
+    }
+}