mirror of
https://github.com/TheM1Stery/izanami.git
synced 2025-04-19 16:31:11 +00:00
feat: add basic lexing(scanning)
just following the Scanning section of the book. Partially implemented
This commit is contained in:
parent
ef7d7bcbf8
commit
5988cd84ef
51
src/lib.rs
Normal file
51
src/lib.rs
Normal file
@ -0,0 +1,51 @@
|
||||
use std::{
|
||||
error::Error,
|
||||
fs,
|
||||
io::{self, Write},
|
||||
};
|
||||
|
||||
use token::Token;
|
||||
|
||||
mod scanner;
|
||||
mod token;
|
||||
mod utils;
|
||||
|
||||
/// Loads the script at `path` into memory.
///
/// The file contents are read and then dropped; nothing is scanned or executed here.
///
/// # Errors
///
/// Returns the boxed I/O error from [`fs::read_to_string`] when the file cannot be read.
pub fn run_file(path: &str) -> Result<(), Box<dyn Error>> {
    match fs::read_to_string(path) {
        Ok(_contents) => Ok(()),
        Err(io_err) => Err(io_err.into()),
    }
}
|
||||
|
||||
pub fn run(src: &str) {
|
||||
let tokens: Vec<Token> = Vec::new();
|
||||
}
|
||||
|
||||
/// Runs the interactive prompt: prints `> `, reads one line from standard input, and repeats.
///
/// The line that was read is not processed yet; it is only stored in a reusable buffer.
/// The loop ends cleanly when standard input reaches end-of-file (e.g. Ctrl-D or a closed
/// pipe). Previously end-of-file was ignored and the loop kept printing prompts forever.
///
/// # Errors
///
/// Returns the boxed I/O error if flushing standard output or reading standard input fails.
pub fn run_prompt() -> Result<(), Box<dyn Error>> {
    let stdin = io::stdin();
    let mut input = String::new();

    loop {
        // `print!` does not flush, so flush explicitly to make the prompt visible
        // before blocking on input.
        print!("> ");
        io::stdout().flush()?;

        input.clear();
        // `read_line` yields `Ok(0)` only at end-of-file; an empty line still contains "\n".
        if stdin.read_line(&mut input)? == 0 {
            return Ok(());
        }
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RloxError {
|
||||
msg: String,
|
||||
line: usize,
|
||||
}
|
||||
|
||||
impl RloxError {
|
||||
pub fn error(line: i32, message: &str) {
|
||||
report(line, "", message);
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a single diagnostic line to standard error.
///
/// The output has the shape `[line <line>] Error <location>: <message>`; `location` may be
/// empty, in which case the space before the colon is still printed.
pub fn report(line: i32, location: &str, message: &str) {
    eprintln!("[line {line}] Error {location}: {message}");
}
|
29
src/main.rs
29
src/main.rs
@ -1 +1,28 @@
|
||||
fn main() {}
|
||||
use std::{env::args_os, ffi::OsString, process::ExitCode};
|
||||
|
||||
use izanami::{run_file, run_prompt};
|
||||
|
||||
fn main() -> ExitCode {
|
||||
let args: Vec<OsString> = args_os().collect();
|
||||
|
||||
if args.len() > 2 {
|
||||
println!("usage: izanami [script]");
|
||||
return ExitCode::from(64);
|
||||
} else if args.len() == 2 {
|
||||
let result = run_file(args[1].to_str().unwrap());
|
||||
|
||||
if let Err(res) = result {
|
||||
println!("Couldn't read the file. Reason: {}", &*res);
|
||||
return ExitCode::from(1);
|
||||
}
|
||||
} else {
|
||||
let result = run_prompt();
|
||||
|
||||
if let Err(res) = result {
|
||||
println!("Error while processing the repl. Reason: {}", &*res);
|
||||
return ExitCode::from(1);
|
||||
}
|
||||
}
|
||||
|
||||
ExitCode::SUCCESS
|
||||
}
|
||||
|
180
src/scanner.rs
Normal file
180
src/scanner.rs
Normal file
@ -0,0 +1,180 @@
|
||||
use std::{iter::Peekable, mem, str::Chars};
|
||||
|
||||
use crate::{
|
||||
token::{Token, TokenType},
|
||||
utils::StringUtils,
|
||||
RloxError,
|
||||
};
|
||||
|
||||
pub struct Scanner {
|
||||
source: String,
|
||||
tokens: Vec<Token>,
|
||||
iter: Peekable<Chars<'static>>,
|
||||
start: usize,
|
||||
current: usize,
|
||||
line: usize,
|
||||
}
|
||||
|
||||
impl Scanner {
|
||||
fn new(source: String) -> Self {
|
||||
// the reason for using unsafe here is to have the ability to use utf-8 symbols
|
||||
// rust doesn't allow having both the iterator and iterable inside one
|
||||
// structure(understandably so bcs of reference invalidation)
|
||||
let chars = unsafe {
|
||||
mem::transmute::<std::str::Chars<'_>, std::str::Chars<'static>>(source.chars())
|
||||
};
|
||||
Self {
|
||||
source,
|
||||
iter: chars.peekable(),
|
||||
tokens: Vec::new(),
|
||||
start: 0,
|
||||
current: 0,
|
||||
line: 1,
|
||||
}
|
||||
}
|
||||
|
||||
// this is so awful for me to write. This function needs to be not mutable in theory and it
|
||||
// could be accomplished. TODO!
|
||||
fn scan_tokens(&mut self) -> Result<&Vec<Token>, Vec<RloxError>> {
|
||||
let mut errors = Vec::new();
|
||||
while let Some(character) = self.advance() {
|
||||
self.start = self.current;
|
||||
let result = self.scan_token(character);
|
||||
if let Err(e) = result {
|
||||
errors.push(RloxError {
|
||||
msg: e.to_string(),
|
||||
line: self.line,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
self.tokens.push(Token {
|
||||
t_type: TokenType::EOF,
|
||||
lexeme: "".to_string(),
|
||||
literal: None,
|
||||
line: self.line,
|
||||
});
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(errors);
|
||||
}
|
||||
|
||||
Ok(&self.tokens)
|
||||
}
|
||||
|
||||
//fn is_at_end(&self) -> bool {
|
||||
// self.current >= self.source.len()
|
||||
//}
|
||||
|
||||
fn scan_token(&mut self, token: char) -> Result<(), &'static str> {
|
||||
let mut error = Ok(());
|
||||
|
||||
match token {
|
||||
'(' => self.add_token(TokenType::LeftParen),
|
||||
')' => self.add_token(TokenType::RightParen),
|
||||
'{' => self.add_token(TokenType::LeftBrace),
|
||||
'}' => self.add_token(TokenType::RightBrace),
|
||||
',' => self.add_token(TokenType::Comma),
|
||||
'.' => self.add_token(TokenType::Dot),
|
||||
'-' => self.add_token(TokenType::Minus),
|
||||
'+' => self.add_token(TokenType::Plus),
|
||||
';' => self.add_token(TokenType::Semicolon),
|
||||
'*' => self.add_token(TokenType::Star),
|
||||
'!' if self.peek_and_match('=') => self.add_token(TokenType::BangEqual),
|
||||
'!' => self.add_token(TokenType::Bang),
|
||||
'=' if self.peek_and_match('=') => self.add_token(TokenType::EqualEqual),
|
||||
'=' => self.add_token(TokenType::Equal),
|
||||
'<' if self.peek_and_match('=') => self.add_token(TokenType::LessEqual),
|
||||
'<' => self.add_token(TokenType::Less),
|
||||
'>' if self.peek_and_match('>') => self.add_token(TokenType::GreaterEqual),
|
||||
'>' => self.add_token(TokenType::Greater),
|
||||
// checking for comments and just advance the iterator
|
||||
'/' if self.peek_and_match('/') => {
|
||||
while self.peek().is_some_and(|x| x != '\n') {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
'/' => self.add_token(TokenType::Slash),
|
||||
|
||||
' ' | '\r' | '\t' => (),
|
||||
'\n' => self.line += 1,
|
||||
|
||||
_ => error = Err("Unexpected character"),
|
||||
};
|
||||
|
||||
error
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<char> {
|
||||
self.current += 1;
|
||||
self.iter.next()
|
||||
}
|
||||
|
||||
fn add_token(&mut self, t_type: TokenType) {
|
||||
self.add_token_literal(t_type, None)
|
||||
}
|
||||
|
||||
fn add_token_literal(&mut self, t_type: TokenType, literal: Option<Box<dyn std::any::Any>>) {
|
||||
let text = self.source.substring(self.start, self.current);
|
||||
self.tokens.push(Token {
|
||||
t_type,
|
||||
lexeme: text.to_string(),
|
||||
literal,
|
||||
line: self.line,
|
||||
});
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.iter.peek().copied()
|
||||
}
|
||||
|
||||
fn peek_and_match(&mut self, expected: char) -> bool {
|
||||
let peek = self.peek();
|
||||
if peek.is_some_and(|x| x == expected) {
|
||||
self.advance();
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use TokenType::*;

    /// Returns `true` when both slices have the same length and are equal element by element.
    fn do_cols_match<T: PartialEq>(a: &[T], b: &[T]) -> bool {
        a.len() == b.len() && a.iter().zip(b).all(|(left, right)| left == right)
    }

    /// Scans a snippet with comments, grouping characters and operators, and checks the
    /// resulting sequence of token types.
    #[test]
    fn should_be_equal() {
        let value = r#"
            // this is a comment
            (( )){} // grouping stuff
            !*+-/=<> <= == // operators
        "#;

        let expected_tokens = vec![
            LeftParen, LeftParen, RightParen, RightParen, LeftBrace, RightBrace, Bang, Star, Plus,
            Minus, Slash, Equal, Less, Greater, LessEqual, EqualEqual, EOF,
        ];

        let mut scanner = Scanner::new(value.to_string());
        let scanned = scanner.scan_tokens().unwrap();
        let actual_tokens: Vec<TokenType> = scanned.iter().map(|token| token.t_type).collect();

        println!("actual: {:?}", actual_tokens);

        println!("expected: {:?}", expected_tokens);

        assert!(do_cols_match(&actual_tokens, &expected_tokens));
    }
}
|
62
src/token.rs
Normal file
62
src/token.rs
Normal file
@ -0,0 +1,62 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
/// The kind of a token.
///
/// A plain fieldless enum, so it is cheap to copy and can be compared, hashed and used as a
/// key in maps or sets.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum TokenType {
    // Single-character tokens.
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Minus,
    Plus,
    Semicolon,
    Slash,
    Star,

    // Operators that are one or two characters long.
    Bang,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    // Identifier and literal kinds.
    Identifier,
    String,
    Number,

    // Keyword kinds.
    And,
    Class,
    Else,
    False,
    Fun,
    For,
    If,
    Nil,
    OR,
    Print,
    Return,
    Super,
    This,
    True,
    Var,
    While,

    // Marks the end of the input.
    EOF,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
pub t_type: TokenType,
|
||||
pub lexeme: String,
|
||||
pub literal: Option<Box<dyn std::any::Any>>,
|
||||
pub line: usize,
|
||||
}
|
||||
|
||||
impl Display for Token {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?} {} {:?}", self.t_type, self.lexeme, self.literal)
|
||||
}
|
||||
}
|
36
src/utils.rs
Normal file
36
src/utils.rs
Normal file
@ -0,0 +1,36 @@
|
||||
/// Character-based (not byte-based) helpers for strings.
pub trait StringUtils {
    /// Returns the slice that begins at character index `start` and is at most `len`
    /// characters long.
    ///
    /// Note that the second argument is a *length*, not an end index. Both values are
    /// counted in `char`s, and the result is clamped to the end of the string: a `start`
    /// past the end yields `""`, and a `len` that is too large yields the remaining tail.
    fn substring(&self, start: usize, len: usize) -> &str;
}

impl StringUtils for String {
    fn substring(&self, start: usize, len: usize) -> &str {
        // Byte offset at which each character begins, in order.
        let mut offsets = self.char_indices().map(|(byte_idx, _)| byte_idx);

        // Offset of character `start`, or the end of the string if there is no such character.
        let byte_start = offsets.nth(start).unwrap_or(self.len());

        // `offsets` now sits just past character `start`, so the character `len` positions
        // after `start` is `len - 1` steps ahead. Running off the end clamps to the string end.
        let byte_end = match len {
            0 => byte_start,
            _ => offsets.nth(len - 1).unwrap_or(self.len()),
        };

        &self[byte_start..byte_end]
    }
}
|
Loading…
Reference in New Issue
Block a user