Skip to content

Commit 503a789

Browse files
committed
feat(lexer): Add custom error type (#103)
There are no tests for the `error` file because the error implementations are generated by `thiserror`.
1 parent d1b0320 commit 503a789

File tree

8 files changed

+90
-24
lines changed

8 files changed

+90
-24
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ publish = false
4141
[workspace.dependencies]
4242
annotate-snippets = "0.10.0"
4343
log = "0.4.20"
44+
thiserror = "1.0.57"
4445

4546
tools = { path = "crates/tools" }
4647

crates/lexer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ publish.workspace = true
1717
[dependencies]
1818
annotate-snippets.workspace = true
1919
log.workspace = true
20+
thiserror.workspace = true
2021
tools.workspace = true
2122

2223
[lints]

crates/lexer/src/error.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
//! Errors that may occur while lexing.
2+
// I Language lexer errors.
3+
// Version: 1.0.0
4+
5+
// Copyright (c) 2023-present I Language Development.
6+
7+
// Permission is hereby granted, free of charge, to any person obtaining a
8+
// copy of this software and associated documentation files (the 'Software'),
9+
// to deal in the Software without restriction, including without limitation
10+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
11+
// and/or sell copies of the Software, and to permit persons to whom the
12+
// Software is furnished to do so, subject to the following conditions:
13+
14+
// The above copyright notice and this permission notice shall be included in
15+
// all copies or substantial portions of the Software.
16+
17+
// THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS
18+
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22+
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23+
// DEALINGS IN THE SOFTWARE.
24+
25+
/////////////
26+
// IMPORTS //
27+
/////////////
28+
29+
use crate::tokens::token::Location;
30+
31+
use thiserror::Error;
32+
33+
34+
////////////
35+
// ERRORS //
36+
////////////
37+
38+
/// The different kinds of errors the lexer can raise.
39+
/// All of these errors implement [`std::error::Error`].
40+
#[allow(clippy::module_name_repetitions)]
41+
#[derive(Clone, Debug, Eq, Error, Hash, Ord, PartialEq, PartialOrd)]
42+
pub enum LexerError {
43+
/// An error which will be returned if a mark was invalid for some reason.
44+
/// This can occur when the starting character of a mark is valid, but the character after it is not.
45+
#[error("invalid mark at {location}")]
46+
InvalidMark { location: Location },
47+
48+
/// An error which will be returned if an unexpected character is encountered.
49+
/// This is most likely to occur when using Unicode characters, as they are not supported.
50+
#[error("unexpected character `{character}` at {location}")]
51+
UnexpectedCharacter { character: char, location: Location },
52+
53+
/// An error which will be returned if a comment is not terminated by a closing `*/`.
54+
#[error("unterminated comment at {location}")]
55+
UnterminatedComment { location: Location },
56+
57+
/// An error which will be returned if a string is not terminated by a closing quote or the quote is escaped.
58+
#[error("unterminated string at {location}")]
59+
UnterminatedString { location: Location },
60+
}

crates/lexer/src/lex.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
// IMPORTS //
2727
/////////////
2828

29+
use crate::error::LexerError;
2930
use crate::tokens::constant::Type;
3031
use crate::tokens::keyword::Keyword;
3132
use crate::tokens::token::{GetToken, Location, Token, TokenType, TypeDefinition};
@@ -105,9 +106,8 @@ use log::trace;
105106
/// - [`Location`]
106107
#[inline] // Suggesting inlining due to rare calls to the function
107108
#[allow(clippy::too_many_lines)]
108-
// TODO (ElBe, Ranastra): Switch to custom error type
109-
pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
110-
let mut error: Option<String> = None;
109+
pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, LexerError> {
110+
let mut error: Option<LexerError> = None;
111111
let mut result: Vec<Token> = vec![];
112112

113113
let mut iterator: std::iter::Peekable<std::iter::Enumerate<std::str::Chars>>;
@@ -134,12 +134,10 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
134134
};
135135

136136
if character == '"' || character == '\'' {
137-
result.push(TypeDefinition::lex_string(
138-
&mut iterator,
139-
line,
140-
location,
141-
character,
142-
));
137+
match TypeDefinition::lex_string(&mut iterator, line, location, character) {
138+
Ok(value) => result.push(value),
139+
Err(error_value) => error = Some(error_value),
140+
};
143141
} else if matches!(
144142
character,
145143
'+' | '-'
@@ -194,7 +192,7 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
194192
let renderer: annotate_snippets::Renderer =
195193
annotate_snippets::Renderer::styled();
196194
eprintln!("{}", renderer.render(snippet));
197-
error = Some(format!("Syntax error: Invalid mark at {location}"));
195+
error = Some(LexerError::InvalidMark { location });
198196
}
199197
} else if character.is_ascii_digit() {
200198
buffer.push(character);
@@ -265,7 +263,10 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
265263

266264
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
267265
eprintln!("{}", renderer.render(snippet));
268-
error = Some(format!("Syntax error: Unexpected character at {location}"));
266+
error = Some(LexerError::UnexpectedCharacter {
267+
character,
268+
location,
269+
});
269270
}
270271

271272
trace!(
@@ -276,7 +277,7 @@ pub fn lex(input: &str, file: &str) -> Result<Vec<Token>, String> {
276277
}
277278

278279
match error {
279-
Some(message) => Err(format!("Error during lexing (last): {message}")),
280+
Some(error_value) => Err(error_value),
280281
None => Ok(result),
281282
}
282283
}

crates/lexer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@
2626
// EXPORTS //
2727
/////////////
2828

29+
pub mod error;
2930
pub mod lex;
3031
pub mod tokens;

crates/lexer/src/tokens/token.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
use core;
3030
use std;
3131

32+
use crate::error::LexerError;
3233
use crate::tokens::constant::Type;
3334
use crate::tokens::keyword::Keyword;
3435
use crate::tokens::mark::Mark;
@@ -163,7 +164,7 @@ impl TypeDefinition {
163164
///
164165
/// # Returns
165166
///
166-
/// The string as a [`Token`].
167+
/// A result of the string as a [`Token`].
167168
///
168169
/// # Errors
169170
///
@@ -182,11 +183,11 @@ impl TypeDefinition {
182183
/// # line: 1,
183184
/// # column: 1,
184185
/// # };
185-
/// assert_eq!(TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''), Token {
186+
/// assert_eq!(TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''), Ok(Token {
186187
/// location,
187188
/// content: "my string".to_owned(),
188189
/// token_type: TokenType::TypeDefinition(TypeDefinition::String)
189-
/// });
190+
/// }));
190191
///
191192
///
192193
/// ```
@@ -197,13 +198,12 @@ impl TypeDefinition {
197198
/// - [`TypeDefinition`]
198199
/// - [`TypeDefinition::String`]
199200
#[inline(always)]
200-
// TODO: Errors
201201
pub fn lex_string(
202202
iterator: &mut std::iter::Peekable<std::iter::Enumerate<std::str::Chars>>,
203203
line: &str,
204204
location: Location,
205205
quote_type: char,
206-
) -> Token {
206+
) -> Result<Token, LexerError> {
207207
let last_character: core::cell::Cell<char> = core::cell::Cell::new('\0');
208208
let second_to_last_character: core::cell::Cell<char> = core::cell::Cell::new('\0');
209209
let buffer: Vec<char> = iterator
@@ -270,17 +270,19 @@ impl TypeDefinition {
270270
if next_character != quote_type {
271271
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
272272
eprintln!("{}", renderer.render(snippet));
273+
return Err(LexerError::UnterminatedString { location });
273274
}
274275
} else {
275276
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
276277
eprintln!("{}", renderer.render(snippet));
278+
return Err(LexerError::UnterminatedString { location });
277279
}
278280

279-
Token {
281+
Ok(Token {
280282
location,
281283
content: buffer.iter().collect::<String>(),
282284
token_type: TokenType::TypeDefinition(TypeDefinition::String),
283-
}
285+
})
284286
}
285287
}
286288

@@ -376,7 +378,7 @@ impl TokenType {
376378
line: &str,
377379
location: Location,
378380
character: char,
379-
) -> Result<Option<Token>, String> {
381+
) -> Result<Option<Token>, LexerError> {
380382
let mut buffer: Vec<char> = vec![character];
381383

382384
if let Some(&(_, next_character)) = iterator.clone().peek() {
@@ -434,7 +436,7 @@ impl TokenType {
434436

435437
let renderer: annotate_snippets::Renderer = annotate_snippets::Renderer::styled();
436438
eprintln!("{}", renderer.render(snippet));
437-
return Err(format!("Syntax error: Unterminated comment at {location}"));
439+
return Err(LexerError::UnterminatedComment { location });
438440
}
439441

440442
iterator.next();

crates/lexer/tests/tokens/token.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ mod tests {
9898

9999
assert_eq!(
100100
TypeDefinition::lex_string(&mut iterator, input, location.clone(), '\''),
101-
Token {
101+
Ok(Token {
102102
location,
103103
content: "my string".to_owned(),
104104
token_type: TokenType::TypeDefinition(TypeDefinition::String)
105-
}
105+
})
106106
);
107107
}
108108

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ fn main() {
9494
let input: String = std::io::read_to_string(reader).unwrap();
9595

9696
let start: std::time::Instant = std::time::Instant::now();
97-
let output: Result<Vec<lexer::tokens::token::Token>, String> =
97+
let output: Result<Vec<lexer::tokens::token::Token>, lexer::error::LexerError> =
9898
lexer::lex::lex(input.trim(), &file_name);
9999
debug!(
100100
"Lexing `{file_name}` took {}ms.",

0 commit comments

Comments
 (0)