Skip to content

Commit 542d38f

Browse files
committed
feat: initial commit
0 parents  commit 542d38f

File tree

15 files changed

+704
-0
lines changed

15 files changed

+704
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/target
2+
/Cargo.lock

.idea/.gitignore

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/onescript-preprocessor.iml

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/sonarlint/issuestore/index.pb

Whitespace-only changes.

.idea/vcs.xml

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "onescript-preprocessor"
3+
version = "0.1.0"
4+
edition = "2021"
5+
authors = ["Viktor Gukov <[email protected]>"]
6+
license = "MIT"
7+
repository = "https://github.com/EightM/onescript-preprocessor"
8+
description = "Preprocessor for OneScript language"
9+
10+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
11+
12+
[dependencies]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 EightM <[email protected]>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.MD

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# onescript-preprocessor
2+
3+
Препроцессор языка OneScript
4+
5+
## Зачем нужен?
6+
7+
* Удалять директивы препроцессора из исходного текста
8+
* Получать информацию которая хранится в директивах препроцессора
9+
10+
## Что умеет?
11+
12+
* [x] Получать на вход строку исходного текста, удалять из нее директивы препроцессора и возвращать "чистую" строку
13+
* [ ] Возвращать информацию о том, в каких областях (с какими именами) находятся токены исходного текста
14+
* [ ] Возвращать информацию об именах библиотек используемых в директивах `#Использовать`
15+
16+
## Как использовать?
17+
18+
```rust
19+
use onescript_preprocessor::preprocessor::Preprocessor;
20+
21+
fn main() {
22+
let example = "#region Test\nProcedure Test()\nEndProcedure\n#EndRegion";
23+
let preprocessor = Preprocessor::new();
24+
25+
let result = preprocessor.preprocess(example);
26+
}
27+
```
28+
29+
## Что нужно иметь в виду?
30+
31+
Пока не готовы все три галочки, публичное API может изменяться как ему будет удобнее.

src/lexer.rs

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
use std::collections::HashMap;
2+
use std::iter::Peekable;
3+
use std::str::Chars;
4+
use crate::token::{KeywordTable, Token, TokenKind};
5+
6+
/// Lexer that splits OneScript source text into tokens.
pub struct Lexer {
    // Current 1-based line number while scanning; advanced on every '\n'.
    current_line: u16,
}
10+
11+
impl Lexer {
12+
/// Создает новый экземпляр лексера. Отсчет номеров строк токенов начинается с 1.
13+
pub fn new() -> Self { Lexer { current_line: 1 } }
14+
15+
/// Осуществляет лексинг переданного исходного текста. Возвращает вектор токенов, включающих в себя
16+
/// как простой текст, так и токены инструкций препроцессора
17+
pub fn lex(&mut self, source: &str) -> Vec<Token> {
18+
let mut chars = source.chars().peekable();
19+
let mut tokens: Vec<Token> = Vec::new();
20+
let keywords_table = KeywordTable::new();
21+
22+
while let Some(char) = chars.peek() {
23+
match char {
24+
'#' => {
25+
chars.next();
26+
27+
if match_char('!', &mut chars) {
28+
let token = Token::new(TokenKind::Shebang, "#!".to_string(), self
29+
.current_line, self.current_line);
30+
tokens.push(token);
31+
Lexer::shebang_text(self, &mut tokens, &mut chars);
32+
} else {
33+
let token = Token::new(TokenKind::Hash, "#".to_string(), self
34+
.current_line, self.current_line);
35+
tokens.push(token);
36+
Lexer::preprocessor_line(self, &mut tokens, &mut chars, &keywords_table);
37+
}
38+
}
39+
_ => {
40+
Lexer::text(self, &mut tokens, &mut chars);
41+
}
42+
}
43+
}
44+
45+
tokens
46+
}
47+
48+
fn shebang_text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
49+
let mut text_chars: Vec<char> = Vec::new();
50+
let start_line = self.current_line;
51+
let end_line = self.current_line;
52+
53+
while let Some(char) = chars.peek() {
54+
match char {
55+
'\n' => {
56+
break;
57+
}
58+
_ => {
59+
text_chars.push(*char);
60+
chars.next();
61+
}
62+
}
63+
}
64+
65+
if !text_chars.is_empty() {
66+
let token = Token::new(TokenKind::Text, text_chars.iter().collect(), start_line, end_line);
67+
tokens.push(token);
68+
}
69+
}
70+
71+
fn preprocessor_line(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>, keywords: &KeywordTable) {
72+
while let Some(char) = chars.peek() {
73+
match char {
74+
'\n' => {
75+
break;
76+
}
77+
char if char.is_alphabetic() || *char == '_' => {
78+
let token = Lexer::identifier(self, chars, &keywords.table);
79+
tokens.push(token);
80+
}
81+
'"' => {
82+
let mut token = Lexer::string(self, chars);
83+
token.token_kind = TokenKind::Path;
84+
tokens.push(token);
85+
}
86+
_ => {
87+
chars.next();
88+
}
89+
}
90+
}
91+
}
92+
93+
fn identifier(&mut self, chars: &mut Peekable<Chars>, keywords: &HashMap<String, TokenKind>) -> Token {
94+
let mut text_chars: Vec<char> = Vec::new();
95+
96+
while let Some(char) = chars.peek() {
97+
if char.is_alphabetic() || *char == '_' {
98+
text_chars.push(*char);
99+
chars.next();
100+
} else {
101+
break;
102+
}
103+
}
104+
105+
let identifier: String = String::from_iter(text_chars);
106+
if let Some(token_kind) = keywords.get(identifier.to_uppercase().as_str()) {
107+
return Token::new(*token_kind, identifier, self.current_line, self.current_line);
108+
}
109+
110+
return Token::new(TokenKind::Identifier, identifier, self.current_line, self.current_line);
111+
}
112+
113+
fn text(&mut self, tokens: &mut Vec<Token>, chars: &mut Peekable<Chars>) {
114+
let mut text_chars: Vec<char> = Vec::new();
115+
let mut string_or_date = false;
116+
let start_line = self.current_line;
117+
let mut end_line = self.current_line;
118+
119+
while let Some(char) = chars.peek() {
120+
match char {
121+
'#' => {
122+
if !string_or_date {
123+
break;
124+
} else {
125+
text_chars.push(*char);
126+
chars.next();
127+
}
128+
}
129+
'"' | '\'' => {
130+
if string_or_date == false {
131+
string_or_date = true
132+
} else {
133+
string_or_date = false
134+
}
135+
text_chars.push(*char);
136+
chars.next();
137+
}
138+
'\n' => {
139+
self.current_line = self.current_line + 1;
140+
end_line = end_line + 1;
141+
text_chars.push(*char);
142+
chars.next();
143+
}
144+
_ => {
145+
text_chars.push(*char);
146+
chars.next();
147+
}
148+
}
149+
}
150+
151+
let token = Token::new(TokenKind::Text, text_chars.into_iter().collect(),
152+
start_line, end_line);
153+
tokens.push(token);
154+
}
155+
156+
fn string(&mut self, chars: &mut Peekable<Chars>) -> Token {
157+
let mut text_chars: Vec<char> = Vec::new();
158+
// add first quote symbol
159+
text_chars.push(chars.next().unwrap());
160+
let start_line = self.current_line;
161+
let mut end_line = self.current_line;
162+
163+
while let Some(char) = chars.next() {
164+
match char {
165+
char if char == '"' => {
166+
text_chars.push(char);
167+
break;
168+
}
169+
'\n' => {
170+
self.current_line = self.current_line + 1;
171+
end_line = end_line + 1;
172+
text_chars.push(char);
173+
}
174+
_ => { text_chars.push(char) }
175+
}
176+
}
177+
178+
Token::new(TokenKind::Text, text_chars.into_iter().collect(), start_line, end_line)
179+
}
180+
}
181+
182+
/// Consumes the next character if and only if it equals `expected`.
/// Returns `true` when the character was matched (and consumed);
/// `false` (consuming nothing) otherwise, including at end of input.
fn match_char(expected: char, chars: &mut Peekable<Chars>) -> bool {
    chars.next_if_eq(&expected).is_some()
}

src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#![warn(missing_docs)]
//! A preprocessor implementation for the OneScript programming language.
//! Parses OneScript source text, processes it, and strips preprocessor directives.

/// Module containing the lexer API.
pub mod lexer;

/// Module implementing the tokens produced while scanning source text.
pub mod token;

/// Module containing the preprocessor API.
pub mod preprocessor;

src/preprocessor.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
use crate::lexer::Lexer;
2+
use crate::token::TokenKind;
3+
4+
/// Представление препроцессора
5+
pub struct Preprocessor {}
6+
7+
impl Preprocessor {
8+
/// Возвращает новый экземпляр препроцессора
9+
pub fn new() -> Self {
10+
return Preprocessor {};
11+
}
12+
13+
/// Обрабатывает переданный исходный текст, разбирая его на токены. Токены инструкций препроцессора
14+
/// заменяются на пробелы. Остальной текст добавляется без изменений
15+
///
16+
/// # Arguments
17+
///
18+
/// * `source`: исходный текст для разбора
19+
///
20+
/// returns: String исходный текст с удаленными директивами препроцессора
21+
///
22+
/// # Examples
23+
///
24+
/// ```
25+
/// use onescript_preprocessor::preprocessor::Preprocessor;
26+
/// let example = "#region Test\nProcedure Test()\nEndProcedure\n#EndRegion";
27+
/// let expected = " \nProcedure Test()\nEndProcedure\n ";
28+
///
29+
/// let preprocessor = Preprocessor::new();
30+
/// let result = preprocessor.preprocess(example);
31+
/// assert_eq!(expected, result);
32+
/// ```
33+
pub fn preprocess(&self, source: &str) -> String {
34+
let mut lexer = Lexer::new();
35+
let tokens = lexer.lex(source);
36+
let mut result: String = String::new();
37+
38+
for token in &tokens {
39+
match token.token_kind {
40+
TokenKind::Hash => { result.push(' ') }
41+
TokenKind::Shebang => { result.push(' ') }
42+
TokenKind::Text => { result.push_str(&token.lexeme) }
43+
TokenKind::Keyword(_) => {}
44+
TokenKind::Path => {}
45+
TokenKind::ShebangText => {}
46+
TokenKind::Identifier => {}
47+
}
48+
};
49+
50+
result
51+
}
52+
}

0 commit comments

Comments
 (0)