Skip to content

Commit 4f60ff4

Browse files
committed
\u in test now works
1 parent 16de67c commit 4f60ff4

File tree

2 files changed

+64
-46
lines changed

2 files changed

+64
-46
lines changed

src/lexer.rs

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::{
55
use hexponent::FloatLiteral;
66

77
use unic_emoji_char as emoji;
8-
98
pub struct Lexer {
109
token_list: Vec<Token>,
1110
source: String,
@@ -123,65 +122,72 @@ impl Lexer {
123122
}
124123
// check for escape sequence \` \n \\ \t \r \a \b \f \v \e \xhh \uhhhhhh
125124
if self.peek() == '\\' {
126-
self.source.remove(self.current);
125+
self.remove_text(self.current);
127126
if self.peek() == '`' {
128127
self.advance();
129128
} else if self.peek() == 'n' {
130-
self.source.remove(self.current);
131-
self.source.insert(self.current, '\n');
129+
self.remove_text(self.current);
130+
self.insert_text(self.current, '\n');
132131
self.advance();
133-
self.line += 1;
134132
} else if self.peek() == '\\' {
135-
self.source.remove(self.current);
136-
self.source.insert(self.current, '\\');
133+
self.remove_text(self.current);
134+
self.insert_text(self.current, '\\');
137135
self.advance();
138136
} else if self.peek() == 't' {
139-
self.source.remove(self.current);
140-
self.source.insert(self.current, '\t');
137+
self.remove_text(self.current);
138+
self.insert_text(self.current, '\t');
141139
self.advance();
142140
} else if self.peek() == 'r' {
143-
self.source.remove(self.current);
144-
self.source.insert(self.current, '\r');
141+
self.remove_text(self.current);
142+
self.insert_text(self.current, '\r');
145143
self.advance();
146144
} else if self.peek() == 'a' {
147-
self.source.remove(self.current);
148-
self.source.insert(self.current, '\x07');
145+
self.remove_text(self.current);
146+
self.insert_text(self.current, '\x07');
149147
self.advance();
150148
} else if self.peek() == 'b' {
151-
self.source.remove(self.current);
152-
self.source.insert(self.current, '\x08');
149+
self.remove_text(self.current);
150+
self.insert_text(self.current, '\x08');
153151
self.advance();
154152
} else if self.peek() == 'f' {
155-
self.source.remove(self.current);
156-
self.source.insert(self.current, '\x0C');
153+
self.remove_text(self.current);
154+
self.insert_text(self.current, '\x0C');
157155
self.advance();
158156
} else if self.peek() == 'v' {
159-
self.source.remove(self.current);
160-
self.source.insert(self.current, '\x0b');
157+
self.remove_text(self.current);
158+
self.insert_text(self.current, '\x0b');
161159
self.advance();
162160
} else if self.peek() == 'e' {
163-
self.source.remove(self.current);
164-
self.source.insert(self.current, '\x1b');
161+
self.remove_text(self.current);
162+
self.insert_text(self.current, '\x1b');
165163
self.advance();
166-
} else if self.peek() == 'X' {
167-
self.source.remove(self.current);
168-
match self.source.remove(self.current) {
169-
x if x.is_ascii_hexdigit() => match self.source.remove(self.current) {
170-
y if y.is_ascii_hexdigit() => {self.source.insert(self.current, u8::from_str_radix(format!("{x}{y}").as_str(), 16).unwrap() as char);
164+
} else if self.peek() == 'x' {
165+
self.remove_text(self.current);
166+
match self.remove_text(self.current) {
167+
x if x.is_ascii_hexdigit() => match self.remove_text(self.current) {
168+
y if y.is_ascii_hexdigit() => {self.insert_text(self.current, u8::from_str_radix(format!("{x}{y}").as_str(), 16).unwrap() as char);
171169
self.advance();},
172-
y => {self.source.insert(self.current, u8::from_str_radix(format!("{x}").as_str(), 16).unwrap() as char);
173-
self.source.insert(self.current+1, y);
170+
y => {self.insert_text(self.current, u8::from_str_radix(format!("{x}").as_str(), 16).unwrap() as char);
171+
self.insert_text(self.current+1, y);
174172
self.advance();}
175173
},
176-
x => {self.source.insert(self.current, x);
174+
x => {self.insert_text(self.current, x);
177175
self.advance();
178176
}
179-
};
180-
181-
} else if self.peek() == 'O' {
182-
todo!();
183-
} else if self.peek() == 'U' {
184-
todo!();
177+
}
178+
} else if self.peek() == 'u' {
179+
self.remove_text(self.current);
180+
// can be from 0 to 6 hex digits
181+
// loop until we have read 6 digits or we find a non-hex digit
182+
// while looping remove the character and append to a string
183+
let mut hex_string = "".to_string();
184+
let mut i = 0;
185+
while i < 6 && self.peek().is_ascii_hexdigit() {
186+
hex_string.push(self.remove_text(self.current));
187+
i += 1;
188+
}
189+
self.insert_text(self.current, char::from_u32(u32::from_str_radix(hex_string.as_str(), 16).unwrap()).unwrap());
190+
self.current += 1;
185191
} else {
186192
error::error(
187193
self.line,
@@ -192,12 +198,10 @@ impl Lexer {
192198
self.advance();
193199
}
194200

195-
// self.advance();
196201
}
197202
if self.is_at_end() {
198203
error::error(self.line, "unterminated string");
199204
}
200-
// println!("{}", self.get_text());
201205
self.advance();
202206
self.start += 1;
203207
self.current -= 1;
@@ -278,7 +282,6 @@ impl Lexer {
278282

279283
fn add_unicode_token(&mut self, token_type: TokenType) {
280284
let text: String = format!("{}", self.source.chars().nth(self.start).expect("Error"));
281-
282285
self.token_list
283286
.push(Token::new(token_type, &text, self.line));
284287
}
@@ -301,4 +304,19 @@ impl Lexer {
301304
});
302305
final_text
303306
}
307+
308+
fn remove_text(&mut self, pos: usize) -> char {
309+
// use chars to be able to use unicode, remove the pos
310+
let mut text: Vec<char> = self.source.chars().collect();
311+
let to_return = text[pos];
312+
text.remove(pos);
313+
self.source = text.iter().collect();
314+
to_return
315+
}
316+
317+
fn insert_text(&mut self, pos: usize, texts: char,) {
318+
let mut text: Vec<char> = self.source.chars().collect();
319+
text.insert(pos, texts);
320+
self.source = text.iter().collect();
321+
}
304322
}

umpl_examples/simple.umpl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,21 @@ create str with ((multiply `ff\n` 5.76))>
88
((ge 5 6))>
99
((setwith num (strtonum (input `>> `))))>
1010
((plus num 5))>
11-
((subtractwith num 4))>>
11+
((subtractwith num 4))>
1212
(num)>
13-
(4)>>
13+
(4)>
1414
create str with `df`
1515
(str)>
16-
(`fdff\X4\n`)>
16+
(`fdff\u1f642\`a`)>
1717
((multiplywith str 5))<
18-
(str)>>
18+
(str)>
1919
((addwith str 5))<
20-
(str)>>
20+
(str)>
2121
create trt with ((plus str 5))>
2222
((multiplywith trt 5))<
23-
(trt)>>
23+
(trt)>
2424
((addwith trt 5))<
25-
(str)>>
25+
(str)>
2626
((multiplywith str 5))<
2727
create str with true
2828
if {true} ⧼ ! if true create a code block (⧼) ends with (⧽)
@@ -35,7 +35,7 @@ if {true} ⧼ ! if true create a code block (⧼) ends with (⧽)
3535
(`Hello World`)> ! else print the string Hello World (`Hello World`)>
3636
3737
create file with ((open `test.txt`))>
38-
(`asddff`)>
38+
(`asddff\n`)>
3939
list z with [((read file))> true]
4040
! currently two things cannot have access to a variable that holds a file, so when we read the file, that takes ownership of the file and it can't be accessed anymore,
4141
! so the following line will throw an error that it could not find the variable file

0 commit comments

Comments
 (0)