Skip to content

Commit d70faab

Browse files
committed
(lexer) fix: error displays for trigraphs + ??' ignored (quote is not a symbol)
1 parent f71938f commit d70faab

File tree

4 files changed

+90
-36
lines changed

4 files changed

+90
-36
lines changed

src/lexer/lex_content.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ fn lex_char(
6666

6767
/* Static strings and chars */
6868
// open/close
69+
('\'', Symbols(symbol_state), _) if symbol_state.is_trigraph() => {
70+
if let Some((size, symbol)) = symbol_state.push(ch, lex_data, location) {
71+
lex_data.push_token(Token::from_symbol(symbol, size, location));
72+
}
73+
}
6974
('\'', state @ Char(_), _) => end_current(state, lex_data, location),
7075
('\'', state, _) if !matches!(state, Str(_)) => {
7176
end_current(state, lex_data, location);
@@ -165,12 +170,12 @@ fn lex_line(
165170
) {
166171
lex_data.newline();
167172
let mut escape_state = EscapeState::False;
168-
let trimed = line.trim_end();
169-
if trimed.is_empty() {
173+
let trimmed = line.trim_end();
174+
if trimmed.is_empty() {
170175
return;
171176
}
172-
let last = trimed.len() - 1;
173-
for (idx, ch) in trimed.chars().enumerate() {
177+
let last = trimmed.len() - 1;
178+
for (idx, ch) in trimmed.chars().enumerate() {
174179
lex_char(
175180
ch,
176181
location,

src/lexer/state/end_state.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,11 @@ pub fn end_symbols(symbols: &mut SymbolState, lex_data: &mut LexingData, locatio
5151
let token = Token::from_symbol(symbol, size, location);
5252
lex_data.push_token(token);
5353
} else {
54-
panic!(
55-
"This can't happen, as lex_data is not empty! LexingData: {:?}",
56-
&lex_data
57-
);
54+
/* This happens when the 3 characters formed a trigraph. If this
55+
* is the case, they were ignored. */
56+
//TODO: the three characters should be emitted verbatim rather than dropped
57+
//TODO: this only matters for a case not yet implemented: trigraphs inside
58+
// string literals.
5859
}
5960
}
6061
}

src/lexer/state/symbol.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ impl SymbolState {
2424
}
2525
}
2626

27-
fn handle_digraphs_trigraphs(&mut self) -> Option<(String, bool)> {
27+
fn handle_digraphs_trigraphs(&mut self) -> Option<(String, usize, bool)> {
2828
let symbols = (self.first, self.second, self.third);
2929
let (graph, is_trigraph) = match symbols {
3030
('?', '?', '=') => (Some('#'), true),
@@ -43,20 +43,22 @@ impl SymbolState {
4343
('%', ':', _) => {
4444
return Some((
4545
"Found invalid character '#', found by replacing digraph '%:'.".to_owned(),
46+
2,
4647
true,
4748
));
4849
}
4950
_ => (None, false),
5051
};
5152
if let Some(symbol) = graph {
5253
if is_trigraph {
53-
return Some((
54-
format!(
55-
"Trigraphs are deprecated in C23. Please remove them: Replace \"{}{}{}\" by '{symbol}'.",
56-
self.first, self.second, self.third
57-
),
58-
false,
59-
));
54+
let msg = format!(
55+
"Trigraphs are deprecated in C23. Please remove them: replace '{}{}{}' by '{symbol}'.",
56+
self.first, self.second, self.third
57+
);
58+
self.first = NULL;
59+
self.second = NULL;
60+
self.third = NULL;
61+
return Some((msg, 3, false));
6062
}
6163
self.first = symbol;
6264
self.second = self.third;
@@ -69,6 +71,13 @@ impl SymbolState {
6971
self.first == NULL && self.second == NULL && self.third == NULL
7072
}
7173

74+
/// Reports whether the buffered symbols currently hold a `??` trigraph prefix.
///
/// Returns `true` when the three slots are `('?', '?', NULL)` or
/// `(_, '?', '?')`. Despite the name, this does not test for a complete
/// trigraph: it only checks that two adjacent `?` occupy the trailing
/// filled slots (assuming slots are filled first-to-third -- TODO confirm).
/// `lex_char` uses it as a guard so that a `'` arriving in this state is
/// pushed to the symbol state instead of being handled as a char delimiter.
pub const fn is_trigraph(&self) -> bool {
75+
matches!(
76+
(self.first, self.second, self.third),
77+
('?', '?', NULL) | (_, '?', '?')
78+
)
79+
}
80+
7281
pub const fn last(&self) -> Option<char> {
7382
if self.third == NULL {
7483
if self.second == NULL {
@@ -123,11 +132,12 @@ impl SymbolState {
123132
lex_data: &mut LexingData,
124133
location: &Location,
125134
) -> Option<(usize, Symbol)> {
126-
if let Some((msg, error)) = self.handle_digraphs_trigraphs() {
135+
if let Some((msg, len, error)) = self.handle_digraphs_trigraphs() {
136+
let new_location = location.to_owned().into_past_with_length(len);
127137
if error {
128-
lex_data.push_err(location.to_error(msg));
138+
lex_data.push_err(new_location.to_error(msg));
129139
} else {
130-
lex_data.push_err(location.to_warning(msg));
140+
lex_data.push_err(new_location.to_warning(msg));
131141
}
132142
}
133143
let result = match (self.first, self.second, self.third) {

tests/strings.rs

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::fs;
2+
13
use c_parser::*;
24

35
const SEP: &str = "\n--------------------\n";
@@ -25,6 +27,8 @@ fn test_string_error(content: &str, output: &str) {
2527
} else {
2628
res.get_displayed_errors(files, "lexer")
2729
};
30+
fs::write("displayed.txt", &displayed).unwrap();
31+
fs::write("expected.txt", output).unwrap();
2832
assert!(
2933
output == displayed,
3034
"Mismatch! Expected:\n!{output}!\n!= Computed\n!{displayed}!"
@@ -49,9 +53,8 @@ make_string_tests!(
4953

5054
digraphs:
5155
"
52-
int arr<:3:> = {1, 2, 3}; // Equivalent to int arr[3];
56+
int arr<:3:> = <%1, 2, 3%>; // Equivalent to int arr[3];
5357
arr<:1:> = 42; // Equivalent to arr[1] = 42;
54-
// int map<%2%>; // Equivalent to int map{2}; //TODO
5558
"
5659
=>
5760
"[(((int arr)[3]) = {1, 2, 3}), ((arr[1]) = 42), \u{2205} ..]"
@@ -114,6 +117,10 @@ nested_braces:
114117
=>
115118
"[[\u{2205} , \u{2205} , \u{2205} , \u{2205} , [(a = 1), (b = 2), \u{2205} ], (c = 3), \u{2205} ]..]"
116119

120+
char_array:
121+
"char x[4] = {'b', 12+'5', '3', '\0' };"
122+
=>
123+
"[(((char x)[4]) = {'b', (12 + '5'), '3', '\0'}), \u{2205} ..]"
117124

118125
nested_block_functions:
119126
"f(a+b) { g(!x) { a = 1; b = 2; } c = 3;
@@ -184,29 +191,60 @@ macro_rules! make_string_error_tests {
184191

185192
make_string_error_tests!(
186193

187-
lengths:
188-
"x = \"blob\" bob;"
194+
lengths_literal:
195+
"x = 'c' blob;"
196+
=>
197+
":1:9: parser error: Found 2 consecutive literals: block [(x = 'c')..] followed by blob.
198+
1 | x = 'c' blob;
199+
^~~~
200+
"
201+
202+
lengths_symbols:
203+
"<<="
189204
=>
190-
":1:12: parser error: Found 2 consecutive literals: block [(x = \"blob\")..] followed by bob.
191-
1 | x = \"blob\" bob;
192-
^~~
205+
":1:1: parser error: Tried to call binary operator <<= on without a left argument.
206+
1 | <<=
207+
^~~
193208
"
194209

195210
digraphs:
196211
"%:include <stdio.h>"
197212
=>
198-
":1:3: lexer error: Found invalid character '#', found by replacing digraph '%:'.
213+
":1:1: lexer error: Found invalid character '#', found by replacing digraph '%:'.
199214
1 | %:include <stdio.h>
200-
^
215+
^~
201216
"
202217

203-
// trigraphs:
204-
// "
205-
// int ??= 1;
206-
// int a ??( 10 ??);
207-
// char b = '??/';
208-
// char q ??'!'??';
209-
// int c ??- 5;
210-
// " => ""
218+
trigraphs:
219+
"
220+
char b??(5??) = ??< 'b', 'l', 'o',??/
221+
'b', '\0' ??>;
222+
int x = 1 ??' ??- 2 ??! 3;
223+
" =>
224+
":2:7: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??(' by '['.
225+
2 | char b??(5??) = ??< 'b', 'l', 'o',??/
226+
^~~
227+
:2:11: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??)' by ']'.
228+
2 | char b??(5??) = ??< 'b', 'l', 'o',??/
229+
^~~
230+
:2:17: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??<' by '{'.
231+
2 | char b??(5??) = ??< 'b', 'l', 'o',??/
232+
^~~
233+
:2:35: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??/' by '\\'.
234+
2 | char b??(5??) = ??< 'b', 'l', 'o',??/
235+
^~~
236+
:3:30: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??>' by '}'.
237+
3 | 'b', '\0' ??>;
238+
^~~
239+
:4:11: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??'' by '^'.
240+
4 | int x = 1 ??' ??- 2 ??! 3;
241+
^~~
242+
:4:15: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??-' by '~'.
243+
4 | int x = 1 ??' ??- 2 ??! 3;
244+
^~~
245+
:4:21: lexer warning: Trigraphs are deprecated in C23. Please remove them: replace '??!' by '|'.
246+
4 | int x = 1 ??' ??- 2 ??! 3;
247+
^~~
248+
"
211249

212250
);

0 commit comments

Comments
 (0)