Skip to content

Commit 69646f5

Browse files
committed
(lexer) fix: digraphs can produce illegal '#' symbol
1 parent 0d07bc5 commit 69646f5

File tree

4 files changed

+37
-23
lines changed

4 files changed

+37
-23
lines changed

src/lexer/lex_content.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,7 @@ fn lex_char(
105105
_,
106106
) => {
107107
if let Symbols(symbol_state) = state {
108-
let (value, error) = symbol_state.push(ch);
109-
if let Some(msg) = error {
110-
lex_data.push_err(location.to_warning(msg));
111-
}
112-
if let Some((size, symbol)) = value {
108+
if let Some((size, symbol)) = symbol_state.push(ch, lex_data, location) {
113109
lex_data.push_token(Token::from_symbol(symbol, size, location));
114110
}
115111
} else {

src/lexer/state/end_state.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,7 @@ pub fn end_symbols(symbols: &mut SymbolState, lex_data: &mut LexingData, locatio
4747
let mut idx: usize = 0;
4848
while !symbols.is_empty() && idx <= 2 {
4949
idx += 1;
50-
let (value, error) = symbols.try_to_operator();
51-
if let Some(msg) = error {
52-
lex_data.push_err(location.to_warning(msg));
53-
}
54-
if let Some((size, symbol)) = value {
50+
if let Some((size, symbol)) = symbols.try_to_operator(lex_data, location) {
5551
let token = Token::from_symbol(symbol, size, location);
5652
lex_data.push_token(token);
5753
} else {

src/lexer/state/lex_state.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ impl LexingState {
3030
}
3131

3232
pub fn new_ident(&mut self, ch: char) {
33-
*self = Self::Identifier(Ident::from(String::from(ch)));
33+
*self = Self::Identifier(Ident::from(ch.to_string()));
3434
}
3535

3636
pub fn new_ident_str(&mut self, str: String) {

src/lexer/state/symbol.rs

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
use crate::errors::api::Location;
12
use crate::lexer::api::Symbol;
3+
use crate::lexer::types::api::LexingData;
24

35
const NULL: char = '\0';
46

@@ -22,7 +24,7 @@ impl SymbolState {
2224
}
2325
}
2426

25-
fn handle_digraphs_trigraphs(&mut self) -> Option<String> {
27+
fn handle_digraphs_trigraphs(&mut self) -> Option<(String, bool)> {
2628
let symbols = (self.first, self.second, self.third);
2729
let (graph, is_trigraph) = match symbols {
2830
('?', '?', '=') => (Some('#'), true),
@@ -38,14 +40,19 @@ impl SymbolState {
3840
(':', '>', _) => (Some(']'), false),
3941
('<', '%', _) => (Some('{'), false),
4042
('%', '>', _) => (Some('}'), false),
41-
('%', ':', _) => (Some('#'), false),
43+
('%', ':', _) => {
44+
return Some((
45+
"Found invalid character '#', found by replacing digraph '%:'.".to_owned(),
46+
true,
47+
))
48+
}
4249
_ => (None, false),
4350
};
4451
if let Some(symbol) = graph {
4552
if is_trigraph {
46-
return Some(
47-
format!("Trigraphs are deprecated in C23. Please remove them: Replace \"{}{}{}\" by '{symbol}'.", self.first, self.second, self.third),
48-
);
53+
return Some((
54+
format!("Trigraphs are deprecated in C23. Please remove them: Replace \"{}{}{}\" by '{symbol}'.", self.first, self.second, self.third)
55+
, false));
4956
}
5057
self.first = symbol;
5158
self.second = self.third;
@@ -82,11 +89,16 @@ impl SymbolState {
8289
}
8390
}
8491

85-
pub fn push(&mut self, value: char) -> (Option<(usize, Symbol)>, Option<String>) {
92+
pub fn push(
93+
&mut self,
94+
value: char,
95+
lex_data: &mut LexingData,
96+
location: &Location,
97+
) -> Option<(usize, Symbol)> {
8698
let op = if self.third == NULL {
87-
(None, None)
99+
None
88100
} else {
89-
self.try_to_operator()
101+
self.try_to_operator(lex_data, location)
90102
};
91103
if self.first == NULL {
92104
self.first = value;
@@ -102,8 +114,18 @@ impl SymbolState {
102114
op
103115
}
104116

105-
pub fn try_to_operator(&mut self) -> (Option<(usize, Symbol)>, Option<String>) {
106-
let err = self.handle_digraphs_trigraphs();
117+
pub fn try_to_operator(
118+
&mut self,
119+
lex_data: &mut LexingData,
120+
location: &Location,
121+
) -> Option<(usize, Symbol)> {
122+
if let Some((msg, error)) = self.handle_digraphs_trigraphs() {
123+
if error {
124+
lex_data.push_err(location.to_error(msg));
125+
} else {
126+
lex_data.push_err(location.to_warning(msg));
127+
}
128+
}
107129
let result = match (self.first, self.second, self.third) {
108130
('<', '<', '=') => Some((3, Symbol::ShiftLeftAssign)),
109131
('>', '>', '=') => Some((3, Symbol::ShiftRightAssign)),
@@ -175,10 +197,10 @@ impl SymbolState {
175197
self.third = NULL;
176198
}
177199
_ => panic!(
178-
"his is not meant to happen. nb_consumed is defined only be having values of 0, 1, 2 or 3, not {nb_consumed}"
200+
"This is not meant to happen. `nb_consumed` is defined only be having values of 0, 1, 2 or 3, not {nb_consumed}"
179201
),
180202
};
181203
}
182-
(result, err)
204+
result
183205
}
184206
}

0 commit comments

Comments
 (0)