Skip to content

Commit b9adbfb

Browse files
committed
g
1 parent d7040a4 commit b9adbfb

File tree

16 files changed

+200
-127
lines changed

16 files changed

+200
-127
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2024"
55
authors = ["Tom Webber"]
66
license-file = "LICENSE"
77
repository = "https://www.github.com/t-webber/c-parser"
8-
description = "A rust library to lex and parse C source files into Abstract Synthax Trees."
8+
description = "A rust library to lex and parse C source files into Abstract Syntax Trees."
99
publish = false
1010
keywords = ["parsing", "lexing", "compiling", "nostd"]
1111
categories = ["compilers", "no-std", "no-std::alloc", "parser-implementations", "parsing"]

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least
291291
the "copyright" line and a pointer to where the full notice is found.
292292

293293
<one line to give the program's name and a brief idea of what it does.>
294-
Copyright (C) 2024 <name of author>
294+
Copyright (C) 2025 <name of author>
295295

296296
This program is free software; you can redistribute it and/or modify
297297
it under the terms of the GNU General Public License as published by

docs/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# C parser
2+
3+
This is a Rust library that lexes and parses C source files.
4+
5+
## Standard
6+
7+
For the moment, this parser is only meant to support standard C23 code. No extensions (e.g. GCC extensions) are implemented. The input file is expected to be already preprocessed.
8+
9+
## Lexer
10+
11+
The lexer takes the preprocessed C source code as input and transforms it into a list of valid tokens: keywords, number constants, identifiers, symbols, strings and chars.
12+
13+
## Parser
14+
15+
The parser takes these tokens and tries to build an Abstract Syntax Tree (AST). The AST is not guaranteed to be valid while it is being built: it may contain empty placeholder nodes, which are meant to disappear before the end of the parsing stage.

src/errors/display.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ use super::compile::CompileError;
1212
///
1313
/// See [`Res::get_displayed_errors`](super::result::Res::get_displayed_errors)
1414
/// for extra information and examples.
15+
///
16+
/// # Errors
17+
///
18+
/// Returns an error when writing to the string buffer fails.
1519
pub(super) fn display_errors(
1620
errors: Vec<CompileError>,
1721
files: &[(String, &str)],
@@ -28,16 +32,34 @@ pub(super) fn display_errors(
2832
let code_lines = files_state
2933
.get(&filename)
3034
.expect("Never happens: File of error doesn't exist");
31-
let code_line = code_lines.get(line_nb - 1).unwrap_or_else(|| {
32-
panic!("Never happens: given line of file that doesn't exist: {filename}:{line_nb}:{column_nb}")
35+
let code_line = code_lines.get(safe_decrement(line_nb)).unwrap_or_else(|| {
36+
panic!("Never happens: given line of file that doesn't exist: {filename}:{line_nb}:{column_nb} (for {err_type})")
3337
});
38+
let mut too_long = false;
39+
let col = safe_decrement(column_nb);
40+
let under_spaces = " ".repeat(8usize.checked_add(col).unwrap_or_else(|| {
41+
too_long = true;
42+
col
43+
}));
44+
let under_tilde = "~".repeat(safe_decrement(length));
3445
writeln!(
3546
res,
36-
"{filename}:{line_nb}:{column_nb}: {err_type} {err_lvl}: {message}\n{line_nb:5} | {code_line}\n{}^{}",
37-
" ".repeat(8 + column_nb - 1),
38-
"~".repeat(length - 1)
39-
)
40-
.map_err(|_| ())?;
47+
"{filename}:{line_nb}:{column_nb}: {err_type} {err_lvl}: {message}\n{line_nb:5} | {code_line}\n{under_spaces}^{under_tilde}"
48+
).map_err(|_| ())?;
49+
if too_long {
50+
writeln!(
51+
res,
52+
"{filename}:{line_nb}:{column_nb}: format warning: This line of code exceeds the maximum size of {}. Consider refactoring your code. {line_nb:5} | {code_line}\n{under_spaces}^{under_tilde}",
53+
usize::MAX
54+
)
55+
.map_err(|_| ())?;
56+
}
4157
}
4258
Ok(res)
4359
}
60+
61+
/// Decrements a value by 1
62+
const fn safe_decrement(val: usize) -> usize {
63+
val.checked_sub(1)
64+
.expect("line, col, len are initialised at 1, then incremented")
65+
}

src/errors/location.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,30 @@ impl Location {
3030
/// Increments column of location by 1
3131
///
3232
/// This is used by lexer when parsing every character of the C file.
33-
pub(crate) fn incr_col(&mut self) {
34-
self.col += 1;
33+
pub(crate) fn incr_col(&mut self) -> Result<(), CompileError> {
34+
self.col = self.col.checked_add(1).ok_or_else(|| {
35+
self.to_error(format!(
36+
"This line of code exceeds the maximum numbers of columns ({}).
37+
Consider refactoring your code.",
38+
usize::MAX
39+
))
40+
})?;
41+
Ok(())
3542
}
3643

3744
/// Increments line of location by 1
3845
///
3946
/// This is used by lexer when parsing every line of the C file.
40-
pub(crate) fn incr_line(&mut self) {
41-
self.line += 1;
47+
pub(crate) fn incr_line(&mut self) -> Result<(), CompileError> {
48+
self.line = self.line.checked_add(1).ok_or_else(|| {
49+
self.to_error(format!(
50+
"The file exceeds the maximum number lines ({}). Consider refactoring
51+
your code.",
52+
usize::MAX
53+
))
54+
})?;
4255
self.col = 1;
56+
Ok(())
4357
}
4458

4559
/// Creates an error from a location without cloning

src/lexer/lex_content.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ pub fn lex_file(content: &str, location: &mut Location) -> Res<Vec<Token>> {
167167

168168
for line in content.lines() {
169169
lex_line(line, location, &mut lex_data, &mut lex_state);
170-
location.incr_line();
170+
if let Err(err) = location.incr_line() {
171+
lex_data.push_err(err);
172+
}
171173
}
172174

173175
lex_data.into_res()
@@ -184,7 +186,7 @@ fn lex_line(line: &str, location: &mut Location, lex_data: &mut LexingData, lex_
184186
if trimmed.is_empty() {
185187
return;
186188
}
187-
let last = trimmed.len() - 1;
189+
let last = trimmed.len().checked_sub(1).expect("trimmed is not empty");
188190
for (idx, ch) in trimmed.chars().enumerate() {
189191
lex_char(
190192
ch,
@@ -194,7 +196,9 @@ fn lex_line(line: &str, location: &mut Location, lex_data: &mut LexingData, lex_
194196
&mut escape_state,
195197
idx == last,
196198
);
197-
location.incr_col();
199+
if let Err(err) = location.incr_col() {
200+
lex_data.push_err(err);
201+
}
198202
if lex_data.is_end_line() {
199203
break;
200204
}

src/lexer/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
//! Module to lex source files into a list of valid
2+
//! [`Token`](types::api::Token): keywords, number constants,
3+
//! identifiers, symbols, strings and chars.
4+
15
pub mod api {
26
//! Api module to choose what functions to export.
37

src/lexer/numbers/base/hexadecimal.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,13 @@ macro_rules! impl_floating_point {
4646
/// Parses the stringified version of a number into a [`HexFloatData`].
4747
macro_rules! parse_hexadecimal_float {
4848
($overflow:expr, $nb_type:ident, $float_parse:ident, $($t:ident)*) => {{
49+
#[expect(clippy::float_arithmetic, clippy::arithmetic_side_effects)]
50+
#[expect(clippy::as_conversions)]
4951
match $nb_type {
5052
$(NumberType::$t => {
5153
let int_part = $t::from_unsigned(
5254
<concat_idents!($t, IntPart)>::from_str_radix(&$float_parse.int_part, 16).expect("2 <= <= 36"),
5355
$overflow);
54-
#[expect(clippy::as_conversions)]
5556
let exponent = $t::from_unsigned((2 as concat_idents!($t, IntPart)).pow($float_parse.get_exp()), $overflow);
5657
let mut decimal_part: $t = 0.;
5758
for (idx, ch) in $float_parse.decimal_part.chars().enumerate() {
@@ -328,7 +329,6 @@ pub fn to_hex_value(
328329
)
329330
} else {
330331
let mut overflow = false;
331-
#[expect(clippy::float_arithmetic)]
332332
let res =
333333
parse_hexadecimal_float!(&mut overflow, nb_type, float_data, Float Double LongDouble);
334334
if overflow { res.add_overflow() } else { res }

src/lexer/numbers/from_literal.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@ fn get_base(
2929
let first = chars.next().expect("len >= 1");
3030
let second = chars.next().expect("len >= 2");
3131

32-
let one_char = literal.len() - nb_type.suffix_size() == 1;
32+
let one_char = literal
33+
.len()
34+
.checked_sub(nb_type.suffix_size())
35+
.expect("literal contains the suffix")
36+
== 1;
3337

3438
match (first, second) {
3539
('0', 'x') if one_char => {
@@ -116,7 +120,7 @@ fn get_number_type(literal: &str, location: &Location) -> Result<NumberType, Com
116120
return Err(location
117121
.to_error("found 3 'l' characters, but max is 2 (`long long`).".to_owned()));
118122
}
119-
'l' | 'L' => l_count += 1,
123+
'l' | 'L' => l_count = l_count.checked_add(1).expect("l_count <= 1"),
120124
'f' | 'F' if is_hex && !double_or_float => break,
121125
'f' | 'F' => float = true,
122126
'i' | 'I' => {
@@ -206,7 +210,7 @@ fn literal_to_number_err(literal: &str, location: Location, signed: bool) -> Par
206210
let mut nb_type = get_number_type(literal, &location)?;
207211
let base = get_base(literal, &nb_type, &location)?;
208212
let value = literal
209-
.get(base.prefix_size()..literal.len() - nb_type.suffix_size())
213+
.get(base.prefix_size()..literal.len().checked_sub(nb_type.suffix_size()).expect("literal contains the suffix"))
210214
.expect("never happens as suffix size + prefix size <= len, as 'x' and 'b' can't be used as suffix");
211215

212216
if value.is_empty() {

src/lexer/numbers/types.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,13 @@ use arch_types::{Double, Float, Int, Long, LongDouble, LongLong, UInt, ULong, UL
3434
/// Defines the [`Number`] and [`NumberType`] enums
3535
macro_rules! define_nb_types {
3636
($($t:ident)*) => {
37+
/// Token value for a number constant
3738
#[derive(Debug, PartialEq)]
3839
pub enum Number {
39-
$($t($t),)*
40+
$(
41+
/// $t C type
42+
$t($t),
43+
)*
4044
}
4145

4246
pub enum NumberType {

0 commit comments

Comments
 (0)