g

t-webber · t-webber · commit b9adbfbfd2ed · 2025-01-01T20:29:27.000+01:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2024"
 authors = ["Tom Webber"]
 license-file = "LICENSE"
 repository = "https://www.github.com/t-webber/c-parser"
-description = "A rust library to lex and parse C source files into Abstract Synthax Trees."
+description = "A rust library to lex and parse C source files into Abstract Syntax Trees."
 publish = false
 keywords = ["parsing", "lexing", "compiling", "nostd"]
 categories = ["compilers", "no-std", "no-std::alloc", "parser-implementations", "parsing"]
diff --git a/LICENSE b/LICENSE
@@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least
 the "copyright" line and a pointer to where the full notice is found.
 
     <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) 2024  <name of author>
+    Copyright (C) 2025  <name of author>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
diff --git a/docs/README.md b/docs/README.md
@@ -0,0 +1,15 @@
+# C parser
+
+This is a rust library that lexes and parses C source files.
+
+## Standard
+
+For the moment, this parser is only meant to support standard C23 code. No extensions (e.g. GCC extensions) are implemented. The input file is assumed to be already preprocessed.
+
+## Lexer
+
+The lexer takes the preprocessed C source code as input and transforms it into a list of valid tokens: keywords, number constants, identifiers, symbols, strings and chars.
+
+## Parser
+
+The parser takes these tokens and tries to build an Abstract Syntax Tree (AST). The AST is not guaranteed to be valid while it is being built: it contains empty nodes that are meant to disappear before the end of the parsing stage.
diff --git a/src/errors/display.rs b/src/errors/display.rs
@@ -12,6 +12,10 @@ use super::compile::CompileError;
 ///
 /// See [`Res::get_displayed_errors`](super::result::Res::get_displayed_errors)
 /// for extra information and examples.
+///
+/// # Errors
+///
+/// Returns an error when writing to the string buffer fails.
 pub(super) fn display_errors(
     errors: Vec<CompileError>,
     files: &[(String, &str)],
@@ -28,16 +32,34 @@ pub(super) fn display_errors(
         let code_lines = files_state
             .get(&filename)
             .expect("Never happens: File of error doesn't exist");
-        let code_line = code_lines.get(line_nb - 1).unwrap_or_else(|| {
-            panic!("Never happens: given line of file that doesn't exist: {filename}:{line_nb}:{column_nb}")
+        let code_line = code_lines.get(safe_decrement(line_nb)).unwrap_or_else(|| {
+            panic!("Never happens: given line of file that doesn't exist: {filename}:{line_nb}:{column_nb} (for {err_type})")
         });
+        let mut too_long = false;
+        let col = safe_decrement(column_nb);
+        let under_spaces = " ".repeat(8usize.checked_add(col).unwrap_or_else(|| {
+            too_long = true;
+            col
+        }));
+        let under_tilde = "~".repeat(safe_decrement(length));
         writeln!(
             res,
-            "{filename}:{line_nb}:{column_nb}: {err_type} {err_lvl}: {message}\n{line_nb:5} | {code_line}\n{}^{}",
-            " ".repeat(8 + column_nb - 1),
-            "~".repeat(length - 1)
-        )
-        .map_err(|_| ())?;
+            "{filename}:{line_nb}:{column_nb}: {err_type} {err_lvl}: {message}\n{line_nb:5} | {code_line}\n{under_spaces}^{under_tilde}"
+        ).map_err(|_| ())?;
+        if too_long {
+            writeln!(
+                res,
+                "{filename}:{line_nb}:{column_nb}: format warning: This line of code exceeds the maximum size of {}. Consider refactoring your code. {line_nb:5} | {code_line}\n{under_spaces}^{under_tilde}",
+                usize::MAX
+            )
+            .map_err(|_| ())?;
+        }
     }
     Ok(res)
 }
+
+/// Decrements a value by 1, panicking if the value is 0.
+const fn safe_decrement(val: usize) -> usize {
+    val.checked_sub(1)
+        .expect("line, col, len are initialised at 1, then incremented")
+}
diff --git a/src/errors/location.rs b/src/errors/location.rs
@@ -30,16 +30,30 @@ impl Location {
     /// Increments column of location by 1
     ///
     /// This is used by lexer when parsing every character of the C file.
-    pub(crate) fn incr_col(&mut self) {
-        self.col += 1;
+    pub(crate) fn incr_col(&mut self) -> Result<(), CompileError> {
+        self.col = self.col.checked_add(1).ok_or_else(|| {
+            self.to_error(format!(
+                "This line of code exceeds the maximum numbers of columns ({}).
+        Consider refactoring your code.",
+                usize::MAX
+            ))
+        })?;
+        Ok(())
     }
 
     /// Increments line of location by 1
     ///
     /// This is used by lexer when parsing every line of the C file.
-    pub(crate) fn incr_line(&mut self) {
-        self.line += 1;
+    pub(crate) fn incr_line(&mut self) -> Result<(), CompileError> {
+        self.line = self.line.checked_add(1).ok_or_else(|| {
+            self.to_error(format!(
+                "The file exceeds the maximum number lines ({}). Consider refactoring
+        your code.",
+                usize::MAX
+            ))
+        })?;
         self.col = 1;
+        Ok(())
     }
 
     /// Creates an error from a location without cloning
diff --git a/src/lexer/lex_content.rs b/src/lexer/lex_content.rs
@@ -167,7 +167,9 @@ pub fn lex_file(content: &str, location: &mut Location) -> Res<Vec<Token>> {
 
     for line in content.lines() {
         lex_line(line, location, &mut lex_data, &mut lex_state);
-        location.incr_line();
+        if let Err(err) = location.incr_line() {
+            lex_data.push_err(err);
+        }
     }
 
     lex_data.into_res()
@@ -184,7 +186,7 @@ fn lex_line(line: &str, location: &mut Location, lex_data: &mut LexingData, lex_
     if trimmed.is_empty() {
         return;
     }
-    let last = trimmed.len() - 1;
+    let last = trimmed.len().checked_sub(1).expect("trimmed is not empty");
     for (idx, ch) in trimmed.chars().enumerate() {
         lex_char(
             ch,
@@ -194,7 +196,9 @@ fn lex_line(line: &str, location: &mut Location, lex_data: &mut LexingData, lex_
             &mut escape_state,
             idx == last,
         );
-        location.incr_col();
+        if let Err(err) = location.incr_col() {
+            lex_data.push_err(err);
+        }
         if lex_data.is_end_line() {
             break;
         }
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
@@ -1,3 +1,7 @@
+//! Module to lex source files into a list of valid
+//! [`Token`](types::api::Token): keywords, number constants,
+//! identifiers, symbols, strings and chars.
+
 pub mod api {
     //! Api module to choose what functions to export.
 
diff --git a/src/lexer/numbers/base/hexadecimal.rs b/src/lexer/numbers/base/hexadecimal.rs
@@ -46,12 +46,13 @@ macro_rules! impl_floating_point {
 /// Parses the stringified version of a number into a [`HexFloatData`].
 macro_rules! parse_hexadecimal_float {
     ($overflow:expr, $nb_type:ident, $float_parse:ident, $($t:ident)*) => {{
+        #[expect(clippy::float_arithmetic, clippy::arithmetic_side_effects)]
+        #[expect(clippy::as_conversions)]
         match $nb_type {
             $(NumberType::$t => {
                 let int_part = $t::from_unsigned(
                     <concat_idents!($t, IntPart)>::from_str_radix(&$float_parse.int_part, 16).expect("2 <= <= 36"),
                     $overflow);
-                #[expect(clippy::as_conversions)]
                 let exponent = $t::from_unsigned((2 as concat_idents!($t, IntPart)).pow($float_parse.get_exp()), $overflow);
                 let mut decimal_part: $t = 0.;
                 for (idx, ch) in $float_parse.decimal_part.chars().enumerate() {
@@ -328,7 +329,6 @@ pub fn to_hex_value(
         )
     } else {
         let mut overflow = false;
-        #[expect(clippy::float_arithmetic)]
         let res =
             parse_hexadecimal_float!(&mut overflow, nb_type, float_data, Float Double LongDouble);
         if overflow { res.add_overflow() } else { res }
diff --git a/src/lexer/numbers/from_literal.rs b/src/lexer/numbers/from_literal.rs
@@ -29,7 +29,11 @@ fn get_base(
     let first = chars.next().expect("len >= 1");
     let second = chars.next().expect("len >= 2");
 
-    let one_char = literal.len() - nb_type.suffix_size() == 1;
+    let one_char = literal
+        .len()
+        .checked_sub(nb_type.suffix_size())
+        .expect("literal contains the suffix")
+        == 1;
 
     match (first, second) {
         ('0', 'x') if one_char => {
@@ -116,7 +120,7 @@ fn get_number_type(literal: &str, location: &Location) -> Result<NumberType, Com
                 return Err(location
                     .to_error("found 3 'l' characters, but max is 2 (`long long`).".to_owned()));
             }
-            'l' | 'L' => l_count += 1,
+            'l' | 'L' => l_count = l_count.checked_add(1).expect("l_count <= 1"),
             'f' | 'F' if is_hex && !double_or_float => break,
             'f' | 'F' => float = true,
             'i' | 'I' => {
@@ -206,7 +210,7 @@ fn literal_to_number_err(literal: &str, location: Location, signed: bool) -> Par
     let mut nb_type = get_number_type(literal, &location)?;
     let base = get_base(literal, &nb_type, &location)?;
     let value = literal
-        .get(base.prefix_size()..literal.len() - nb_type.suffix_size())
+        .get(base.prefix_size()..literal.len().checked_sub(nb_type.suffix_size()).expect("literal contains the suffix"))
         .expect("never happens as suffix size + prefix size <= len, as 'x' and 'b' can't be used as suffix");
 
     if value.is_empty() {
diff --git a/src/lexer/numbers/types.rs b/src/lexer/numbers/types.rs
@@ -34,9 +34,13 @@ use arch_types::{Double, Float, Int, Long, LongDouble, LongLong, UInt, ULong, UL
 /// Defines the [`Number`] and [`NumberType`] enums
 macro_rules! define_nb_types {
     ($($t:ident)*) => {
+        /// Token value for a number constant
         #[derive(Debug, PartialEq)]
         pub enum Number {
-            $($t($t),)*
+            $(
+                /// $t C type
+                $t($t),
+            )*
         }
 
         pub enum NumberType {
diff --git a/src/lexer/state/end_state.rs b/src/lexer/state/end_state.rs
@@ -57,9 +57,10 @@ fn end_ident(literal: &mut Ident, lex_data: &mut LexingData, location: &Location
 
 /// Ends the state for symbols.
 pub fn end_symbols(symbols: &mut SymbolState, lex_data: &mut LexingData, location: &Location) {
-    let mut idx: usize = 0;
-    while !symbols.is_empty() && idx <= 2 {
-        idx += 1;
+    for _ in 0u32..3u32 {
+        if symbols.is_empty() {
+            break;
+        }
         if let Some((size, symbol)) = symbols.try_to_operator(lex_data, location) {
             let token = Token::from_symbol(symbol, size, location);
             lex_data.push_token(token);
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,5 +1,7 @@
+#![doc = include_str!("../docs/README.md")]
+#![cfg_attr(doc, doc = include_str!("../docs/README.md"))]
 // Rustc lint groups
-// #![warn(missing_docs)]
+#![warn(missing_docs)]
 #![warn(warnings)]
 #![warn(deprecated_safe)]
 #![warn(future_incompatible)]
@@ -34,20 +36,17 @@
 #![allow(clippy::mod_module_files)]
 #![allow(clippy::module_name_repetitions)]
 #![allow(clippy::pub_with_shorthand)]
+#![allow(clippy::unseparated_literal_suffix)]
 // Disabled lints
+#![allow(clippy::doc_include_without_cfg, reason = "see issue #13918")]
 #![allow(clippy::exhaustive_enums)]
-// TODO
 #![allow(clippy::allow_attributes_without_reason)]
-#![allow(clippy::arithmetic_side_effects)]
-#![allow(clippy::unseparated_literal_suffix)]
+// Errors to manage
 #![allow(
-    // errors to manage
     clippy::panic,
     clippy::expect_used,
     clippy::unwrap_in_result,
-    clippy::panic_in_result_fn,
-    // doc
-    clippy::missing_docs_in_private_items,
+    clippy::panic_in_result_fn
 )]
 // Features
 #![feature(
@@ -71,4 +70,5 @@ pub use crate::lexer::api::{Number, TokenValue, display_tokens, lex_file};
 #[expect(clippy::useless_attribute, clippy::pub_use)]
 pub use crate::parser::api::parse_tokens;
 
+/// String to represent the empty symbol, displayed for empty nodes.
 const EMPTY: &str = "\u{2205} ";
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -1,3 +1,8 @@
+//! Module to parse a list of tokens into an Abstract Syntax Tree.
+//!
+//! This module doesn't check that the tree is valid, and only performs trivial
+//! error detection while building the AST.
+
 pub mod api {
     //! Api module to choose what functions to export.
 
diff --git a/src/parser/modifiers/ast.rs b/src/parser/modifiers/ast.rs
@@ -27,7 +27,7 @@ impl Ast {
             Self::Empty => Err("LHS: Missing argument.".to_owned()),
             Self::Leaf(Literal::Variable(Variable { attrs, .. })) => {
                 let old_attrs = mem::take(attrs);
-                attrs.reserve(previous_attrs.len() + attrs.len());
+                attrs.reserve(previous_attrs.len().checked_add(attrs.len()).ok_or_else(|| "Code overflow occurred. Please reduce the number of attributes applied to this variable.".to_owned())?);
                 attrs.extend(previous_attrs);
                 attrs.extend(old_attrs);
                 Ok(())
diff --git a/src/parser/modifiers/list_initialiser.rs b/src/parser/modifiers/list_initialiser.rs
diff --git a/src/parser/types/mod.rs b/src/parser/types/mod.rs