Skip to content

Commit 3045f49

Browse files
author
Adrian
committed
More regex parser cleanup and improved diagnostics.
1 parent 59c660d commit 3045f49

File tree

5 files changed

+246
-141
lines changed

5 files changed

+246
-141
lines changed

rust/include/log_mechanic.generated.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,15 @@ struct CLogFragment {
5656
CLogFragment() = default;
5757

5858
// Generated
59+
/// `0` iff no variable found (static text until end of input).
5960
size_t rule;
61+
/// Start of variable (if found).
6062
const uint8_t *start;
63+
/// End of variable (if found).
6164
const uint8_t *end;
65+
/// Pointer to an array of captures (if variable found).
6266
const Capture *captures;
67+
/// Number of captures.
6368
size_t captures_count;
6469

6570
CLogFragment(size_t const& rule,
@@ -87,7 +92,7 @@ Box<Lexer> clp_log_mechanic_lexer_new(const Schema *schema);
8792
///
8893
/// The returned [`CLogFragment`] includes a hidden exclusive borrow of `lexer`
8994
/// (it contains a pointer into an internal buffer of `lexer`),
90-
/// so it is nolonger valid/you must not use it after a subsequent exclusive borrow of `lexer`
95+
/// so it is no longer valid (you must not touch it) after any subsequent borrow of `lexer`
9196
/// (i.e. this borrow has ended).
9297
CLogFragment clp_log_mechanic_lexer_next_fragment(Lexer *lexer, CStringView input, size_t *pos);
9398

rust/src/c_interface.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,19 @@ pub struct CSlice<'lifetime, T> {
1919
pub type CStringView<'lifetime> = CSlice<'lifetime, c_char>;
2020

2121
#[repr(C)]
22-
pub struct CLogFragment<'schema, 'input> {
22+
pub struct CLogFragment<'schema, 'input, 'buffer> {
23+
/// `0` iff no variable found (static text until end of input).
2324
pub rule: usize,
25+
/// Start of variable (if found).
2426
pub start: *const u8,
27+
/// End of variable (if found).
2528
pub end: *const u8,
29+
/// Pointer to an array of captures (if variable found).
2630
pub captures: *const Capture<'schema, 'input>,
31+
/// Number of captures.
2732
pub captures_count: usize,
33+
/// Indicates that `captures` points into (borrows from) some external `'buffer`.
34+
pub _captures_lifetime: PhantomData<&'buffer [Capture<'schema, 'input>]>,
2835
}
2936

3037
#[unsafe(no_mangle)]
@@ -69,21 +76,22 @@ unsafe extern "C" fn clp_log_mechanic_lexer_delete<'a>(lexer: Box<Lexer<'_, 'a>>
6976
///
7077
/// The returned [`CLogFragment`] includes a hidden exclusive borrow of `lexer`
7178
/// (it contains a pointer into an internal buffer of `lexer`),
72-
/// so it is nolonger valid/you must not use it after a subsequent exclusive borrow of `lexer`
79+
/// so it is no longer valid (you must not touch it) after any subsequent borrow of `lexer`
7380
/// (i.e. this borrow has ended).
7481
#[unsafe(no_mangle)]
7582
unsafe extern "C" fn clp_log_mechanic_lexer_next_fragment<'schema, 'lexer, 'input>(
7683
lexer: &'lexer mut Lexer<'schema, 'input>,
7784
input: CStringView<'input>,
7885
pos: &mut usize,
79-
) -> CLogFragment<'schema, 'input> {
86+
) -> CLogFragment<'schema, 'input, 'lexer> {
8087
let fragment: Fragment<'_, '_, '_> = lexer.next_fragment(input.as_utf8().unwrap(), pos);
8188
CLogFragment {
8289
rule: fragment.rule,
8390
start: fragment.lexeme.as_bytes().as_ptr_range().start,
8491
end: fragment.lexeme.as_bytes().as_ptr_range().end,
8592
captures: fragment.captures.as_ptr(),
8693
captures_count: fragment.captures.len(),
94+
_captures_lifetime: PhantomData,
8795
}
8896
}
8997

rust/src/lexer.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ impl<'schema, 'input> Lexer<'schema, 'input> {
7575
}
7676

7777
fn glob_static_text(&self, input: &'input str, pos: &mut usize) {
78-
// TODO use automata?
7978
for ch in input[*pos..].chars() {
8079
*pos += ch.len_utf8();
8180
if self.is_delimiter(ch) {
@@ -85,7 +84,9 @@ impl<'schema, 'input> Lexer<'schema, 'input> {
8584
}
8685

8786
fn is_delimiter(&self, ch: char) -> bool {
88-
self.schema.delimiters.contains(ch)
87+
// XXX: Needs benchmarking, but given a small set of delimiter characters,
88+
// a linear search may be faster than a theoretically $O(1)$ hash lookup.
89+
self.schema.delimiters().contains(ch)
8990
}
9091
}
9192

@@ -104,11 +105,17 @@ mod test {
104105
let mut lexer: Lexer<'_, '_> = Lexer::new(&schema).unwrap();
105106
let input: &str = "hello world goodbye hello world goodbye ";
106107
let mut pos: usize = 0;
107-
assert_eq!(lexer.next_fragment(input, &mut pos).rule, 1);
108-
assert_eq!(lexer.next_fragment(input, &mut pos).rule, 2);
109-
assert_eq!(lexer.next_fragment(input, &mut pos).rule, 1);
110-
assert_eq!(lexer.next_fragment(input, &mut pos).rule, 2);
111-
assert_eq!(lexer.next_fragment(input, &mut pos).rule, 0);
108+
assert_eq!(lexer.next_fragment(input, &mut pos).projection(), (1, "hello world"));
109+
assert_eq!(lexer.next_fragment(input, &mut pos).projection(), (2, "goodbye"));
110+
assert_eq!(lexer.next_fragment(input, &mut pos).projection(), (1, "hello world"));
111+
assert_eq!(lexer.next_fragment(input, &mut pos).projection(), (2, "goodbye"));
112+
assert_eq!(lexer.next_fragment(input, &mut pos).projection(), (0, ""));
112113
assert_eq!(pos, input.len());
113114
}
115+
116+
impl<'schema, 'input, 'buffer> Fragment<'schema, 'input, 'buffer> {
117+
fn projection(&self) -> (usize, &'input str) {
118+
(self.rule, self.lexeme)
119+
}
120+
}
114121
}

0 commit comments

Comments
 (0)