Skip to content

Commit 160a924

Browse files
committed
feat: use u16 instead of usize for the offset representation in Atom, limiting a pattern to u16::MAX bytes and u16::MAX atoms per branch
1 parent 9b3a750 commit 160a924

File tree

4 files changed

+51
-26
lines changed

4 files changed

+51
-26
lines changed

bmatcher-core/src/atom.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ pub enum ReadWidth {
1616
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
1717
pub enum Atom {
1818
/// Match a sequence of bytes from the sequence array.
19-
ByteSequence { seq_start: usize, seq_end: usize },
19+
ByteSequence { seq_start: u16, seq_end: u16 },
2020

2121
/// Skip a fixed number of bytes.
22-
WildcardFixed(usize),
22+
WildcardFixed(u16),
2323
/// Skip a variable number of bytes.
24-
WildcardRange { min: usize, max: usize },
24+
WildcardRange { min: u16, max: u16 },
2525

2626
/// Jump to a relative / absolute location based on the binary data at the current cursor location.
2727
Jump(JumpType),
@@ -34,16 +34,16 @@ pub enum Atom {
3434
/// and then continue where we left off.
3535
Branch {
3636
/// Length of the left subpattern
37-
left_len: usize,
37+
left_len: u16,
3838

3939
/// Length of the right subpattern
40-
right_len: usize,
40+
right_len: u16,
4141
},
4242

4343
/// Push the cursor location to the cursor stack
4444
CursorPush,
4545
/// Pop the cursor location from the cursor stack and advance by X bytes
46-
CursorPop { advance: usize },
46+
CursorPop { advance: u16 },
4747

4848
/// Save the current cursor position to the save stack
4949
SaveCursor,

bmatcher-core/src/compiler/optimizer.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ impl Optimizer {
4141

4242
branches.push(Branch {
4343
atom_index: index,
44-
right_index: index + 1 + *left_len,
45-
end_index: index + 1 + *left_len + *right_len,
44+
right_index: index + 1 + *left_len as usize,
45+
end_index: index + 1 + *left_len as usize + *right_len as usize,
4646
});
4747
}
4848

@@ -83,8 +83,8 @@ impl Optimizer {
8383
fn fixup_branches(&mut self) {
8484
for branch in self.branches.iter() {
8585
self.atoms[branch.atom_index] = Atom::Branch {
86-
left_len: branch.right_index - branch.atom_index - 1,
87-
right_len: branch.end_index - branch.right_index,
86+
left_len: (branch.right_index - branch.atom_index - 1) as u16,
87+
right_len: (branch.end_index - branch.right_index) as u16,
8888
};
8989
}
9090
}

bmatcher-core/src/compiler/parser.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ pub enum ParseError {
2626

2727
RangeBoundInvalid(ParseIntError),
2828
RangeEndMustBeGraterThenStart,
29+
30+
SequenceTooLarge,
2931
}
3032

3133
pub struct PatternParser<'a> {
@@ -120,10 +122,10 @@ impl<'a> PatternParser<'a> {
120122
));
121123
};
122124

125+
let token_range = self.lexer.token_range();
123126
let bytes_start = self.byte_sequence.len();
124127
let mut values = value.char_indices();
125128
while let Some((upper_index, upper)) = values.next() {
126-
let token_range = self.lexer.token_range();
127129
let Some((lower_index, lower)) = values.next() else {
128130
/* byte sequence must always be a multiple of 2 */
129131
return Err(PositionedError::new(
@@ -155,9 +157,16 @@ impl<'a> PatternParser<'a> {
155157
}
156158

157159
let bytes_end = self.byte_sequence.len();
160+
if bytes_start > u16::MAX as usize || bytes_end > u16::MAX as usize {
161+
return Err(PositionedError::new(
162+
token_range,
163+
ParseError::SequenceTooLarge,
164+
));
165+
}
166+
158167
self.atoms.push(Atom::ByteSequence {
159-
seq_start: bytes_start,
160-
seq_end: bytes_end,
168+
seq_start: bytes_start as u16,
169+
seq_end: bytes_end as u16,
161170
});
162171

163172
Ok(())
@@ -270,8 +279,15 @@ impl<'a> PatternParser<'a> {
270279
}
271280

272281
let left_branch_len = self.atoms.len() - branch_atom_index - 1;
282+
if left_branch_len > u16::MAX as usize {
283+
return Err(PositionedError::new(
284+
self.lexer.token_range(),
285+
ParseError::SequenceTooLarge,
286+
));
287+
}
288+
273289
if let Atom::Branch { left_len, .. } = &mut self.atoms[branch_atom_index] {
274-
*left_len = left_branch_len;
290+
*left_len = left_branch_len as u16;
275291
} else {
276292
unreachable!("atom should be a branch");
277293
}
@@ -286,7 +302,15 @@ impl<'a> PatternParser<'a> {
286302
right_len,
287303
} = &mut self.atoms[branch_atom_index]
288304
{
289-
*right_len = atom_count - *left_len - branch_atom_index - 1;
305+
let right_branch_len = atom_count - *left_len as usize - branch_atom_index - 1;
306+
if right_branch_len > u16::MAX as usize {
307+
return Err(PositionedError::new(
308+
self.lexer.token_range(),
309+
ParseError::SequenceTooLarge,
310+
));
311+
}
312+
313+
*right_len = right_branch_len as u16;
290314
} else {
291315
unreachable!("atom should be a branch");
292316
}
@@ -310,7 +334,7 @@ impl<'a> PatternParser<'a> {
310334
));
311335
};
312336

313-
let range_start = range_start.parse::<usize>().map_err(|err| {
337+
let range_start = range_start.parse::<u16>().map_err(|err| {
314338
PositionedError::new(self.lexer.token_range(), ParseError::RangeBoundInvalid(err))
315339
})?;
316340

@@ -327,7 +351,7 @@ impl<'a> PatternParser<'a> {
327351
));
328352
};
329353

330-
let range_end = range_end.parse::<usize>().map_err(|err| {
354+
let range_end = range_end.parse::<u16>().map_err(|err| {
331355
PositionedError::new(
332356
self.lexer.token_range(),
333357
ParseError::RangeBoundInvalid(err),

bmatcher-core/src/matcher.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ impl<'a> BinaryMatcher<'a> {
4444
while atom_cursor < atoms.len() {
4545
match atoms[atom_cursor] {
4646
Atom::ByteSequence { seq_start, seq_end } => {
47-
let expected_bytes = &self.pattern.byte_sequence()[seq_start..seq_end];
47+
let expected_bytes =
48+
&self.pattern.byte_sequence()[seq_start as usize..seq_end as usize];
4849
let actual_bytes = self.target.subrange(data_cursor, expected_bytes.len())?;
4950

5051
if expected_bytes != actual_bytes {
@@ -56,7 +57,7 @@ impl<'a> BinaryMatcher<'a> {
5657
}
5758
Atom::WildcardFixed(length) => {
5859
atom_cursor += 1;
59-
data_cursor += length;
60+
data_cursor += length as usize;
6061
}
6162
Atom::WildcardRange { min, max } => {
6263
let save_stack_size = self.save_stack.len();
@@ -65,8 +66,8 @@ impl<'a> BinaryMatcher<'a> {
6566
for offset in min..=max {
6667
self.save_stack.truncate(save_stack_size);
6768
self.cursor_stack.truncate(cursor_stack_size);
68-
if let Some(data_cursor) =
69-
self.match_atoms(data_cursor + offset, &atoms[atom_cursor + 1..])
69+
if let Some(data_cursor) = self
70+
.match_atoms(data_cursor + offset as usize, &atoms[atom_cursor + 1..])
7071
{
7172
/* match :) */
7273
return Some(data_cursor);
@@ -81,7 +82,7 @@ impl<'a> BinaryMatcher<'a> {
8182
atom_cursor += 1;
8283
}
8384
Atom::CursorPop { advance } => {
84-
data_cursor = self.cursor_stack.pop().unwrap() + advance;
85+
data_cursor = self.cursor_stack.pop().unwrap() + advance as usize;
8586
atom_cursor += 1;
8687
}
8788

@@ -94,7 +95,7 @@ impl<'a> BinaryMatcher<'a> {
9495

9596
data_cursor = if let Some(data_cursor) = self.match_atoms(
9697
data_cursor,
97-
&atoms[atom_cursor + 1..atom_cursor + 1 + left_len],
98+
&atoms[atom_cursor + 1..atom_cursor + 1 + left_len as usize],
9899
) {
99100
/* match for left hand side */
100101
data_cursor
@@ -105,12 +106,12 @@ impl<'a> BinaryMatcher<'a> {
105106

106107
self.match_atoms(
107108
data_cursor,
108-
&atoms[atom_cursor + 1 + left_len
109-
..atom_cursor + 1 + left_len + right_len],
109+
&atoms[atom_cursor + 1 + left_len as usize
110+
..atom_cursor + 1 + left_len as usize + right_len as usize],
110111
)?
111112
};
112113

113-
atom_cursor += 1 + left_len + right_len;
114+
atom_cursor += 1 + left_len as usize + right_len as usize;
114115
}
115116

116117
Atom::Jump(mode) => {

0 commit comments

Comments
 (0)