Skip to content

Commit cac0409

Browse files
committed
feat: adding support for masked byte sequences
1 parent b1afb75 commit cac0409

File tree

5 files changed

+141
-5
lines changed

5 files changed

+141
-5
lines changed

bmatcher-core/src/atom.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,19 @@ pub enum Atom {
1818
/// Match a sequence of bytes from the sequence array.
1919
ByteSequence { seq_start: u16, seq_end: u16 },
2020

21+
/// Match a sequence of bytes from the sequence array
22+
/// using a custom bitmask.
23+
ByteSequenceMasked {
24+
/// Start of the byte sequence in the sequence array.
25+
seq_start: u16,
26+
27+
/// Start of the byte mask in the sequence array.
28+
mask_start: u16,
29+
30+
/// Number of bytes to match; this is the length of both the byte sequence and its mask.
31+
len: u16,
32+
},
33+
2134
/// Skip a fixed number of bytes.
2235
WildcardFixed(u16),
2336
/// Skip a variable number of bytes.

bmatcher-core/src/compiler/lexer.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ pub enum Token<'a> {
1111
/// A one byte wildcard token: "?"
1212
Whildcard,
1313

14+
/// A mask token: "&"
15+
Mask,
16+
1417
/// A range open token: "["
1518
RangeOpen,
1619
/// A range token: "-"
@@ -73,6 +76,7 @@ impl<'a> Lexer<'a> {
7376
fn char_to_token(&self, value: char) -> Option<Token<'a>> {
7477
Some(match value {
7578
'?' => Token::Whildcard,
79+
'&' => Token::Mask,
7680

7781
'{' => Token::BlockOpen,
7882
'}' => Token::BlockClose,

bmatcher-core/src/compiler/parser.rs

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ pub enum ParseError {
1818
UnexpectedToken,
1919
UnexpectedEnd,
2020

21+
MaskByteLenMismatch,
22+
2123
HexValueInvalid,
2224
HexValueIncomplete,
2325

@@ -114,7 +116,7 @@ impl<'a> PatternParser<'a> {
114116
Ok(false)
115117
}
116118

117-
fn parse_byte_sequence(&mut self) -> Result<(), PositionedError<ParseError>> {
119+
fn parse_bytes(&mut self) -> Result<(u16, u16), PositionedError<ParseError>> {
118120
let Token::Text(value) = self.pop_token()? else {
119121
return Err(PositionedError::new(
120122
self.lexer.token_range(),
@@ -164,10 +166,38 @@ impl<'a> PatternParser<'a> {
164166
));
165167
}
166168

167-
self.atoms.push(Atom::ByteSequence {
168-
seq_start: bytes_start as u16,
169-
seq_end: bytes_end as u16,
170-
});
169+
Ok((bytes_start as u16, bytes_end as u16))
170+
}
171+
172+
fn parse_byte_sequence(&mut self) -> Result<(), PositionedError<ParseError>> {
173+
let (bytes_start, bytes_end) = self.parse_bytes()?;
174+
175+
if let Some(Token::Mask) = self.peek_token() {
176+
/* masked byte sequence */
177+
let _ = self.pop_token()?;
178+
179+
let (mask_start, mask_end) = self.parse_bytes()?;
180+
let mask_length = mask_end - mask_start;
181+
let bytes_length = bytes_end - bytes_start;
182+
if mask_length != bytes_length {
183+
return Err(PositionedError::new(
184+
self.lexer.token_range(),
185+
ParseError::MaskByteLenMismatch,
186+
));
187+
}
188+
189+
self.atoms.push(Atom::ByteSequenceMasked {
190+
seq_start: bytes_start,
191+
mask_start,
192+
len: bytes_length,
193+
});
194+
} else {
195+
/* normal byte sequence */
196+
self.atoms.push(Atom::ByteSequence {
197+
seq_start: bytes_start as u16,
198+
seq_end: bytes_end as u16,
199+
});
200+
}
171201

172202
Ok(())
173203
}
@@ -468,6 +498,46 @@ mod test {
468498
}
469499
}
470500

501+
#[test]
fn test_byte_sequence_mask() {
    // Parses `pattern` and checks that it yields exactly one masked atom
    // starting at offset 0, plus the expected contents of the byte sequence
    // (pattern bytes followed by the mask bytes).
    let expect_masked = |pattern: &str, mask_start: u16, len: u16, sequence: &[u8]| {
        let parsed = PatternParser::new(pattern).parse().unwrap();
        assert_eq!(
            parsed.atoms(),
            &[Atom::ByteSequenceMasked {
                seq_start: 0,
                mask_start,
                len
            }]
        );
        assert_eq!(parsed.byte_sequence(), sequence);
    };

    // Single byte with a single mask byte.
    expect_masked("FF & F0", 1, 1, &[0xFF, 0xF0]);

    // Two bytes with a two byte mask.
    expect_masked("FFEE & F0F0", 2, 2, &[0xFF, 0xEE, 0xF0, 0xF0]);

    // A mask shorter than the byte sequence is rejected; the error is
    // reported at the range 7..9.
    let error = PatternParser::new("FFFF & FF").parse().unwrap_err();
    assert_eq!(
        error,
        PositionedError::new(7..9, ParseError::MaskByteLenMismatch)
    );
}
540+
471541
#[test]
472542
fn test_jump() {
473543
{

bmatcher-core/src/matcher.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,38 @@ impl<'a, S: Stack<u32>, C: Stack<usize>> BinaryMatcher<'a, S, C> {
100100
atom_cursor += 1;
101101
data_cursor += expected_bytes.len();
102102
}
103+
Atom::ByteSequenceMasked {
104+
seq_start,
105+
mask_start,
106+
len,
107+
} => {
108+
let target_bytes = &self.pattern_byte_sequence
109+
[seq_start as usize..seq_start as usize + len as usize];
110+
111+
let target_mask = &self.pattern_byte_sequence
112+
[mask_start as usize..mask_start as usize + len as usize];
113+
114+
let actual_bytes = self.target.subrange(data_cursor, target_mask.len())?;
115+
116+
let target_bytes = target_bytes
117+
.iter()
118+
.zip(target_mask)
119+
.map(|(value, mask)| *value & *mask);
120+
let actual_bytes = actual_bytes
121+
.iter()
122+
.zip(target_mask)
123+
.map(|(value, mask)| *value & *mask);
124+
125+
if target_bytes
126+
.zip(actual_bytes)
127+
.any(|(expected, data)| expected != data)
128+
{
129+
return None;
130+
}
131+
132+
atom_cursor += 1;
133+
data_cursor += len as usize;
134+
}
103135
Atom::WildcardFixed(length) => {
104136
atom_cursor += 1;
105137
data_cursor += length as usize;
@@ -295,6 +327,11 @@ mod test {
295327
test_single("B7 69 3D", DATA, None);
296328
}
297329

330+
#[test]
fn test_binary_mask() {
    // The middle mask byte 0xFE ignores the lowest bit, so the pattern byte
    // 0x68 also accepts 0x69 (0x68 & 0xFE == 0x69 & 0xFE).
    // NOTE(review): presumably DATA holds `B7 69 2D` at offset 0x41 - confirm
    // against the DATA fixture.
    let masked_pattern = "B7682D & FFFEFF";
    test_single(masked_pattern, DATA, Some(&[0x41]));
}
334+
298335
#[test]
299336
fn test_range() {
300337
test_single("B7 69 2D [0-3] 85 2C '", DATA, Some(&[0x41, 0x48]));

bmatcher-proc/src/macro_pattern.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ fn emit_atom(output: &mut dyn Write, atom: &Atom) -> io::Result<()> {
3131
)?;
3232
}
3333

34+
Atom::ByteSequenceMasked {
35+
seq_start,
36+
mask_start,
37+
len,
38+
} => {
39+
write!(
40+
output,
41+
"bmatcher::Atom::ByteSequenceMasked{{ seq_start: 0x{:X}, mask_start: 0x{:X}, len: 0x{:X} }}",
42+
seq_start, mask_start, len
43+
)?;
44+
}
45+
3446
Atom::WildcardFixed(value) => {
3547
write!(output, "bmatcher::Atom::WildcardFixed(0x{:X})", value)?;
3648
}

0 commit comments

Comments
 (0)