Skip to content

Commit 7109f03

Browse files
committed
Add lookahead to patterns
1 parent 17cf2fd commit 7109f03

File tree

2 files changed

+146
-20
lines changed

2 files changed

+146
-20
lines changed

include/utility/Pattern.hpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
#include <vector>
77

88
namespace utility {
9+
// Gap limit, in bytes, applied before a segment when the '*' token that
// precedes it in the pattern string does not carry an explicit size.
inline constexpr size_t DEFAULT_GLOB_MAX_GAP = 256;
10+
11+
// One contiguous run of pattern elements inside a (possibly multi-segment)
// Pattern. Segments are separated by '*' tokens in the pattern string.
struct PatternSegment {
    // Elements to match for this segment, as produced by buildPattern().
    // The scanner treats an element equal to -1 as "match any byte".
    std::vector<int16_t> pattern{};

    // Maximum number of bytes that may lie between the end of the previous
    // segment and the start of this one. 0 for the first segment.
    // Default-initialized so that `PatternSegment seg;` never leaves it
    // indeterminate.
    size_t max_gap{0};
};
15+
916
class Pattern {
1017
public:
1118
Pattern() = delete;
@@ -19,10 +26,15 @@ namespace utility {
1926
Pattern& operator=(const Pattern& other) = default;
2027
Pattern& operator=(Pattern&& other) = default;
2128

22-
auto pattern_len() const noexcept { return m_pattern.size(); }
29+
// Returns the length of the first segment (backward compat).
30+
auto pattern_len() const noexcept { return m_segments[0].pattern.size(); }
31+
32+
bool is_multi_segment() const noexcept { return m_segments.size() > 1; }
2333

2434
private:
25-
std::vector<int16_t> m_pattern;
35+
std::optional<uintptr_t> find_single(uintptr_t start, size_t length, const std::vector<int16_t>& pat);
36+
37+
std::vector<PatternSegment> m_segments;
2638
};
2739

2840
// Converts a string pattern (e.g. "90 90 ? EB ? ? ?") to a vector of ints where

src/Pattern.cpp

Lines changed: 132 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <algorithm>
2+
#include <charconv>
23

34
#include <Windows.h>
45

@@ -25,20 +26,83 @@ namespace utility {
2526
}
2627

2728
Pattern::Pattern(const string& pattern)
28-
: m_pattern{}
29+
: m_segments{}
2930
{
30-
m_pattern = buildPattern(pattern);
31+
// Split pattern string at '*' tokens to produce segments.
32+
// Syntax: "AA BB * CC DD" or "AA BB *128 CC DD" (max gap in bytes).
33+
// Spaces around '*' are handled by splitting on whitespace tokens.
34+
35+
// Tokenize by spaces first so we can detect '*' and '*N' tokens.
36+
std::vector<std::string> tokens;
37+
{
38+
size_t i = 0;
39+
while (i < pattern.size()) {
40+
while (i < pattern.size() && pattern[i] == ' ') ++i;
41+
if (i >= pattern.size()) break;
42+
size_t start = i;
43+
while (i < pattern.size() && pattern[i] != ' ') ++i;
44+
tokens.emplace_back(pattern.substr(start, i - start));
45+
}
46+
}
47+
48+
// Group tokens into segments separated by '*' / '*N' tokens.
49+
std::vector<std::string> segment_strs;
50+
std::vector<size_t> gap_sizes; // gap_sizes[i] = max gap before segment i
51+
52+
std::string current;
53+
for (auto& tok : tokens) {
54+
if (!tok.empty() && tok[0] == '*') {
55+
// Flush current segment
56+
segment_strs.push_back(std::move(current));
57+
current.clear();
58+
59+
// Parse optional gap size: *[N]
60+
size_t gap = DEFAULT_GLOB_MAX_GAP;
61+
if (tok.size() > 2 && tok[1] == '[') {
62+
auto close = tok.find(']', 2);
63+
if (close != std::string::npos) {
64+
std::from_chars(tok.data() + 2, tok.data() + close, gap);
65+
}
66+
}
67+
gap_sizes.push_back(gap);
68+
} else {
69+
if (!current.empty()) current += ' ';
70+
current += tok;
71+
}
72+
}
73+
// Flush last segment
74+
if (!current.empty()) {
75+
segment_strs.push_back(std::move(current));
76+
}
77+
78+
// Build each segment
79+
for (size_t i = 0; i < segment_strs.size(); ++i) {
80+
PatternSegment seg;
81+
seg.pattern = buildPattern(segment_strs[i]);
82+
seg.max_gap = (i < gap_sizes.size() + 1 && i > 0) ? gap_sizes[i - 1] : 0;
83+
m_segments.push_back(std::move(seg));
84+
}
85+
86+
// Fallback: if pattern was empty or something went wrong, push an empty segment
87+
if (m_segments.empty()) {
88+
m_segments.push_back(PatternSegment{{}, 0});
89+
}
3190
}
3291

33-
optional<uintptr_t> Pattern::find(uintptr_t start, size_t length) {
34-
auto patternLength = m_pattern.size();
92+
optional<uintptr_t> Pattern::find_single(uintptr_t start, size_t length, const vector<int16_t>& pat) {
93+
auto patternLength = pat.size();
94+
95+
if (patternLength == 0 || length < patternLength) {
96+
return start; // Empty pattern matches immediately
97+
}
98+
3599
auto actual_end = start + length;
36100
auto end_scan_from = actual_end - patternLength;
37101

38102
int32_t first_non_wildcard_index{-1};
39103

40-
for (size_t p = 0; p < m_pattern.size(); ++p) {
41-
const auto k = m_pattern[p];
104+
for (size_t p = 0; p < pat.size(); ++p) {
105+
const auto k = pat[p];
42106
if (k != -1) {
43107
first_non_wildcard_index = p;
44108
break;
@@ -52,9 +116,7 @@ namespace utility {
52116
auto it_wildcard = (uint8_t*)start;
53117

54118
do try {
55-
// std::find can throw an exception if the memory is not readable.
56-
// std::find also appears to be highly optimized compared to a manual loop which is why we use it.
57-
it_wildcard = std::find((uint8_t*)it_wildcard, (uint8_t*)actual_end, (uint8_t)m_pattern[first_non_wildcard_index]);
119+
it_wildcard = std::find((uint8_t*)it_wildcard, (uint8_t*)actual_end, (uint8_t)pat[first_non_wildcard_index]);
58120

59121
auto it = it_wildcard - first_non_wildcard_index;
60122

@@ -67,15 +129,7 @@ namespace utility {
67129
auto j = it;
68130
auto failedToMatch = false;
69131

70-
// Make sure the address is readable.
71-
// Actually, don't do this. It's overhead (indirectly calls through a ptr)
72-
// Our exception handler should be fine.
73-
/*if (IsBadReadPtr((const void*)it, patternLength) != FALSE) {
74-
it_wildcard += patternLength - 1;
75-
continue;
76-
}*/
77-
78-
for (auto& k : m_pattern) {
132+
for (auto& k : pat) {
79133
if (k != -1 && k != *(uint8_t*)j) {
80134
failedToMatch = true;
81135
break;
@@ -97,6 +151,66 @@ namespace utility {
97151
return {};
98152
}
99153

154+
// Searches the address range [start, start + length) for this pattern.
//
// Returns the address where the first segment matched, or an empty optional
// if there is no match.
//
// Single-segment patterns are delegated directly to find_single(). For
// multi-segment patterns the first segment is located, then each following
// segment must be found by find_single() inside a window that starts where the
// previous segment ended and extends at most `max_gap` bytes further. If any
// segment is missing, the search restarts one byte after the failed
// first-segment match.
//
// NOTE: later segments are not backtracked individually; for each one, the hit
// reported by find_single() is accepted and a failure further down restarts
// from the first segment.
optional<uintptr_t> Pattern::find(uintptr_t start, size_t length) {
    if (m_segments.empty()) {
        return {};
    }

    const auto actual_end = start + length;
    const auto& first = m_segments[0].pattern;
    const size_t first_len = first.size();

    // find_single() reports `start` when the range is shorter than the
    // pattern, which would be a false match. A range that cannot even hold
    // the first segment cannot match.
    if (length < first_len) {
        return {};
    }

    // Fast path: single segment (no glob wildcards).
    if (m_segments.size() == 1) {
        return find_single(start, length, first);
    }

    auto search_start = start;

    // Stop as soon as the remaining bytes cannot hold the first segment.
    while (search_start < actual_end && actual_end - search_start >= first_len) {
        const auto remaining = actual_end - search_start;
        const auto seg0_result = find_single(search_start, remaining, first);

        if (!seg0_result) {
            return {};
        }

        const auto match_start = *seg0_result;
        auto cursor = match_start + first_len;
        bool all_found = true;

        for (size_t i = 1; i < m_segments.size(); ++i) {
            const auto& seg = m_segments[i];
            const size_t seg_len = seg.pattern.size();

            if (cursor > actual_end) {
                all_found = false;
                break;
            }

            const size_t avail = static_cast<size_t>(actual_end - cursor);

            if (avail < seg_len) {
                all_found = false;
                break;
            }

            // Window = segment length plus the permitted gap, clamped to the
            // bytes that remain. Clamping the gap first avoids the wrap-around
            // that `cursor + max_gap + seg_len` suffers for very large gaps.
            const size_t window = seg_len + (std::min)(seg.max_gap, avail - seg_len);

            if (window == 0) {
                all_found = false;
                break;
            }

            const auto seg_result = find_single(cursor, window, seg.pattern);

            if (!seg_result) {
                all_found = false;
                break;
            }

            cursor = *seg_result + seg_len;
        }

        if (all_found) {
            return match_start;
        }

        // Retry from just after the failed first-segment match.
        search_start = match_start + 1;
    }

    return {};
}
213+
100214
vector<int16_t> buildPattern(string patternStr) {
101215
// Remove spaces from the pattern string.
102216
patternStr.erase(remove_if(begin(patternStr), end(patternStr), isspace), end(patternStr));

0 commit comments

Comments
 (0)