11#include < algorithm>
2+ #include < charconv>
23
34#include < Windows.h>
45
@@ -25,20 +26,83 @@ namespace utility {
2526 }
2627
2728 Pattern::Pattern (const string& pattern)
28- : m_pattern {}
29+ : m_segments {}
2930 {
30- m_pattern = buildPattern (pattern);
31+ // Split pattern string at '*' tokens to produce segments.
32+ // Syntax: "AA BB * CC DD" or "AA BB *128 CC DD" (max gap in bytes).
33+ // Spaces around '*' are handled by splitting on whitespace tokens.
34+
35+ // Tokenize by spaces first so we can detect '*' and '*N' tokens.
36+ std::vector<std::string> tokens;
37+ {
38+ size_t i = 0 ;
39+ while (i < pattern.size ()) {
40+ while (i < pattern.size () && pattern[i] == ' ' ) ++i;
41+ if (i >= pattern.size ()) break ;
42+ size_t start = i;
43+ while (i < pattern.size () && pattern[i] != ' ' ) ++i;
44+ tokens.emplace_back (pattern.substr (start, i - start));
45+ }
46+ }
47+
48+ // Group tokens into segments separated by '*' / '*N' tokens.
49+ std::vector<std::string> segment_strs;
50+ std::vector<size_t > gap_sizes; // gap_sizes[i] = max gap before segment i
51+
52+ std::string current;
53+ for (auto & tok : tokens) {
54+ if (!tok.empty () && tok[0 ] == ' *' ) {
55+ // Flush current segment
56+ segment_strs.push_back (std::move (current));
57+ current.clear ();
58+
59+ // Parse optional gap size: *[N]
60+ size_t gap = DEFAULT_GLOB_MAX_GAP;
61+ if (tok.size () > 2 && tok[1 ] == ' [' ) {
62+ auto close = tok.find (' ]' , 2 );
63+ if (close != std::string::npos) {
64+ std::from_chars (tok.data () + 2 , tok.data () + close, gap);
65+ }
66+ }
67+ gap_sizes.push_back (gap);
68+ } else {
69+ if (!current.empty ()) current += ' ' ;
70+ current += tok;
71+ }
72+ }
73+ // Flush last segment
74+ if (!current.empty ()) {
75+ segment_strs.push_back (std::move (current));
76+ }
77+
78+ // Build each segment
79+ for (size_t i = 0 ; i < segment_strs.size (); ++i) {
80+ PatternSegment seg;
81+ seg.pattern = buildPattern (segment_strs[i]);
82+ seg.max_gap = (i < gap_sizes.size () + 1 && i > 0 ) ? gap_sizes[i - 1 ] : 0 ;
83+ m_segments.push_back (std::move (seg));
84+ }
85+
86+ // Fallback: if pattern was empty or something went wrong, push an empty segment
87+ if (m_segments.empty ()) {
88+ m_segments.push_back (PatternSegment{{}, 0 });
89+ }
3190 }
3291
33- optional<uintptr_t > Pattern::find (uintptr_t start, size_t length) {
34- auto patternLength = m_pattern.size ();
92+ optional<uintptr_t > Pattern::find_single (uintptr_t start, size_t length, const vector<int16_t >& pat) {
93+ auto patternLength = pat.size ();
94+
95+ if (patternLength == 0 || length < patternLength) {
96+ return start; // Empty pattern matches immediately
97+ }
98+
3599 auto actual_end = start + length;
36100 auto end_scan_from = actual_end - patternLength;
37101
38102 int32_t first_non_wildcard_index{-1 };
39103
40- for (size_t p = 0 ; p < m_pattern .size (); ++p) {
41- const auto k = m_pattern [p];
104+ for (size_t p = 0 ; p < pat .size (); ++p) {
105+ const auto k = pat [p];
42106 if (k != -1 ) {
43107 first_non_wildcard_index = p;
44108 break ;
@@ -52,9 +116,7 @@ namespace utility {
52116 auto it_wildcard = (uint8_t *)start;
53117
54118 do try {
55- // std::find can throw an exception if the memory is not readable.
56- // std::find also appears to be highly optimized compared to a manual loop which is why we use it.
57- it_wildcard = std::find ((uint8_t *)it_wildcard, (uint8_t *)actual_end, (uint8_t )m_pattern[first_non_wildcard_index]);
119+ it_wildcard = std::find ((uint8_t *)it_wildcard, (uint8_t *)actual_end, (uint8_t )pat[first_non_wildcard_index]);
58120
59121 auto it = it_wildcard - first_non_wildcard_index;
60122
@@ -67,15 +129,7 @@ namespace utility {
67129 auto j = it;
68130 auto failedToMatch = false ;
69131
70- // Make sure the address is readable.
71- // Actually, don't do this. It's overhead (indirectly calls through a ptr)
72- // Our exception handler should be fine.
73- /* if (IsBadReadPtr((const void*)it, patternLength) != FALSE) {
74- it_wildcard += patternLength - 1;
75- continue;
76- }*/
77-
78- for (auto & k : m_pattern) {
132+ for (auto & k : pat) {
79133 if (k != -1 && k != *(uint8_t *)j) {
80134 failedToMatch = true ;
81135 break ;
@@ -97,6 +151,66 @@ namespace utility {
97151 return {};
98152 }
99153
154+ optional<uintptr_t > Pattern::find (uintptr_t start, size_t length) {
155+ if (m_segments.empty ()) {
156+ return {};
157+ }
158+
159+ const auto actual_end = start + length;
160+
161+ // Fast path: single segment (no glob wildcards).
162+ if (m_segments.size () == 1 ) {
163+ return find_single (start, length, m_segments[0 ].pattern );
164+ }
165+
166+ // Multi-segment: find first segment, then each subsequent segment
167+ // within its max_gap window. On failure, retry with the next occurrence
168+ // of the first segment.
169+ auto search_start = start;
170+
171+ while (search_start < actual_end) {
172+ const auto remaining = actual_end - search_start;
173+ auto seg0_result = find_single (search_start, remaining, m_segments[0 ].pattern );
174+
175+ if (!seg0_result) {
176+ return {};
177+ }
178+
179+ const auto match_start = *seg0_result;
180+ auto cursor = match_start + m_segments[0 ].pattern .size ();
181+ bool all_found = true ;
182+
183+ for (size_t i = 1 ; i < m_segments.size (); ++i) {
184+ const auto & seg = m_segments[i];
185+ const auto seg_len = seg.pattern .size ();
186+ const auto window_end = (std::min)(cursor + seg.max_gap + seg_len, actual_end);
187+
188+ if (cursor >= window_end || window_end - cursor < seg_len) {
189+ all_found = false ;
190+ break ;
191+ }
192+
193+ auto seg_result = find_single (cursor, window_end - cursor, seg.pattern );
194+
195+ if (!seg_result) {
196+ all_found = false ;
197+ break ;
198+ }
199+
200+ cursor = *seg_result + seg_len;
201+ }
202+
203+ if (all_found) {
204+ return match_start;
205+ }
206+
207+ // Retry from after the failed first-segment match
208+ search_start = match_start + 1 ;
209+ }
210+
211+ return {};
212+ }
213+
100214 vector<int16_t > buildPattern (string patternStr) {
101215 // Remove spaces from the pattern string.
102216 patternStr.erase (remove_if (begin (patternStr), end (patternStr), isspace), end (patternStr));
0 commit comments