Skip to content

Commit b2711cc

Browse files
committed
.
1 parent 7ce4241 commit b2711cc

File tree

2 files changed

+79
-39
lines changed

2 files changed

+79
-39
lines changed

src/Benchmarks/CrossChunkMatcherBenchmarks.cs

Lines changed: 72 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
[MemoryDiagnoser]
1+

2+
[MemoryDiagnoser]
23
[SimpleJob(warmupCount: 3, iterationCount: 5)]
34
public class CrossChunkMatcherBenchmarks
45
{
@@ -15,31 +16,31 @@ public void Setup()
1516
smallBuilder = new();
1617
for (var i = 0; i < 10; i++)
1718
{
18-
smallBuilder.Append("Hello <TAG>world</TAG> this is ");
19-
smallBuilder.Append("a test with some <TAG>patterns</TAG> to match. ");
19+
smallBuilder.AppendLine("Hello <TAG>world</TAG> this is");
20+
smallBuilder.AppendLine("a test with some <TAG>patterns</TAG> to match.");
2021
}
2122

2223
// Medium: ~10KB with more chunks
2324
mediumBuilder = new();
2425
for (var i = 0; i < 100; i++)
2526
{
26-
mediumBuilder.Append("Hello <TAG>world</TAG> this is ");
27-
mediumBuilder.Append("a test with some <TAG>patterns</TAG> to match. ");
27+
mediumBuilder.AppendLine("Hello <TAG>world</TAG> this is");
28+
mediumBuilder.AppendLine("a test with some <TAG>patterns</TAG> to match.");
2829
}
2930

3031
// Large: ~100KB with many chunks
3132
largeBuilder = new();
3233
for (var i = 0; i < 1000; i++)
3334
{
34-
largeBuilder.Append("Hello <TAG>world</TAG> this is ");
35-
largeBuilder.Append("a test with some <TAG>patterns</TAG> to match. ");
35+
largeBuilder.AppendLine("Hello <TAG>world</TAG> this is");
36+
largeBuilder.AppendLine("a test with some <TAG>patterns</TAG> to match.");
3637
}
3738

3839
// Many matches: Lots of patterns to replace
3940
manyMatchesBuilder = new();
4041
for (var i = 0; i < 500; i++)
4142
{
42-
manyMatchesBuilder.Append("<TAG>");
43+
manyMatchesBuilder.AppendLine("<TAG>");
4344
}
4445

4546
// Force cross-chunk pattern matching by creating multiple chunks
@@ -50,12 +51,13 @@ public void Setup()
5051
// Create chunks where <TAG> might span boundaries
5152
crossChunkBuilder.Append("Hello <T");
5253
crossChunkBuilder.Append("AG>world</T");
53-
crossChunkBuilder.Append("AG> test ");
54+
crossChunkBuilder.AppendLine("AG> test");
5455
}
5556
}
5657

57-
[Benchmark]
58-
public void Small_FewMatches()
58+
// Baseline benchmarks (without skipChar)
59+
[Benchmark(Baseline = true)]
60+
public void Small_FewMatches_Baseline()
5961
{
6062
var builder = new StringBuilder(smallBuilder.ToString());
6163
CrossChunkMatcher.ReplaceAll(
@@ -74,9 +76,9 @@ public void Small_FewMatches()
7476
}
7577

7678
[Benchmark]
77-
public void Medium_FewMatches()
79+
public void Small_FewMatches_WithSkipChar()
7880
{
79-
var builder = new StringBuilder(mediumBuilder.ToString());
81+
var builder = new StringBuilder(smallBuilder.ToString());
8082
CrossChunkMatcher.ReplaceAll(
8183
builder,
8284
maxLength: 20,
@@ -93,9 +95,9 @@ public void Medium_FewMatches()
9395
}
9496

9597
[Benchmark]
96-
public void Large_FewMatches()
98+
public void Medium_FewMatches_Baseline()
9799
{
98-
var builder = new StringBuilder(largeBuilder.ToString());
100+
var builder = new StringBuilder(mediumBuilder.ToString());
99101
CrossChunkMatcher.ReplaceAll(
100102
builder,
101103
maxLength: 20,
@@ -112,12 +114,12 @@ public void Large_FewMatches()
112114
}
113115

114116
[Benchmark]
115-
public void ManyMatches()
117+
public void Medium_FewMatches_WithSkipChar()
116118
{
117-
var builder = new StringBuilder(manyMatchesBuilder.ToString());
119+
var builder = new StringBuilder(mediumBuilder.ToString());
118120
CrossChunkMatcher.ReplaceAll(
119121
builder,
120-
maxLength: 10,
122+
maxLength: 20,
121123
context: (string?) null,
122124
matcher: static (content, _, _) =>
123125
{
@@ -131,51 +133,64 @@ public void ManyMatches()
131133
}
132134

133135
[Benchmark]
134-
public void ComplexPattern_LargeWindow()
136+
public void Large_FewMatches_Baseline()
135137
{
136-
var builder = new StringBuilder(mediumBuilder.ToString());
138+
var builder = new StringBuilder(largeBuilder.ToString());
137139
CrossChunkMatcher.ReplaceAll(
138140
builder,
139-
maxLength: 100,
141+
maxLength: 20,
140142
context: (string?) null,
141143
matcher: static (content, _, _) =>
142144
{
143-
// More complex pattern matching with larger window
144-
if (content.StartsWith("<TAG>world</TAG>"))
145+
if (content.StartsWith("<TAG>"))
145146
{
146-
return new MatchResult(16, "[COMPLEX_MATCH]");
147+
return new MatchResult(5, "[REPLACED]");
147148
}
148149

149150
return null;
150151
});
151152
}
152153

153154
[Benchmark]
154-
public void MultiplePatterns()
155+
public void Large_FewMatches_WithSkipChar()
155156
{
156-
var builder = new StringBuilder(mediumBuilder.ToString());
157+
var builder = new StringBuilder(largeBuilder.ToString());
157158
CrossChunkMatcher.ReplaceAll(
158159
builder,
159-
maxLength: 30,
160+
maxLength: 20,
160161
context: (string?) null,
161162
matcher: static (content, _, _) =>
162163
{
163-
if (content.StartsWith("<TAG>world</TAG>"))
164+
if (content.StartsWith("<TAG>"))
164165
{
165-
return new MatchResult(16, "[WORLD]");
166+
return new MatchResult(5, "[REPLACED]");
166167
}
167168

168-
if (content.StartsWith("<TAG>patterns</TAG>"))
169+
return null;
170+
});
171+
}
172+
173+
[Benchmark]
174+
public void NoMatches_Baseline()
175+
{
176+
var builder = new StringBuilder(mediumBuilder.ToString());
177+
CrossChunkMatcher.ReplaceAll(
178+
builder,
179+
maxLength: 20,
180+
context: (string?) null,
181+
matcher: static (content, _, _) =>
182+
{
183+
if (content.StartsWith("<NOMATCH>"))
169184
{
170-
return new MatchResult(19, "[PATTERN]");
185+
return new MatchResult(9, "[REPLACED]");
171186
}
172187

173188
return null;
174189
});
175190
}
176191

177192
[Benchmark]
178-
public void NoMatches()
193+
public void NoMatches_WithSkipChar()
179194
{
180195
var builder = new StringBuilder(mediumBuilder.ToString());
181196
CrossChunkMatcher.ReplaceAll(
@@ -184,7 +199,6 @@ public void NoMatches()
184199
context: (string?) null,
185200
matcher: static (content, _, _) =>
186201
{
187-
// Pattern that will never match
188202
if (content.StartsWith("<NOMATCH>"))
189203
{
190204
return new MatchResult(9, "[REPLACED]");
@@ -195,21 +209,40 @@ public void NoMatches()
195209
}
196210

197211
[Benchmark]
198-
public void CrossChunkPatterns()
212+
public void ComplexPattern_Baseline()
199213
{
200-
var builder = new StringBuilder(crossChunkBuilder.ToString());
214+
var builder = new StringBuilder(mediumBuilder.ToString());
201215
CrossChunkMatcher.ReplaceAll(
202216
builder,
203-
maxLength: 20,
217+
maxLength: 100,
204218
context: (string?) null,
205219
matcher: static (content, _, _) =>
206220
{
207-
if (content.StartsWith("<TAG>"))
221+
if (content.StartsWith("<TAG>world</TAG>"))
208222
{
209-
return new MatchResult(5, "[REPLACED]");
223+
return new MatchResult(16, "[COMPLEX_MATCH]");
224+
}
225+
226+
return null;
227+
});
228+
}
229+
230+
[Benchmark]
231+
public void ComplexPattern_WithSkipChar()
232+
{
233+
var builder = new StringBuilder(mediumBuilder.ToString());
234+
CrossChunkMatcher.ReplaceAll(
235+
builder,
236+
maxLength: 100,
237+
context: (string?) null,
238+
matcher: static (content, _, _) =>
239+
{
240+
if (content.StartsWith("<TAG>world</TAG>"))
241+
{
242+
return new MatchResult(16, "[COMPLEX_MATCH]");
210243
}
211244

212245
return null;
213246
});
214247
}
215-
}
248+
}

src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,13 @@ public static void ReplaceAll<TContext>(
2929
{
3030
for (var chunkIndex = 0; chunkIndex < chunk.Length; chunkIndex++)
3131
{
32+
// Skip line-break characters before building the content window; assumes no pattern begins with '\n' or '\r'
33+
var ch = chunk.Span[chunkIndex];
34+
if (ch is '\n' or '\r')
35+
{
36+
continue;
37+
}
38+
3239
var absolutePosition = position + chunkIndex;
3340

3441
// Build content window starting at current position

0 commit comments

Comments
 (0)