Skip to content

Commit cc75e9c

Browse files
committed
.
1 parent 0eddccb commit cc75e9c

10 files changed

+474
-564
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
MatchCount: 2
3+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
MatchFound: true,
3+
OriginalPatternExists: false,
4+
Note: Pattern spanning 4 chunks should now be detected
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
MatchFound: true,
3+
OriginalPatternExists: false,
4+
Note: Pattern spanning 3 chunks should now be detected
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
MatchFound: true,
3+
OriginalPatternExists: false,
4+
Note: Pattern spanning 3 chunks should now be detected
5+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
animal animal animal animal

src/Verify.Tests/Serialization/CrossChunkMatcherTests.cs

Lines changed: 228 additions & 189 deletions
Large diffs are not rendered by default.

src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs

Lines changed: 112 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -7,101 +7,146 @@ static class CrossChunkMatcher
77
/// Finds all matches in a StringBuilder (handling patterns spanning chunk boundaries) and applies replacements.
88
/// </summary>
99
/// <param name="builder">The StringBuilder to search and modify</param>
10+
/// <param name="maxLength">Maximum pattern length to search for</param>
1011
/// <param name="context">User context passed to callbacks</param>
11-
/// <param name="onCrossChunk">Called for each potential cross-chunk match position</param>
12-
/// <param name="onWithinChunk">Called for each position within a chunk</param>
12+
/// <param name="matcher">Called for each potential match position with accumulated buffer</param>
1313
public static void ReplaceAll<TContext>(
1414
StringBuilder builder,
1515
int maxLength,
1616
TContext context,
17-
CrossChunkHandler<TContext> onCrossChunk,
18-
WithinChunkHandler<TContext> onWithinChunk)
17+
MatchHandler<TContext> matcher)
1918
{
19+
if (maxLength <= 0)
20+
{
21+
throw new ArgumentException("maxLength must be positive", nameof(maxLength));
22+
}
23+
2024
Span<char> buffer = stackalloc char[maxLength];
21-
Span<char> carryoverBuffer = stackalloc char[maxLength - 1];
22-
var carryoverLength = 0;
23-
var previousChunkAbsoluteEnd = 0;
24-
var absolutePosition = 0;
2525
List<Match> matches = [];
26-
var addMatch = matches.Add;
26+
var position = 0;
27+
2728
foreach (var chunk in builder.GetChunks())
2829
{
29-
var chunkSpan = chunk.Span;
30-
31-
// Check for matches spanning from previous chunk to current chunk
32-
if (carryoverLength > 0)
30+
for (var chunkIndex = 0; chunkIndex < chunk.Length; chunkIndex++)
3331
{
34-
for (var carryoverIndex = 0; carryoverIndex < carryoverLength; carryoverIndex++)
32+
var absolutePosition = position + chunkIndex;
33+
34+
// Build content window starting at current position
35+
var bufferLength = FillBuffer(builder, absolutePosition, buffer);
36+
37+
// Check for match at this position
38+
var windowSlice = buffer[..bufferLength];
39+
var result = matcher(windowSlice, absolutePosition, context);
40+
41+
if (result.IsMatch)
3542
{
36-
var remainingInCarryover = carryoverLength - carryoverIndex;
37-
var startPosition = previousChunkAbsoluteEnd - carryoverLength + carryoverIndex;
38-
39-
onCrossChunk(
40-
builder,
41-
carryoverBuffer,
42-
buffer,
43-
carryoverIndex,
44-
remainingInCarryover,
45-
chunkSpan,
46-
startPosition,
47-
context,
48-
addMatch);
43+
matches.Add(new Match(absolutePosition, result.MatchLength, result.Replacement));
44+
45+
// Skip past the match
46+
var skipAmount = result.MatchLength - 1;
47+
if (skipAmount > 0)
48+
{
49+
var remaining = chunk.Length - chunkIndex - 1;
50+
var toSkip = Math.Min(skipAmount, remaining);
51+
chunkIndex += toSkip;
52+
}
4953
}
5054
}
5155

52-
// Process matches entirely within this chunk
53-
var chunkIndex = 0;
54-
while (chunkIndex < chunk.Length)
56+
position += chunk.Length;
57+
}
58+
59+
// Apply matches in descending position order to maintain correct indices
60+
foreach (var match in matches.OrderByDescending(m => m.Index))
61+
{
62+
builder.Overwrite(match.Value, match.Index, match.Length);
63+
}
64+
}
65+
66+
static int FillBuffer(StringBuilder builder, int startPosition, Span<char> buffer)
67+
{
68+
var bufferIndex = 0;
69+
var currentPosition = 0;
70+
71+
foreach (var chunk in builder.GetChunks())
72+
{
73+
var chunkSpan = chunk.Span;
74+
var chunkEnd = currentPosition + chunk.Length;
75+
76+
// Skip chunks before our start position
77+
if (chunkEnd <= startPosition)
5578
{
56-
var absoluteIndex = absolutePosition + chunkIndex;
57-
var skipAhead = onWithinChunk(chunk, chunkSpan, chunkIndex, absoluteIndex, context, addMatch);
58-
chunkIndex += skipAhead > 0 ? skipAhead : 1;
79+
currentPosition = chunkEnd;
80+
continue;
5981
}
6082

61-
// Save last N chars for next iteration
62-
carryoverLength = Math.Min(maxLength - 1, chunk.Length);
63-
chunkSpan.Slice(chunk.Length - carryoverLength, carryoverLength).CopyTo(carryoverBuffer);
83+
// Determine where to start in this chunk
84+
var chunkStartIndex = startPosition > currentPosition ? startPosition - currentPosition : 0;
6485

65-
previousChunkAbsoluteEnd = absolutePosition + chunk.Length;
66-
absolutePosition += chunk.Length;
67-
}
86+
// Copy what we can from this chunk
87+
for (var i = chunkStartIndex; i < chunk.Length && bufferIndex < buffer.Length; i++)
88+
{
89+
buffer[bufferIndex++] = chunkSpan[i];
90+
}
6891

69-
// Apply matches in descending position order
70-
foreach (var match in matches.OrderByDescending(_ => _.Index))
71-
{
72-
builder.Overwrite(match.Value, match.Index, match.Length);
92+
// If buffer is full, we're done
93+
if (bufferIndex >= buffer.Length)
94+
{
95+
break;
96+
}
97+
98+
currentPosition = chunkEnd;
7399
}
100+
101+
return bufferIndex;
74102
}
75103

76104
/// <summary>
77-
/// Callback for processing potential cross-chunk matches.
105+
/// Callback for checking if content matches and should be replaced.
78106
/// </summary>
79-
public delegate void CrossChunkHandler<in TContext>(
80-
StringBuilder builder,
81-
Span<char> carryoverBuffer,
82-
Span<char> buffer,
83-
int carryoverIndex,
84-
int remainingInCarryover,
85-
CharSpan currentChunkSpan,
86-
int absoluteStartPosition,
87-
TContext context,
88-
Action<Match> addMatch);
107+
/// <param name="content">The current window content to check</param>
108+
/// <param name="absolutePosition">Absolute position in the StringBuilder where this content starts</param>
109+
/// <param name="context">User-provided context</param>
110+
/// <returns>Match result indicating if a match was found and replacement details</returns>
111+
public delegate MatchResult MatchHandler<in TContext>(
112+
CharSpan content,
113+
int absolutePosition,
114+
TContext context);
115+
}
116+
117+
/// <summary>
118+
/// Result of a match check operation.
119+
/// </summary>
120+
readonly struct MatchResult
121+
{
122+
public readonly bool IsMatch;
123+
public readonly int MatchLength;
124+
public readonly string Replacement;
125+
126+
private MatchResult(bool isMatch, int matchLength, string replacement)
127+
{
128+
IsMatch = isMatch;
129+
MatchLength = matchLength;
130+
Replacement = replacement;
131+
}
89132

90133
/// <summary>
91-
/// Callback for processing positions within a chunk.
134+
/// Creates a result indicating a match was found.
92135
/// </summary>
93-
/// <returns>
94-
/// Number of positions to skip ahead.
95-
/// Returning 0 or 1 will both advance by 1 position (normal iteration);
96-
/// returning a value greater than 1 will skip past a match.
97-
/// </returns>
98-
public delegate int WithinChunkHandler<in TContext>(
99-
ReadOnlyMemory<char> chunk,
100-
CharSpan chunkSpan,
101-
int chunkIndex,
102-
int absoluteIndex,
103-
TContext context,
104-
Action<Match> addMatch);
136+
public static MatchResult Match(int length, string replacement)
137+
{
138+
if (length <= 0)
139+
{
140+
throw new ArgumentException("Match length must be positive", nameof(length));
141+
}
142+
143+
return new MatchResult(true, length, replacement);
144+
}
145+
146+
/// <summary>
147+
/// Creates a result indicating no match was found.
148+
/// </summary>
149+
public static MatchResult NoMatch() => default;
105150
}
106151

107152
readonly struct Match(int index, int length, string value)

src/Verify/Serialization/Scrubbers/DateScrubber.cs

Lines changed: 21 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -212,75 +212,27 @@ static void ReplaceInner(StringBuilder builder, string format, Counter counter,
212212
builder,
213213
maxLength: max,
214214
context,
215-
OnCrossChunk,
216-
OnWithinChunk);
217-
}
218-
219-
static void OnCrossChunk(
220-
StringBuilder builder,
221-
Span<char> carryoverBuffer,
222-
Span<char> buffer,
223-
int carryoverIndex,
224-
int remainingInCarryover,
225-
CharSpan currentChunkSpan,
226-
int absoluteStartPosition,
227-
MatchContext context,
228-
Action<Match> addMatch)
229-
{
230-
// Try lengths from longest to shortest (greedy matching)
231-
for (var length = context.MaxLength; length >= context.MinLength; length--)
232-
{
233-
var neededFromCurrent = length - remainingInCarryover;
234-
235-
if (neededFromCurrent <= 0 ||
236-
neededFromCurrent > currentChunkSpan.Length)
215+
matcher: static (content, _, context) =>
237216
{
238-
continue;
239-
}
240-
241-
// Combine carryover and current chunk
242-
carryoverBuffer.Slice(carryoverIndex, remainingInCarryover).CopyTo(buffer);
243-
currentChunkSpan[..neededFromCurrent].CopyTo(buffer[remainingInCarryover..]);
244-
245-
var slice = buffer[..length];
246-
247-
if (!context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var convert))
248-
{
249-
continue;
250-
}
251-
252-
addMatch(new(absoluteStartPosition, length, convert));
253-
// Found match at this position
254-
return;
255-
}
256-
}
257-
258-
static int OnWithinChunk(
259-
ReadOnlyMemory<char> chunk,
260-
CharSpan chunkSpan,
261-
int chunkIndex,
262-
int absoluteIndex,
263-
MatchContext context,
264-
Action<Match> addMatch)
265-
{
266-
// Try lengths from longest to shortest (greedy matching)
267-
for (var length = context.MaxLength; length >= context.MinLength; length--)
268-
{
269-
if (chunkIndex + length > chunk.Length)
270-
{
271-
continue;
272-
}
273-
274-
var slice = chunkSpan.Slice(chunkIndex, length);
275-
276-
if (context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var convert))
277-
{
278-
addMatch(new(absoluteIndex, length, convert));
279-
return length; // Skip past match
280-
}
281-
}
282-
283-
return 1;
217+
// Try lengths from longest to shortest (greedy matching)
218+
for (var length = context.MaxLength; length >= context.MinLength; length--)
219+
{
220+
// Not enough content for this length
221+
if (content.Length < length)
222+
{
223+
continue;
224+
}
225+
226+
var slice = content.Slice(0, length);
227+
228+
if (context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var converted))
229+
{
230+
return MatchResult.Match(length, converted);
231+
}
232+
}
233+
234+
return MatchResult.NoMatch();
235+
});
284236
}
285237

286238
sealed class MatchContext(
@@ -298,4 +250,4 @@ sealed class MatchContext(
298250
public int MaxLength { get; } = maxLength;
299251
public int MinLength { get; } = minLength;
300252
}
301-
}
253+
}

0 commit comments

Comments
 (0)