Skip to content

Commit 9c285ea

Browse files
authored
fix(core): Prevent loop detection false positives on lists with long shared prefixes (#18975)
1 parent c0e7da4 commit 9c285ea

File tree

2 files changed

+43
-1
lines changed

2 files changed

+43
-1
lines changed

packages/core/src/services/loopDetectionService.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,25 @@ describe('LoopDetectionService', () => {
210210
expect(loggers.logLoopDetected).toHaveBeenCalledTimes(1);
211211
});
212212

213+
it('should not detect a loop for a list with a long shared prefix', () => {
214+
service.reset('');
215+
let isLoop = false;
216+
const longPrefix =
217+
'projects/my-google-cloud-project-12345/locations/us-central1/services/';
218+
219+
let listContent = '';
220+
for (let i = 0; i < 15; i++) {
221+
listContent += `- ${longPrefix}${i}\n`;
222+
}
223+
224+
// Simulate receiving the entire list in a single large chunk.
225+
// This is the specific case where the issue occurs, as list boundaries might not reset tracking properly
226+
isLoop = service.addAndCheck(createContentEvent(listContent));
227+
228+
expect(isLoop).toBe(false);
229+
expect(loggers.logLoopDetected).not.toHaveBeenCalled();
230+
});
231+
213232
it('should not detect a loop if repetitions are very far apart', () => {
214233
service.reset('');
215234
const repeatedContent = createRepetitiveContent(1, CONTENT_CHUNK_SIZE);

packages/core/src/services/loopDetectionService.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,30 @@ export class LoopDetectionService {
379379
const averageDistance = totalDistance / (CONTENT_LOOP_THRESHOLD - 1);
380380
const maxAllowedDistance = CONTENT_CHUNK_SIZE * 5;
381381

382-
return averageDistance <= maxAllowedDistance;
382+
if (averageDistance > maxAllowedDistance) {
383+
return false;
384+
}
385+
386+
// Verify that the sequence is actually repeating, not just sharing a common prefix.
387+
// For a true loop, the text between occurrences of the chunk (the period) should be highly repetitive.
388+
const periods = new Set<string>();
389+
for (let i = 0; i < recentIndices.length - 1; i++) {
390+
periods.add(
391+
this.streamContentHistory.substring(
392+
recentIndices[i],
393+
recentIndices[i + 1],
394+
),
395+
);
396+
}
397+
398+
// If the periods are mostly unique, it's a list of distinct items with a shared prefix.
399+
// A true loop will have a small number of unique periods (usually 1, sometimes 2 or 3).
400+
// More than Math.floor(CONTENT_LOOP_THRESHOLD / 2) unique periods means distinct list items, not a loop.
401+
if (periods.size > Math.floor(CONTENT_LOOP_THRESHOLD / 2)) {
402+
return false;
403+
}
404+
405+
return true;
383406
}
384407

385408
/**

0 commit comments

Comments
 (0)