Skip to content

Commit 10a235f

Browse files
committed
improve coverage perf
1 parent d914f97 commit 10a235f

10 files changed

Lines changed: 596 additions & 333 deletions

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
using Infidex.Core;
2+
using Infidex.Scoring;
3+
using Infidex.Coverage;
4+
using Infidex.Tokenization;
5+
6+
namespace Infidex.Tests;
7+
8+
/// <summary>
/// Scenario tests that pin down specific ranking expectations of the
/// coverage engine combined with the fusion scorer.
/// </summary>
[TestClass]
public class BugReproductionTests
{
    /// <summary>
    /// Query "the matrix rev" must rank "The Matrix Revisited" above
    /// "The Matrix Reloaded": the last query term "rev" is a prefix of
    /// "Revisited" but not of "Reloaded".
    /// </summary>
    [TestMethod]
    public void PrefixPreference_MatrixRev_PreferRevisitedOverReloaded()
    {
        // Arrange: one query and the two competing document titles.
        const string SearchText = "the matrix rev";
        const string ReloadedTitle = "The Matrix Reloaded";
        const string RevisitedTitle = "The Matrix Revisited";

        var coverageEngine = new CoverageEngine(
            new Tokenizer([3], 2, 0, TextNormalizer.CreateDefault(), TokenizerSetup.CreateDefault()),
            CoverageSetup.CreateDefault());

        // Act: extract coverage features for each title (Reloaded first, then
        // Revisited), then fuse them into a final score per title.
        var reloadedFeatures = coverageEngine.CalculateFeatures(SearchText, ReloadedTitle, 0, 1);
        var revisitedFeatures = coverageEngine.CalculateFeatures(SearchText, RevisitedTitle, 0, 2);

        var reloadedResult = FusionScorer.Calculate(
            SearchText, ReloadedTitle, reloadedFeatures, 0.5f, 3, new[] { ' ' });
        var revisitedResult = FusionScorer.Calculate(
            SearchText, RevisitedTitle, revisitedFeatures, 0.5f, 3, new[] { ' ' });

        // Emit both scores so a failing run shows the actual values.
        Console.WriteLine($"Reloaded: {reloadedResult.score}");
        Console.WriteLine($"Revisited: {revisitedResult.score}");

        // Assert: the prefix-matching title wins...
        Assert.IsTrue(
            revisitedResult.score > reloadedResult.score,
            $"Revisited ({revisitedResult.score}) should score higher than Reloaded ({reloadedResult.score})");

        // ...and it wins because only "Revisited" carries the last-term prefix signal.
        Assert.IsTrue(
            revisitedFeatures.FusionSignals.LexicalPrefixLast,
            "Revisited should have LexicalPrefixLast");
        Assert.IsFalse(
            reloadedFeatures.FusionSignals.LexicalPrefixLast,
            "Reloaded should NOT have LexicalPrefixLast");
    }
}

src/Infidex.Tests/WordMatcherTests.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ public void Lookup_ExactMatch_FindsDocument()
2323

2424
var results = matcher.Lookup("world");
2525

26-
Assert.AreEqual(2, results.Count);
26+
Assert.IsNotNull(results);
27+
Assert.AreEqual(2, results.Cardinality);
2728
Assert.IsTrue(results.Contains(0));
2829
Assert.IsTrue(results.Contains(1));
2930
}
@@ -45,7 +46,8 @@ public void Lookup_LD1Support_FindsFuzzyMatches()
4546
// "batmam" is 1 edit away from "batman"
4647
var results = matcher.Lookup("batmam");
4748

48-
Assert.IsTrue(results.Count > 0);
49+
Assert.IsNotNull(results);
50+
Assert.IsTrue(results.Cardinality > 0);
4951
Assert.IsTrue(results.Contains(0));
5052
}
5153

@@ -65,9 +67,8 @@ public void LookupAffix_FindsPrefixMatches()
6567
// "bat" is a prefix of "batman"
6668
var results = matcher.LookupAffix("bat");
6769

68-
Assert.IsTrue(results.Count > 0);
70+
Assert.IsNotNull(results);
71+
Assert.IsTrue(results.Cardinality > 0);
6972
Assert.IsTrue(results.Contains(0));
7073
}
7174
}
72-
73-
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
using System.Buffers;
2+
using Infidex.Tokenization;
3+
4+
namespace Infidex.Coverage;
5+
6+
/// <summary>
/// Reusable set of pooled scratch arrays for coverage calculations, to avoid
/// repeated array allocations.
/// Not thread-safe; should be created per query execution.
/// </summary>
/// <remarks>
/// <para>
/// Every array is rented from <see cref="ArrayPool{T}.Shared"/> and handed back
/// by <see cref="Dispose"/>. Rented arrays may be longer than requested and are
/// not guaranteed to be zeroed, so callers must track their own logical lengths
/// and initialize the ranges they use.
/// </para>
/// <para>
/// Growing an array via <see cref="EnsureDocCapacity"/> or
/// <see cref="EnsureQueryCapacity"/> replaces it with a freshly rented one;
/// previous contents are NOT copied over.
/// </para>
/// </remarks>
internal sealed class CoverageBuffer : IDisposable
{
    // Arrays sized for max document tokens
    public StringSlice[] DocTokenArray;
    public StringSlice[] UniqueDocTokenArray;
    public bool[] QActiveArray;
    public bool[] DActiveArray;
    public float[] TermMatchedCharsArray;
    public bool[] TermHasWholeArray;
    public bool[] TermHasJoinedArray;
    public bool[] TermHasPrefixArray;
    public int[] TermFirstPosArray;
    public float[] TermIdfArray;

    // Fusion arrays
    public StringSlice[] FusionQueryTokenArray;
    public StringSlice[] FusionDocTokenArray;

    private const int DefaultCapacity = 1024; // Reasonable start for doc tokens
    private const int DefaultQueryCapacity = 64;

    // Set once by Dispose so the arrays are returned to the pool exactly once.
    private bool _disposed;

    /// <summary>
    /// Rents all scratch arrays at their default capacities.
    /// </summary>
    public CoverageBuffer()
    {
        DocTokenArray = ArrayPool<StringSlice>.Shared.Rent(DefaultCapacity);
        UniqueDocTokenArray = ArrayPool<StringSlice>.Shared.Rent(DefaultCapacity);
        QActiveArray = ArrayPool<bool>.Shared.Rent(DefaultQueryCapacity);
        DActiveArray = ArrayPool<bool>.Shared.Rent(DefaultCapacity);
        TermMatchedCharsArray = ArrayPool<float>.Shared.Rent(DefaultQueryCapacity);
        TermHasWholeArray = ArrayPool<bool>.Shared.Rent(DefaultQueryCapacity);
        TermHasJoinedArray = ArrayPool<bool>.Shared.Rent(DefaultQueryCapacity);
        TermHasPrefixArray = ArrayPool<bool>.Shared.Rent(DefaultQueryCapacity);
        TermFirstPosArray = ArrayPool<int>.Shared.Rent(DefaultQueryCapacity);
        TermIdfArray = ArrayPool<float>.Shared.Rent(DefaultQueryCapacity);

        FusionQueryTokenArray = ArrayPool<StringSlice>.Shared.Rent(DefaultQueryCapacity);
        FusionDocTokenArray = ArrayPool<StringSlice>.Shared.Rent(DefaultCapacity);
    }

    /// <summary>
    /// Ensures every document-sized array can hold at least
    /// <paramref name="required"/> elements. Arrays that are too small are
    /// replaced; their previous contents are discarded.
    /// </summary>
    /// <param name="required">Minimum number of elements needed.</param>
    /// <exception cref="ObjectDisposedException">The buffer has been disposed.</exception>
    public void EnsureDocCapacity(int required)
    {
        ThrowIfDisposed();

        // Each array is checked on its own: Rent only guarantees a minimum
        // length, so one array's length says nothing certain about the others.
        Resize(ref DocTokenArray, required);
        Resize(ref UniqueDocTokenArray, required);
        Resize(ref DActiveArray, required);
        Resize(ref FusionDocTokenArray, required);
    }

    /// <summary>
    /// Ensures every query-sized array can hold at least
    /// <paramref name="required"/> elements. Arrays that are too small are
    /// replaced; their previous contents are discarded.
    /// </summary>
    /// <param name="required">Minimum number of elements needed.</param>
    /// <exception cref="ObjectDisposedException">The buffer has been disposed.</exception>
    public void EnsureQueryCapacity(int required)
    {
        ThrowIfDisposed();

        Resize(ref QActiveArray, required);
        Resize(ref TermMatchedCharsArray, required);
        Resize(ref TermHasWholeArray, required);
        Resize(ref TermHasJoinedArray, required);
        Resize(ref TermHasPrefixArray, required);
        Resize(ref TermFirstPosArray, required);
        Resize(ref TermIdfArray, required);
        Resize(ref FusionQueryTokenArray, required);
    }

    /// <summary>
    /// Swaps <paramref name="array"/> for a pooled array of at least
    /// <paramref name="newSize"/> elements when it is currently shorter.
    /// Contents are not copied.
    /// </summary>
    private static void Resize<T>(ref T[] array, int newSize)
    {
        if (array.Length >= newSize)
        {
            return;
        }

        // Rent before returning: if Rent throws, the field still refers to an
        // array this buffer owns rather than one already handed back.
        T[] replacement = ArrayPool<T>.Shared.Rent(newSize);
        ArrayPool<T>.Shared.Return(array);
        array = replacement;
    }

    /// <summary>
    /// Returns <paramref name="array"/> to the shared pool and replaces the
    /// field with an empty array so stale references cannot touch pooled memory.
    /// </summary>
    private static void Release<T>(ref T[] array)
    {
        if (array is null)
        {
            return;
        }

        ArrayPool<T>.Shared.Return(array);
        array = Array.Empty<T>();
    }

    /// <summary>
    /// Throws when the buffer's arrays have already been returned to the pool.
    /// </summary>
    private void ThrowIfDisposed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(CoverageBuffer));
        }
    }

    /// <summary>
    /// Returns all rented arrays to the shared pool. Safe to call more than
    /// once; only the first call has any effect, so no array is ever returned
    /// to the pool twice.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        Release(ref DocTokenArray);
        Release(ref UniqueDocTokenArray);
        Release(ref QActiveArray);
        Release(ref DActiveArray);
        Release(ref TermMatchedCharsArray);
        Release(ref TermHasWholeArray);
        Release(ref TermHasJoinedArray);
        Release(ref TermHasPrefixArray);
        Release(ref TermFirstPosArray);
        Release(ref TermIdfArray);
        Release(ref FusionQueryTokenArray);
        Release(ref FusionDocTokenArray);
    }
}

0 commit comments

Comments
 (0)