Skip to content

Commit 950fb81

Browse files
committed
fix bug in bitshifting
1 parent 10a235f commit 950fb81

2 files changed

Lines changed: 53 additions & 23 deletions

File tree

src/Infidex.Tests/BugReproductionTests.cs

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Infidex.Scoring;
33
using Infidex.Coverage;
44
using Infidex.Tokenization;
5+
using System.Reflection;
56

67
namespace Infidex.Tests;
78

@@ -11,22 +12,26 @@ public class BugReproductionTests
1112
[TestMethod]
1213
public void PrefixPreference_MatrixRev_PreferRevisitedOverReloaded()
1314
{
14-
// "the matrix rev"
15-
// "The Matrix Reloaded" vs "The Matrix Revisited"
16-
// "Revisited" starts with "Rev". "Reloaded" does not.
17-
1815
string query = "the matrix rev";
1916
string docReloaded = "The Matrix Reloaded";
2017
string docRevisited = "The Matrix Revisited";
2118

2219
var tokenizer = new Tokenizer([3], 2, 0, TextNormalizer.CreateDefault(), TokenizerSetup.CreateDefault());
2320
var setup = CoverageSetup.CreateDefault();
2421
var engine = new CoverageEngine(tokenizer, setup);
22+
23+
// IDFs observed: "the"~1.57, "matrix"~9.54, "rev"~9.51
24+
var idfCache = new Dictionary<string, float>
25+
{
26+
{ "the", 1.574f },
27+
{ "matrix", 9.544f },
28+
{ "rev", 9.515f }
29+
};
2530

26-
// Calculate features for Reloaded
27-
var featsReloaded = engine.CalculateFeatures(query, docReloaded, 0, 1);
31+
engine.SetWordIdfCache(idfCache);
2832

29-
// Calculate features for Revisited
33+
// Calculate features
34+
var featsReloaded = engine.CalculateFeatures(query, docReloaded, 0, 1);
3035
var featsRevisited = engine.CalculateFeatures(query, docRevisited, 0, 2);
3136

3237
// Score
@@ -36,11 +41,29 @@ public void PrefixPreference_MatrixRev_PreferRevisitedOverReloaded()
3641
Console.WriteLine($"Reloaded: {scoreReloaded.score}");
3742
Console.WriteLine($"Revisited: {scoreRevisited.score}");
3843

44+
// Debug info
45+
Console.WriteLine("Reloaded Features:");
46+
if (featsReloaded.TermIdf != null) Console.WriteLine($" IDFs: {string.Join(", ", featsReloaded.TermIdf)}");
47+
if (featsReloaded.TermCi != null) Console.WriteLine($" Cis: {string.Join(", ", featsReloaded.TermCi)}");
48+
Console.WriteLine($" AvgIDF: {featsReloaded.TotalIdf / featsReloaded.TermsCount}");
49+
50+
Console.WriteLine("Revisited Features:");
51+
if (featsRevisited.TermIdf != null) Console.WriteLine($" IDFs: {string.Join(", ", featsRevisited.TermIdf)}");
52+
if (featsRevisited.TermCi != null) Console.WriteLine($" Cis: {string.Join(", ", featsRevisited.TermCi)}");
53+
Console.WriteLine($" AvgIDF: {featsRevisited.TotalIdf / featsRevisited.TermsCount}");
54+
55+
int scoreReloadedInt = (int)scoreReloaded.score;
56+
bool reloadedHasDominance = (scoreReloadedInt & 64) != 0;
57+
58+
int scoreRevisitedInt = (int)scoreRevisited.score;
59+
bool revisitedHasDominance = (scoreRevisitedInt & 64) != 0;
60+
61+
Console.WriteLine($"Reloaded Dominance: {reloadedHasDominance}");
62+
Console.WriteLine($"Revisited Dominance: {revisitedHasDominance}");
63+
3964
Assert.IsTrue(scoreRevisited.score > scoreReloaded.score,
40-
$"Revisited ({scoreRevisited.score}) should score higher than Reloaded ({scoreReloaded.score})");
41-
42-
// Check why
43-
Assert.IsTrue(featsRevisited.FusionSignals.LexicalPrefixLast, "Revisited should have LexicalPrefixLast");
44-
Assert.IsFalse(featsReloaded.FusionSignals.LexicalPrefixLast, "Reloaded should NOT have LexicalPrefixLast");
65+
$"Revisited ({scoreRevisited.score}) should score higher than Reloaded ({scoreReloaded.score}). " +
66+
$"Currently failing due to Dominance Flip (Reloaded has dominance, Revisited does not).");
4567
}
68+
4669
}

src/Infidex/Scoring/FusionScorer.cs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ public static (float score, byte tiebreaker) Calculate(
4545
// We use the same bit-logic as before to define tiers, but result is a float
4646
int precedence = 0;
4747

48-
// PRECEDENCE BIT STRUCTURE
49-
// Bits 9-8: COVERAGE TIER (multi-term only)
50-
// Bit 7 (128): EXACT PREFIX or SUBSET MATCH
51-
// Bit 6 (64): HIGH-INFO TERM DOMINANCE
52-
// Bits 5-0: Quality signals
48+
// PRECEDENCE BIT STRUCTURE (Shifted to allow 12 bits for quality signals)
49+
// Bits 17-16: COVERAGE TIER (multi-term only)
50+
// Bit 15: EXACT PREFIX
51+
// Bit 14: SUBSET MATCH
52+
// Bit 13: HIGH-INFO TERM DOMINANCE
53+
// Bits 12-0: Quality signals
5354

5455
int coverageTier = 0;
5556
if (!isSingleTerm && features.TermsCount > 0)
@@ -68,15 +69,20 @@ public static (float score, byte tiebreaker) Calculate(
6869

6970
if (!isSingleTerm && coverageTier > 0)
7071
{
71-
precedence |= (coverageTier & 0b11) << 8;
72+
precedence |= (coverageTier & 0b11) << 16;
7273
}
7374

7475
bool isExactPrefix = !isSingleTerm && isClean && startsAtBeginning && lexicalPrefixLast && isComplete;
7576
bool isSubsetMatch = !isSingleTerm && features.DocTokenCount > 0 && features.WordHits == features.DocTokenCount;
7677

77-
if (isExactPrefix || isSubsetMatch)
78+
if (isExactPrefix)
7879
{
79-
precedence |= 128;
80+
precedence |= (1 << 15);
81+
}
82+
83+
if (isSubsetMatch)
84+
{
85+
precedence |= (1 << 14);
8086
}
8187

8288
// High-info term dominance logic
@@ -129,7 +135,7 @@ public static (float score, byte tiebreaker) Calculate(
129135

130136
if (hasDominantTerm || hasStrongAnchor)
131137
{
132-
precedence |= 64;
138+
precedence |= (1 << 13);
133139
}
134140

135141
int unmatchedTerms = features.TermsCount - features.TermsWithAnyMatch;
@@ -141,8 +147,8 @@ public static (float score, byte tiebreaker) Calculate(
141147

142148
if (isSingleTerm)
143149
{
144-
if (isComplete) precedence |= (1 << 9);
145-
if (isClean && features.TermsCount > 0) precedence |= (1 << 8);
150+
if (isComplete) precedence |= (1 << 17);
151+
if (isClean && features.TermsCount > 0) precedence |= (1 << 16);
146152
precedence |= ComputeSingleTermPrecedence(isExact, isClean, startsAtBeginning, isComplete);
147153
}
148154
else
@@ -250,6 +256,7 @@ private static void LogExplanation(
250256
Console.WriteLine($" query=\"{queryText}\"");
251257
Console.WriteLine($" doc=\"{docPreview}\"");
252258
Console.WriteLine($" precedence={precedence}, semantic={semantic:F4}, finalScore={finalScore:F4}");
259+
Console.WriteLine($" bits: {(precedence & (1 << 17)) != 0} (Tier), {(precedence & (1 << 15)) != 0} (Exact), {(precedence & (1 << 14)) != 0} (Subset), {(precedence & (1 << 13)) != 0} (Dominance), {(precedence & 3)} (Bonus)");
253260
}
254261
}
255262

0 commit comments

Comments
 (0)