Skip to content

Commit 49a9156

Browse files
authored
Merge pull request #680 from moov-io/fix-sourceID-match
search: score 1.0 for sourceID matches
2 parents d13a20a + b63e24f commit 49a9156

File tree

2 files changed

+83
-24
lines changed

2 files changed

+83
-24
lines changed

pkg/search/similarity.go

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,43 @@ func Similarity[Q any, I any](query Entity[Q], index Entity[I]) float64 {
2929
// The format will evolve over time. No stability guarantee is given over what is written.
3030
func DebugSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity[I]) float64 {
3131
details := DetailedSimilarity(w, query, index)
32-
if len(details.Pieces) != 9 {
32+
33+
switch len(details.Pieces) {
34+
case 0:
35+
// No pieces, nothing to debug in terms of pieces
36+
case 1:
37+
// This likely indicates an exact SourceID match short-circuit
38+
// The debug output below prints that single piece on its own line
39+
// and skips the nine labeled sections.
40+
case 9:
41+
// Full set of pieces, proceed with detailed debugging
42+
default:
3343
panic(fmt.Sprintf("BUG: got an unexpected amount of %d ScorePieces", len(details.Pieces))) //nolint:forbidigo
3444
}
3545

3646
if w != nil {
37-
// Critical comparisons (exact matches)
38-
debug(w, "Critical pieces\n")
39-
debug(w, " exact identifiers: %#v\n", details.Pieces[0])
40-
debug(w, " crypto addresses: %#v\n", details.Pieces[1])
41-
debug(w, " gov IDs: %#v\n", details.Pieces[2])
42-
debug(w, " contact info: %#v\n", details.Pieces[3])
43-
44-
// Name comparison (second highest weight)
45-
debug(w, "name comparison\n")
46-
debug(w, " name: %#v\n", details.Pieces[4])
47-
debug(w, " titles: %#v\n", details.Pieces[5])
48-
49-
// Supporting information (lower weight)
50-
debug(w, "supporting info\n")
51-
debug(w, " dates: %#v\n", details.Pieces[6])
52-
debug(w, " addresses: %#v\n", details.Pieces[7])
53-
debug(w, " supporting into: %#v\n", details.Pieces[8])
47+
// If there's only one piece, it's likely an exact SourceID match
48+
if len(details.Pieces) == 1 {
49+
debug(w, "one score piece found: %#v\n", details.piece(0))
50+
} else {
51+
// Critical comparisons (exact matches)
52+
debug(w, "Critical pieces\n")
53+
debug(w, " exact identifiers: %#v\n", details.piece(0))
54+
debug(w, " crypto addresses: %#v\n", details.piece(1))
55+
debug(w, " gov IDs: %#v\n", details.piece(2))
56+
debug(w, " contact info: %#v\n", details.piece(3))
57+
58+
// Name comparison (second highest weight)
59+
debug(w, "name comparison\n")
60+
debug(w, " name: %#v\n", details.piece(4))
61+
debug(w, " titles: %#v\n", details.piece(5))
62+
63+
// Supporting information (lower weight)
64+
debug(w, "supporting info\n")
65+
debug(w, " dates: %#v\n", details.piece(6))
66+
debug(w, " addresses: %#v\n", details.piece(7))
67+
debug(w, " supporting into: %#v\n", details.piece(8))
68+
}
5469

5570
// Final Score
5671
debug(w, "finalScore=%.2f", details.FinalScore)
@@ -62,6 +77,18 @@ func DebugSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity[I]
6277
var (
6378
emptyPieces = make([]ScorePiece, 9)
6479
emptyEntityType = EntityType("")
80+
81+
exactScore = []ScorePiece{
82+
{
83+
Score: 1.0,
84+
Weight: criticalIdWeight,
85+
Matched: true,
86+
Required: true,
87+
Exact: true,
88+
FieldsCompared: 1,
89+
PieceType: "exact",
90+
},
91+
}
6592
)
6693

6794
// DetailedSimilarity returns the scoring details of each query piece against the index Entity.
@@ -74,6 +101,8 @@ func DetailedSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity
74101
Pieces: make([]ScorePiece, 0, 9),
75102
}
76103

104+
var exactOverride bool
105+
77106
// Quick filters
78107
if query.Source != sourceEmpty && query.Source != SourceAPIRequest {
79108
if query.Source != index.Source {
@@ -82,10 +111,13 @@ func DetailedSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity
82111
}
83112
}
84113
if query.SourceID != "" {
85-
if query.SourceID != index.SourceID {
114+
if query.SourceID == index.SourceID {
115+
out.FinalScore = 1.0
116+
out.Pieces = exactScore
117+
} else {
86118
out.Pieces = emptyPieces
87-
return out
88119
}
120+
return out
89121
}
90122
if query.Type != emptyEntityType {
91123
if query.Type != index.Type {
@@ -95,7 +127,6 @@ func DetailedSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity
95127
}
96128

97129
// Critical identifiers (highest weight)
98-
var exactOverride bool
99130
exactIdentifiers := compareExactIdentifiers(w, query, index, criticalIdWeight)
100131
if exactIdentifiers.Matched && exactIdentifiers.FieldsCompared > 0 {
101132
exactOverride = true
@@ -143,9 +174,7 @@ func DetailedSimilarity[Q any, I any](w io.Writer, query Entity[Q], index Entity
143174
)
144175

145176
out.FinalScore = calculateFinalScore(w, out.Pieces, exactOverride, query, index)
146-
if math.IsNaN(out.FinalScore) {
147-
out.FinalScore = 1.0
148-
}
177+
149178
return out
150179
}
151180

@@ -158,6 +187,15 @@ type SimilarityScore struct {
158187
FinalScore float64 `json:"finalScore,omitzero"`
159188
}
160189

190+
func (ss SimilarityScore) piece(idx int) ScorePiece {
191+
if idx < len(ss.Pieces) {
192+
return ss.Pieces[idx]
193+
}
194+
195+
var empty ScorePiece
196+
return empty
197+
}
198+
161199
// ScorePiece is a partial scoring result from one comparison function
162200
//
163201
// There is no API stability guarantee for ScorePiece.
@@ -241,6 +279,11 @@ func calculateFinalScore[Q any, I any](w io.Writer, pieces []ScorePiece, exactOv
241279
if exactOverride {
242280
return 1.0
243281
}
282+
283+
if math.IsNaN(finalScore) {
284+
return 1.0
285+
}
286+
244287
return finalScore
245288
}
246289

pkg/search/similarity_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ func TestSimilarityDebug_FromJSON(t *testing.T) {
3434
require.InDelta(t, got, 0.690, 0.001)
3535
}
3636

37+
func TestSimilarity_SourceID(t *testing.T) {
38+
query := search.Entity[search.Value]{
39+
SourceID: "abc",
40+
}
41+
index := readEntity(t, "1-index.json").Normalize()
42+
43+
got := search.Similarity(query, index)
44+
require.InDelta(t, 0.0, got, 0.001)
45+
46+
// make SourceID match
47+
query.SourceID = index.SourceID
48+
49+
got = search.Similarity(query, index)
50+
require.InDelta(t, 1.0, got, 0.001)
51+
}
52+
3753
func readEntity(tb testing.TB, name string) search.Entity[search.Value] {
3854
tb.Helper()
3955

0 commit comments

Comments
 (0)