Skip to content

Commit 4a9ffae

Browse files
authored
feat: Added matched_tag field search api results with fuzzy search capabilities (#5769)
* feat: Added matched_tag field search api results with fuzzy search capabilities Signed-off-by: Aniket Paluskar <[email protected]> * Minor formatting & linting changes Signed-off-by: Aniket Paluskar <[email protected]> * Removed unnecessary debug statements Signed-off-by: Aniket Paluskar <[email protected]> --------- Signed-off-by: Aniket Paluskar <[email protected]>
1 parent aa286ef commit 4a9ffae

File tree

2 files changed

+124
-1
lines changed

2 files changed

+124
-1
lines changed

sdk/python/feast/api/registry/rest/rest_utils.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,13 +558,39 @@ def filter_search_results_and_match_score(
558558
# Search in tags
559559
tags = result.get("tags", {})
560560
tag_match = False
561+
matched_tag = None
562+
best_fuzzy_score = 0.0
563+
best_fuzzy_tag = None
564+
561565
for key, value in tags.items():
562-
if query_lower in key.lower() or query_lower in str(value).lower():
566+
key_lower = key.lower()
567+
value_str = str(value).lower()
568+
569+
# Substring match of the query in the lowercased key or value
570+
if query_lower in key_lower or query_lower in value_str:
563571
tag_match = True
572+
# Store the matched tag as a dictionary
573+
matched_tag = {key: value}
564574
break
565575

576+
# Fuzzy match for tags (on combined "key=value" string)
577+
tag_combined = f"{key_lower}={value_str}"
578+
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
579+
580+
if tag_fuzzy_score > best_fuzzy_score:
581+
best_fuzzy_score = tag_fuzzy_score
582+
best_fuzzy_tag = {key: value}
583+
566584
if tag_match:
567585
result["match_score"] = MATCH_SCORE_TAGS
586+
result["matched_tag"] = matched_tag
587+
filtered_results.append(result)
588+
continue
589+
590+
# Fuzzy tag match
591+
if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
592+
result["match_score"] = best_fuzzy_score * 100
593+
result["matched_tag"] = best_fuzzy_tag
568594
filtered_results.append(result)
569595
continue
570596

sdk/python/tests/unit/api/test_search_api.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses):
734734
f"Expected to find some of {expected_resources} but found none in {found_resources}"
735735
)
736736

737+
def test_search_matched_tag_exact_match(self, search_test_app):
738+
"""Test that matched_tag field is present when a tag matches exactly"""
739+
# Search for "data" which should match tag key "team" with value "data"
740+
response = search_test_app.get("/search?query=data")
741+
assert response.status_code == 200
742+
743+
data = response.json()
744+
results = data["results"]
745+
746+
# Find results that matched via tags (match_score = 60)
747+
tag_matched_results = [
748+
r for r in results if r.get("match_score") == 60 and "matched_tag" in r
749+
]
750+
751+
assert len(tag_matched_results) > 0, (
752+
"Expected to find at least one result with matched_tag from tag matching"
753+
)
754+
755+
# Verify matched_tag is present and has a valid dictionary value
756+
for result in tag_matched_results:
757+
matched_tag = result.get("matched_tag")
758+
assert matched_tag is not None, (
759+
f"matched_tag should not be None for result {result['name']}"
760+
)
761+
assert isinstance(matched_tag, dict), (
762+
f"matched_tag should be a dictionary, got {type(matched_tag)}"
763+
)
764+
# matched_tag should be a dictionary containing exactly one key-value pair
765+
assert len(matched_tag) > 0, "matched_tag should not be empty"
766+
assert len(matched_tag) == 1, (
767+
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
768+
)
769+
770+
logger.debug(
771+
f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
772+
)
773+
774+
def test_search_matched_tag_fuzzy_match(self, search_test_app):
775+
"""Test that matched_tag field is present when a tag matches via fuzzy matching"""
776+
# "te" is too weak a query to fuzzy match tag key "team":
777+
# "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
778+
# So query "tea" instead, which fuzzy matches "team" better:
779+
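# "tea" vs "team": overlap={'t','e','a'}/union={'t','e','a','m'} = 3/4 = 75% (above threshold); NOTE(review): the matcher scores the combined "key=value" string, so the real score may differ — confirm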
780+
response = search_test_app.get("/search?query=tea")
781+
assert response.status_code == 200
782+
783+
data = response.json()
784+
results = data["results"]
785+
786+
# Find results that matched via fuzzy tag matching (40 <= match_score < 60)
787+
fuzzy_tag_matched_results = [
788+
r
789+
for r in results
790+
if r.get("match_score", 0) >= 40
791+
and r.get("match_score", 0) < 60
792+
and "matched_tag" in r
793+
]
794+
795+
# If we don't find fuzzy matches, try a different query that's more likely to match
796+
if len(fuzzy_tag_matched_results) == 0:
797+
# Try "dat" which should fuzzy match tag value "data"
798+
# "dat" vs "data": overlap={'d','a','t'}/union={'d','a','t'} = 3/3 = 100% (above threshold)
799+
response = search_test_app.get("/search?query=dat")
800+
assert response.status_code == 200
801+
data = response.json()
802+
results = data["results"]
803+
fuzzy_tag_matched_results = [
804+
r
805+
for r in results
806+
if r.get("match_score", 0) >= 40
807+
and r.get("match_score", 0) < 60
808+
and "matched_tag" in r
809+
]
810+
811+
if len(fuzzy_tag_matched_results) > 0:
812+
# Verify matched_tag is present for fuzzy matches
813+
for result in fuzzy_tag_matched_results:
814+
matched_tag = result.get("matched_tag")
815+
assert matched_tag is not None, (
816+
f"matched_tag should not be None for fuzzy-matched result {result['name']}"
817+
)
818+
assert isinstance(matched_tag, dict), (
819+
f"matched_tag should be a dictionary, got {type(matched_tag)}"
820+
)
821+
assert len(matched_tag) > 0, "matched_tag should not be empty"
822+
assert len(matched_tag) == 1, (
823+
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
824+
)
825+
# Verify the match_score is in the fuzzy range
826+
assert 40 <= result.get("match_score", 0) < 60, (
827+
f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
828+
)
829+
830+
logger.debug(
831+
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
832+
)
833+
737834
def test_search_sorting_functionality(self, shared_search_responses):
738835
"""Test search results sorting using pre-computed responses"""
739836
# Test match_score descending sort

0 commit comments

Comments
 (0)