feat: Added matched_tag field to search API results with fuzzy search capabilities (#5769)

aniketpalu · web-flow · commit 4a9ffae3d2c7 · 2025-12-11T20:45:43.000+05:30
* feat: Added matched_tag field to search API results with fuzzy search capabilities

Signed-off-by: Aniket Paluskar <apaluska@redhat.com>

* Minor formatting & linting changes

Signed-off-by: Aniket Paluskar <apaluska@redhat.com>

* Removed unnecessary debug statements

Signed-off-by: Aniket Paluskar <apaluska@redhat.com>

---------

Signed-off-by: Aniket Paluskar <apaluska@redhat.com>
diff --git a/sdk/python/feast/api/registry/rest/rest_utils.py b/sdk/python/feast/api/registry/rest/rest_utils.py
@@ -558,13 +558,39 @@ def filter_search_results_and_match_score(
         # Search in tags
         tags = result.get("tags", {})
         tag_match = False
+        matched_tag = None
+        best_fuzzy_score = 0.0
+        best_fuzzy_tag = None
+
         for key, value in tags.items():
-            if query_lower in key.lower() or query_lower in str(value).lower():
+            key_lower = key.lower()
+            value_str = str(value).lower()
+
+            # Substring match against the lowercased key or value
+            if query_lower in key_lower or query_lower in value_str:
                 tag_match = True
+                # Store the matched tag as a dictionary
+                matched_tag = {key: value}
                 break
 
+            # Fuzzy match for tags (on combined "key=value" string)
+            tag_combined = f"{key_lower}={value_str}"
+            tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
+
+            if tag_fuzzy_score > best_fuzzy_score:
+                best_fuzzy_score = tag_fuzzy_score
+                best_fuzzy_tag = {key: value}
+
         if tag_match:
             result["match_score"] = MATCH_SCORE_TAGS
+            result["matched_tag"] = matched_tag
+            filtered_results.append(result)
+            continue
+
+        # Fuzzy tag match
+        if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
+            result["match_score"] = best_fuzzy_score * 100
+            result["matched_tag"] = best_fuzzy_tag
             filtered_results.append(result)
             continue
 
diff --git a/sdk/python/tests/unit/api/test_search_api.py b/sdk/python/tests/unit/api/test_search_api.py
@@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses):
             f"Expected to find some of {expected_resources} but found none in {found_resources}"
         )
 
+    def test_search_matched_tag_exact_match(self, search_test_app):
+        """Test that matched_tag field is present when a tag matches exactly"""
+        # Search for "data" which should match tag key "team" with value "data"
+        response = search_test_app.get("/search?query=data")
+        assert response.status_code == 200
+
+        data = response.json()
+        results = data["results"]
+
+        # Find results that matched via tags (match_score = 60)
+        tag_matched_results = [
+            r for r in results if r.get("match_score") == 60 and "matched_tag" in r
+        ]
+
+        assert len(tag_matched_results) > 0, (
+            "Expected to find at least one result with matched_tag from tag matching"
+        )
+
+        # Verify matched_tag is present and has a valid dictionary value
+        for result in tag_matched_results:
+            matched_tag = result.get("matched_tag")
+            assert matched_tag is not None, (
+                f"matched_tag should not be None for result {result['name']}"
+            )
+            assert isinstance(matched_tag, dict), (
+                f"matched_tag should be a dictionary, got {type(matched_tag)}"
+            )
+            # matched_tag should be a dictionary with key:value format
+            assert len(matched_tag) > 0, "matched_tag should not be empty"
+            assert len(matched_tag) == 1, (
+                f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
+            )
+
+        logger.debug(
+            f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
+        )
+
+    def test_search_matched_tag_fuzzy_match(self, search_test_app):
+        """Test that matched_tag field is present when a tag matches via fuzzy matching"""
+        # "te" alone is too short to fuzzy match tag key "team":
+        # "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
+        # So query "tea" instead, which overlaps "team" more:
+        # "tea" vs "team": overlap={'t','e','a'}/union={'t','e','a','m'} = 3/4 = 75% (above threshold)
+        response = search_test_app.get("/search?query=tea")
+        assert response.status_code == 200
+
+        data = response.json()
+        results = data["results"]
+
+        # Find results that matched via fuzzy tag matching (match_score < 60 but >= 40)
+        fuzzy_tag_matched_results = [
+            r
+            for r in results
+            if r.get("match_score", 0) >= 40
+            and r.get("match_score", 0) < 60
+            and "matched_tag" in r
+        ]
+
+        # If we don't find fuzzy matches, try a different query that's more likely to match
+        if len(fuzzy_tag_matched_results) == 0:
+            # Try "dat" which should fuzzy match tag value "data"
+            # "dat" vs "data": overlap={'d','a','t'}/union={'d','a','t'} = 3/3 = 100% (above threshold)
+            response = search_test_app.get("/search?query=dat")
+            assert response.status_code == 200
+            data = response.json()
+            results = data["results"]
+            fuzzy_tag_matched_results = [
+                r
+                for r in results
+                if r.get("match_score", 0) >= 40
+                and r.get("match_score", 0) < 60
+                and "matched_tag" in r
+            ]
+
+        if len(fuzzy_tag_matched_results) > 0:
+            # Verify matched_tag is present for fuzzy matches
+            for result in fuzzy_tag_matched_results:
+                matched_tag = result.get("matched_tag")
+                assert matched_tag is not None, (
+                    f"matched_tag should not be None for fuzzy-matched result {result['name']}"
+                )
+                assert isinstance(matched_tag, dict), (
+                    f"matched_tag should be a dictionary, got {type(matched_tag)}"
+                )
+                assert len(matched_tag) > 0, "matched_tag should not be empty"
+                assert len(matched_tag) == 1, (
+                    f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
+                )
+                # Verify the match_score is in the fuzzy range
+                assert 40 <= result.get("match_score", 0) < 60, (
+                    f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
+                )
+
+            logger.debug(
+                f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
+            )
+
     def test_search_sorting_functionality(self, shared_search_responses):
         """Test search results sorting using pre-computed responses"""
         # Test match_score descending sort