Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 104 additions & 57 deletions tests/v2_tests/test_score_modifier_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,65 +105,112 @@ def test_valid_score_modifiers_format(self):

@mark.fixed
class TestScoreModifierWithRerankCountSearch(MarqoTestCase):
    """Hybrid (RRF) search tests that combine root level score_modifiers with rerank_depth."""

    def setUp(self) -> None:
        """Run the base-class setup, then define the documents indexed by both tests."""
        super().setUp()
        self.docs_list = [
            {"_id": "both1", "text_field_1": "dogs", "int_field_1": -1},  # HIGH tensor, LOW lexical
            {"_id": "tensor1", "text_field_1": "puppies", "int_field_1": 2},  # MID tensor
            {"_id": "tensor2", "text_field_1": "random words", "int_field_1": 3},  # LOW tensor
        ]

    def _assert_rerank_depth_score_modifiers(self, test_index_name) -> None:
        """
        Shared search/assert sequence for the structured and unstructured tests.

        Expects ``self.docs_list`` to already be indexed in ``test_index_name``.
        Runs three HYBRID searches for "dogs": one without modifiers (baseline),
        one with ``rerank_depth=3`` and one with ``rerank_depth=1``, checking
        both the resulting order and the resulting scores each time.
        """
        index = self.client.index(test_index_name)

        # Get unmodified scores
        # Unmodified result order should be: both1, tensor1, tensor2
        unmodified_results = index.search(q="dogs", search_method="HYBRID", limit=3)
        unmodified_scores = {hit["_id"]: hit["_score"] for hit in unmodified_results["hits"]}
        self.assertEqual(["both1", "tensor1", "tensor2"], [hit["_id"] for hit in unmodified_results["hits"]])

        # Both modifiers read int_field_1 with weight 1, so the assertions below
        # expect: modified = int_field_1 * unmodified + int_field_1
        score_modifiers = {
            "multiply_score_by": [
                {"field_name": "int_field_1", "weight": 1}
            ],
            "add_to_score": [
                {"field_name": "int_field_1", "weight": 1}
            ]
        }

        # Get modified scores (rank all 3)
        # Modified result order should be: tensor2, tensor1, both1
        modified_results = index.search(
            q="dogs", search_method="HYBRID",
            limit=3, rerank_depth=3, score_modifiers=score_modifiers
        )
        self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_results["hits"]])
        self.assertAlmostEqual(modified_results["hits"][0]["_score"], 3 * unmodified_scores["tensor2"] + 3)
        self.assertAlmostEqual(modified_results["hits"][1]["_score"], 2 * unmodified_scores["tensor1"] + 2)
        self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1 * unmodified_scores["both1"] - 1)

        # Get modified scores (rank only 1). Only both1 should be rescored (goes to the bottom)
        # Modified result order should be: tensor1, tensor2, both1
        modified_results = index.search(
            q="dogs", search_method="HYBRID",
            limit=3, rerank_depth=1, score_modifiers=score_modifiers
        )
        self.assertEqual(["tensor1", "tensor2", "both1"], [hit["_id"] for hit in modified_results["hits"]])
        self.assertAlmostEqual(modified_results["hits"][0]["_score"], unmodified_scores["tensor1"])  # unmodified
        self.assertAlmostEqual(modified_results["hits"][1]["_score"], unmodified_scores["tensor2"])  # unmodified
        self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1 * unmodified_scores["both1"] - 1)  # modified

    def test_hybrid_search_structured_rrf_score_modifiers_with_rerank_depth(self):
        """
        Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
        For structured indexes
        """
        test_index_name = self.get_test_index_name(
            cloud_test_index_to_use=CloudTestIndex.structured_text,
            open_source_test_index_name=self.structured_index_name
        )
        self.client.index(test_index_name).add_documents(self.docs_list)

        self._assert_rerank_depth_score_modifiers(test_index_name)

    def test_hybrid_search_unstructured_rrf_score_modifiers_with_rerank_depth(self):
        """
        Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
        For unstructured indexes
        """
        test_index_name = self.get_test_index_name(
            cloud_test_index_to_use=CloudTestIndex.unstructured_text,
            open_source_test_index_name=self.unstructured_index_name
        )
        # Unlike the structured test, tensor_fields is passed explicitly here.
        self.client.index(test_index_name).add_documents(self.docs_list, tensor_fields=["text_field_1"])

        self._assert_rerank_depth_score_modifiers(test_index_name)
Loading