diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml index 5ca6bf5a09..1f9332fb86 100644 --- a/.github/actions/run-tests/action.yml +++ b/.github/actions/run-tests/action.yml @@ -35,6 +35,10 @@ runs: CLIENT_LIBS_TEST_IMAGE: "redislabs/client-libs-test:${{ inputs.redis-version }}" run: | set -e + + if [ "${{inputs.redis-version}}" == "8.0-M04-pre" ]; then + export REDIS_IMAGE=redis:8.0-M03 + fi echo "::group::Installing dependencies" pip install -U setuptools wheel @@ -56,9 +60,9 @@ runs: # Mapping of redis version to stack version declare -A redis_stack_version_mapping=( - ["7.4.1"]="7.4.0-v1" - ["7.2.6"]="7.2.0-v13" - ["6.2.16"]="6.2.6-v17" + ["7.4.2"]="7.4.0-v2" + ["7.2.7"]="7.2.0-v14" + ["6.2.17"]="6.2.6-v18" ) if [[ -v redis_stack_version_mapping[$REDIS_VERSION] ]]; then diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index c32029e6f9..c4548c21ef 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -29,7 +29,7 @@ env: COVERAGE_CORE: sysmon REDIS_IMAGE: redis:latest REDIS_STACK_IMAGE: redis/redis-stack-server:latest - CURRENT_REDIS_VERSION: '7.4.1' + CURRENT_REDIS_VERSION: '7.4.2' jobs: dependency-audit: @@ -74,7 +74,7 @@ jobs: max-parallel: 15 fail-fast: false matrix: - redis-version: ['8.0-M02', '${{ needs.redis_version.outputs.CURRENT }}', '7.2.6', '6.2.16'] + redis-version: ['8.0-M04-pre', '${{ needs.redis_version.outputs.CURRENT }}', '7.2.7', '6.2.17'] python-version: ['3.8', '3.12'] parser-backend: ['plain'] event-loop: ['asyncio'] diff --git a/redis/commands/search/query.py b/redis/commands/search/query.py index 964ce6cdf4..a8312a2ad2 100644 --- a/redis/commands/search/query.py +++ b/redis/commands/search/query.py @@ -179,6 +179,8 @@ def scorer(self, scorer: str) -> "Query": Use a different scoring function to evaluate document relevance. Default is `TFIDF`. + Since Redis 8.0 default was changed to BM25STD. + :param scorer: The scoring function to use (e.g. `TFIDF.DOCNORM` or `BM25`) """ diff --git a/tests/test_asyncio/test_search.py b/tests/test_asyncio/test_search.py index 4f5a4c2f04..c0efcce882 100644 --- a/tests/test_asyncio/test_search.py +++ b/tests/test_asyncio/test_search.py @@ -341,6 +341,7 @@ async def test_client(decoded_r: redis.Redis): @pytest.mark.redismod @pytest.mark.onlynoncluster +@skip_if_server_version_gte("7.9.0") async def test_scores(decoded_r: redis.Redis): await decoded_r.ft().create_index((TextField("txt"),)) @@ -361,6 +362,29 @@ async def test_scores(decoded_r: redis.Redis): assert "doc1" == res["results"][1]["id"] +@pytest.mark.redismod +@pytest.mark.onlynoncluster +@skip_if_server_version_lt("7.9.0") +async def test_scores_with_new_default_scorer(decoded_r: redis.Redis): + await decoded_r.ft().create_index((TextField("txt"),)) + + await decoded_r.hset("doc1", mapping={"txt": "foo baz"}) + await decoded_r.hset("doc2", mapping={"txt": "foo bar"}) + + q = Query("foo ~bar").with_scores() + res = await decoded_r.ft().search(q) + if is_resp2_connection(decoded_r): + assert 2 == res.total + assert "doc2" == res.docs[0].id + assert 0.87 == pytest.approx(res.docs[0].score, 0.01) + assert "doc1" == res.docs[1].id + else: + assert 2 == res["total_results"] + assert "doc2" == res["results"][0]["id"] + assert 0.87 == pytest.approx(res["results"][0]["score"], 0.01) + assert "doc1" == res["results"][1]["id"] + + @pytest.mark.redismod async def test_stopwords(decoded_r: redis.Redis): stopwords = ["foo", "bar", "baz"] @@ -663,7 +687,7 @@ async def test_summarize(decoded_r: redis.Redis): await createIndex(decoded_r.ft()) await waitForIndex(decoded_r, "idx") - q = Query("king henry").paging(0, 1) + q = Query('"king henry"').paging(0, 1) q.highlight(fields=("play", "txt"), tags=("", "")) q.summarize("txt") @@ -675,7 +699,7 @@ async def test_summarize(decoded_r: redis.Redis): == doc.txt ) - q = Query("king henry").paging(0, 1).summarize().highlight() + q = Query('"king henry"').paging(0, 1).summarize().highlight() doc = sorted((await decoded_r.ft().search(q)).docs)[0] assert "Henry ... " == doc.play @@ -691,7 +715,7 @@ async def test_summarize(decoded_r: redis.Redis): == doc["extra_attributes"]["txt"] ) - q = Query("king henry").paging(0, 1).summarize().highlight() + q = Query('"king henry"').paging(0, 1).summarize().highlight() doc = sorted((await decoded_r.ft().search(q))["results"])[0] assert "Henry ... " == doc["extra_attributes"]["play"] @@ -1029,6 +1053,7 @@ async def test_phonetic_matcher(decoded_r: redis.Redis): @pytest.mark.onlynoncluster # NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+ @skip_ifmodversion_lt("2.8.0", "search") +@skip_if_server_version_gte("7.9.0") async def test_scorer(decoded_r: redis.Redis): await decoded_r.ft().create_index((TextField("description"),)) @@ -1087,6 +1112,69 @@ async def test_scorer(decoded_r: redis.Redis): assert 0.0 == res["results"][0]["score"] +@pytest.mark.redismod +@pytest.mark.onlynoncluster +# NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+ +@skip_ifmodversion_lt("2.8.0", "search") +@skip_if_server_version_lt("7.9.0") +async def test_scorer_with_new_default_scorer(decoded_r: redis.Redis): + await decoded_r.ft().create_index((TextField("description"),)) + + await decoded_r.hset( + "doc1", mapping={"description": "The quick brown fox jumps over the lazy dog"} + ) + await decoded_r.hset( + "doc2", + mapping={ + "description": "Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do." # noqa + }, + ) + + if is_resp2_connection(decoded_r): + # default scorer is BM25STD + res = await decoded_r.ft().search(Query("quick").with_scores()) + assert 0.23 == pytest.approx(res.docs[0].score, 0.05) + res = await decoded_r.ft().search(Query("quick").scorer("TFIDF").with_scores()) + assert 1.0 == res.docs[0].score + res = await decoded_r.ft().search( + Query("quick").scorer("TFIDF.DOCNORM").with_scores() + ) + assert 0.14285714285714285 == res.docs[0].score + res = await decoded_r.ft().search(Query("quick").scorer("BM25").with_scores()) + assert 0.22471909420069797 == res.docs[0].score + res = await decoded_r.ft().search(Query("quick").scorer("DISMAX").with_scores()) + assert 2.0 == res.docs[0].score + res = await decoded_r.ft().search( + Query("quick").scorer("DOCSCORE").with_scores() + ) + assert 1.0 == res.docs[0].score + res = await decoded_r.ft().search( + Query("quick").scorer("HAMMING").with_scores() + ) + assert 0.0 == res.docs[0].score + else: + res = await decoded_r.ft().search(Query("quick").with_scores()) + assert 0.23 == pytest.approx(res["results"][0]["score"], 0.05) + res = await decoded_r.ft().search(Query("quick").scorer("TFIDF").with_scores()) + assert 1.0 == res["results"][0]["score"] + res = await decoded_r.ft().search( + Query("quick").scorer("TFIDF.DOCNORM").with_scores() + ) + assert 0.14285714285714285 == res["results"][0]["score"] + res = await decoded_r.ft().search(Query("quick").scorer("BM25").with_scores()) + assert 0.22471909420069797 == res["results"][0]["score"] + res = await decoded_r.ft().search(Query("quick").scorer("DISMAX").with_scores()) + assert 2.0 == res["results"][0]["score"] + res = await decoded_r.ft().search( + Query("quick").scorer("DOCSCORE").with_scores() + ) + assert 1.0 == res["results"][0]["score"] + res = await decoded_r.ft().search( + Query("quick").scorer("HAMMING").with_scores() + ) + assert 0.0 == res["results"][0]["score"] + + @pytest.mark.redismod async def test_get(decoded_r: redis.Redis): await decoded_r.ft().create_index((TextField("f1"), TextField("f2"))) diff --git a/tests/test_commands.py b/tests/test_commands.py index 2681b8eaf0..f83fe76aa9 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -4332,7 +4332,6 @@ def test_xgroup_create_mkstream(self, r): assert r.xinfo_groups(stream) == expected @skip_if_server_version_lt("7.0.0") - @skip_if_server_version_gte("7.9.0") def test_xgroup_create_entriesread(self, r: redis.Redis): stream = "stream" group = "group" @@ -4341,28 +4340,6 @@ def test_xgroup_create_entriesread(self, r: redis.Redis): # no group is setup yet, no info to obtain assert r.xinfo_groups(stream) == [] - assert r.xgroup_create(stream, group, 0, entries_read=7) - expected = [ - { - "name": group.encode(), - "consumers": 0, - "pending": 0, - "last-delivered-id": b"0-0", - "entries-read": 7, - "lag": -6, - } - ] - assert r.xinfo_groups(stream) == expected - - @skip_if_server_version_lt("7.9.0") - def test_xgroup_create_entriesread_with_fixed_lag_field(self, r: redis.Redis): - stream = "stream" - group = "group" - r.xadd(stream, {"foo": "bar"}) - - # no group is setup yet, no info to obtain - assert r.xinfo_groups(stream) == [] - assert r.xgroup_create(stream, group, 0, entries_read=7) expected = [ { diff --git a/tests/test_search.py b/tests/test_search.py index ee1ba66434..5b45cfc0a3 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -314,6 +314,7 @@ def test_client(client): @pytest.mark.redismod @pytest.mark.onlynoncluster +@skip_if_server_version_gte("7.9.0") def test_scores(client): client.ft().create_index((TextField("txt"),)) @@ -334,6 +335,29 @@ def test_scores(client): assert "doc1" == res["results"][1]["id"] +@pytest.mark.redismod +@pytest.mark.onlynoncluster +@skip_if_server_version_lt("7.9.0") +def test_scores_with_new_default_scorer(client): + client.ft().create_index((TextField("txt"),)) + + client.hset("doc1", mapping={"txt": "foo baz"}) + client.hset("doc2", mapping={"txt": "foo bar"}) + + q = Query("foo ~bar").with_scores() + res = client.ft().search(q) + if is_resp2_connection(client): + assert 2 == res.total + assert "doc2" == res.docs[0].id + assert 0.87 == pytest.approx(res.docs[0].score, 0.01) + assert "doc1" == res.docs[1].id + else: + assert 2 == res["total_results"] + assert "doc2" == res["results"][0]["id"] + assert 0.87 == pytest.approx(res["results"][0]["score"], 0.01) + assert "doc1" == res["results"][1]["id"] + + @pytest.mark.redismod def test_stopwords(client): client.ft().create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"]) @@ -623,7 +647,7 @@ def test_summarize(client): createIndex(client.ft()) waitForIndex(client, getattr(client.ft(), "index_name", "idx")) - q = Query("king henry").paging(0, 1) + q = Query('"king henry"').paging(0, 1) q.highlight(fields=("play", "txt"), tags=("", "")) q.summarize("txt") @@ -635,7 +659,7 @@ def test_summarize(client): == doc.txt ) - q = Query("king henry").paging(0, 1).summarize().highlight() + q = Query('"king henry"').paging(0, 1).summarize().highlight() doc = sorted(client.ft().search(q).docs)[0] assert "Henry ... " == doc.play @@ -651,7 +675,7 @@ def test_summarize(client): == doc["extra_attributes"]["txt"] ) - q = Query("king henry").paging(0, 1).summarize().highlight() + q = Query('"king henry"').paging(0, 1).summarize().highlight() doc = sorted(client.ft().search(q)["results"])[0] assert "Henry ... " == doc["extra_attributes"]["play"] @@ -936,6 +960,7 @@ def test_phonetic_matcher(client): @pytest.mark.onlynoncluster # NOTE(imalinovskyi): This test contains hardcoded scores valid only for RediSearch 2.8+ @skip_ifmodversion_lt("2.8.0", "search") +@skip_if_server_version_gte("7.9.0") def test_scorer(client): client.ft().create_index((TextField("description"),)) @@ -982,6 +1007,55 @@ def test_scorer(client): assert 0.0 == res["results"][0]["score"] +@pytest.mark.redismod +@pytest.mark.onlynoncluster +@skip_if_server_version_lt("7.9.0") +def test_scorer_with_new_default_scorer(client): + client.ft().create_index((TextField("description"),)) + + client.hset( + "doc1", mapping={"description": "The quick brown fox jumps over the lazy dog"} + ) + client.hset( + "doc2", + mapping={ + "description": "Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do." # noqa + }, + ) + + # default scorer is BM25STD + if is_resp2_connection(client): + res = client.ft().search(Query("quick").with_scores()) + assert 0.23 == pytest.approx(res.docs[0].score, 0.05) + res = client.ft().search(Query("quick").scorer("TFIDF").with_scores()) + assert 1.0 == res.docs[0].score + res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores()) + assert 0.14285714285714285 == res.docs[0].score + res = client.ft().search(Query("quick").scorer("BM25").with_scores()) + assert 0.22471909420069797 == res.docs[0].score + res = client.ft().search(Query("quick").scorer("DISMAX").with_scores()) + assert 2.0 == res.docs[0].score + res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores()) + assert 1.0 == res.docs[0].score + res = client.ft().search(Query("quick").scorer("HAMMING").with_scores()) + assert 0.0 == res.docs[0].score + else: + res = client.ft().search(Query("quick").with_scores()) + assert 0.23 == pytest.approx(res["results"][0]["score"], 0.05) + res = client.ft().search(Query("quick").scorer("TFIDF").with_scores()) + assert 1.0 == res["results"][0]["score"] + res = client.ft().search(Query("quick").scorer("TFIDF.DOCNORM").with_scores()) + assert 0.14285714285714285 == res["results"][0]["score"] + res = client.ft().search(Query("quick").scorer("BM25").with_scores()) + assert 0.22471909420069797 == res["results"][0]["score"] + res = client.ft().search(Query("quick").scorer("DISMAX").with_scores()) + assert 2.0 == res["results"][0]["score"] + res = client.ft().search(Query("quick").scorer("DOCSCORE").with_scores()) + assert 1.0 == res["results"][0]["score"] + res = client.ft().search(Query("quick").scorer("HAMMING").with_scores()) + assert 0.0 == res["results"][0]["score"] + + @pytest.mark.redismod def test_get(client): client.ft().create_index((TextField("f1"), TextField("f2"))) @@ -2605,9 +2679,8 @@ def test_search_missing_fields(client): }, ) - with pytest.raises(redis.exceptions.ResponseError) as e: + with pytest.raises(redis.exceptions.ResponseError): client.ft().search(Query("ismissing(@title)").return_field("id").no_content()) - assert "to be defined with 'INDEXMISSING'" in e.value.args[0] res = client.ft().search( Query("ismissing(@features)").return_field("id").no_content() @@ -2813,6 +2886,12 @@ def test_search_query_with_different_dialects(client): assert res["total_results"] == 0 +@pytest.mark.redismod +@skip_if_server_version_lt("7.9.0") +def test_info_exposes_search_info(client): + assert len(client.info("search")) > 0 + + def _assert_search_result(client, result, expected_doc_ids): """ Make sure the result of a geo search is as expected, taking into account the RESP