-
Notifications
You must be signed in to change notification settings - Fork 76
fix: stabilize test_text_query_word_weights with structural assertions #541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -335,7 +335,6 @@ def test_text_query_with_string_filter(): | |||||||||||||||
| assert "AND" not in query_string_wildcard | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| @pytest.mark.skip("Test is flaking") | ||||||||||||||||
| def test_text_query_word_weights(): | ||||||||||||||||
| # verify word weights get added into the raw Redis query syntax | ||||||||||||||||
| query = TextQuery( | ||||||||||||||||
|
|
@@ -344,10 +343,43 @@ def test_text_query_word_weights(): | |||||||||||||||
| text_weights={"alpha": 2, "delta": 0.555, "gamma": 0.95}, | ||||||||||||||||
| ) | ||||||||||||||||
|
|
||||||||||||||||
| assert ( | ||||||||||||||||
| str(query) | ||||||||||||||||
| == "@description:(query | string | alpha=>{$weight:2} | bravo | delta=>{$weight:0.555} | tango | alpha=>{$weight:2}) SCORER BM25STD WITHSCORES DIALECT 2 LIMIT 0 10" | ||||||||||||||||
| ) | ||||||||||||||||
| # Check query components with structural guarantees, | ||||||||||||||||
| # not exact token ordering (which is non-deterministic). | ||||||||||||||||
| query_str = str(query) | ||||||||||||||||
|
|
||||||||||||||||
| # Description clause is properly delimited | ||||||||||||||||
| assert "@description:(" in query_str | ||||||||||||||||
| desc_start = query_str.index("@description:(") | ||||||||||||||||
| desc_close = query_str.index(")", desc_start) | ||||||||||||||||
| desc_clause = query_str[desc_start + len("@description:(") : desc_close] | ||||||||||||||||
|
|
||||||||||||||||
| # Weighted terms appear inside the description clause | ||||||||||||||||
| assert "delta=>{$weight:0.555}" in desc_clause | ||||||||||||||||
|
|
||||||||||||||||
| # alpha appears twice and both occurrences are weighted | ||||||||||||||||
| alpha_weighted = "alpha=>{$weight:2}" | ||||||||||||||||
| assert desc_clause.count(alpha_weighted) == 2 | ||||||||||||||||
| # Ensure no unweighted 'alpha' tokens slipped through | ||||||||||||||||
| idx = 0 | ||||||||||||||||
|
Comment on lines
+359
to
+363
|
||||||||||||||||
| while True: | ||||||||||||||||
| idx = desc_clause.find("alpha", idx) | ||||||||||||||||
| if idx == -1: | ||||||||||||||||
| break | ||||||||||||||||
| assert desc_clause.startswith(alpha_weighted, idx) | ||||||||||||||||
| idx += len("alpha") | ||||||||||||||||
|
|
||||||||||||||||
| # Unweighted terms are present | ||||||||||||||||
| for term in ["query", "string", "bravo", "tango"]: | ||||||||||||||||
| assert term in desc_clause | ||||||||||||||||
|
Comment on lines
+371
to
+373
|
||||||||||||||||
| # Unweighted terms are present | |
| for term in ["query", "string", "bravo", "tango"]: | |
| assert term in desc_clause | |
| # Unweighted terms are present as standalone tokens within the clause | |
| desc_tokens = {token.strip() for token in desc_clause.split("|") if token.strip()} | |
| for term in ["query", "string", "bravo", "tango"]: | |
| assert term in desc_tokens |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment about token ordering being non-deterministic is likely misleading here:
`TextQuery._tokenize_and_escape_query()` preserves the input token order and only does in-place substitutions for weighted tokens. If the goal is to avoid brittleness, consider rewording the comment to name the actual source of the nondeterminism/flakiness (or simply say the test avoids exact full-string matching).