Skip to content

Commit e047063

Browse files
committed
Merge branch 'boeker/handle-searchable-copies' of github.com:vespa-engine/pyvespa into boeker/handle-searchable-copies
2 parents 3c99fca + 4265c5d commit e047063

File tree

4 files changed

+166
-31
lines changed

4 files changed

+166
-31
lines changed

tests/unit/test_deployment.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,21 @@ def test_check_production_build_status_deploying(self, mock_request):
172172

173173
self.assertEqual(status, {"deployed": False, "status": "deploying"})
174174

175+
@patch("vespa.deployment.VespaCloud._request")
def test_wait_for_prod_deployment_raises_on_failed_job(self, mock_request):
    """A failed production job must abort polling with a RuntimeError."""
    # One zone succeeded while another failed: wait_for_prod_deployment
    # should stop immediately instead of polling until timeout.
    failing_status = {
        "deployed": False,
        "status": "deploying",
        "jobs": [
            {"jobName": "production-us-central-1", "runStatus": "success"},
            {"jobName": "production-us-east-3", "runStatus": "deploymentFailed"},
        ],
    }
    mock_request.return_value = failing_status

    with self.assertRaises(RuntimeError) as raised:
        self.vespa_cloud.wait_for_prod_deployment(456)
    # The error message must name the failing job and its run status.
    self.assertIn("production-us-east-3: deploymentFailed", str(raised.exception))
189+
175190
@patch("vespa.deployment.VespaCloud._try_get_access_token")
176191
def test_try_get_access_token(self, mock_get_token):
177192
mock_get_token.return_value = "fake_access_token"

tests/unit/test_evaluator.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3345,6 +3345,81 @@ def test_compute_recall(self):
33453345
delta=0.0001,
33463346
)
33473347

3348+
def test_compute_recall_id_field(self):
    """Recall is computed from the configured id_field inside hit fields."""

    def make_hits(count):
        # Hits whose document id is duplicated in a "fields" entry under "id",
        # exercising the id_field extraction path of _compute_recall.
        return [
            {"id": str(num), "fields": {"id": str(num)}}
            for num in range(1, count + 1)
        ]

    response_exact = self.SuccessfullMockVespaResponse(make_hits(5))
    # Identical exact/approximate result sets -> perfect recall.
    self.assertAlmostEqual(
        self.recall_evaluator._compute_recall(response_exact, response_exact),
        1.0,
        delta=0.0001,
    )

    # Approximate search misses one of the five exact hits -> recall 4/5.
    response_approx = self.SuccessfullMockVespaResponse(make_hits(4))
    self.assertAlmostEqual(
        self.recall_evaluator._compute_recall(response_exact, response_approx),
        0.8,
        delta=0.0001,
    )
3377+
3378+
class InternalIDResponse(MockVespaResponse):
    """Mock response whose plain hit ids are rewritten to Vespa-internal ids.

    Each plain string id is converted to the form
    index:[source]/[node-index]/[id], with the node index incrementing per
    converted hit, starting at first_node_id.
    """

    def __init__(
        self,
        hits,
        first_node_id=0,
        _total_count=None,
        _timing=None,
        _status_code=200,
    ):
        super().__init__(hits, _total_count, _timing, _status_code)
        # Node index to assign to the next hit that gets converted.
        self.next_node_num = first_node_id

    def add_namespace_to_hit_ids(self, hits_list) -> List[Dict[str, Any]]:
        """Rewrite plain string ids in place to internal "index:" ids."""
        converted = []
        for entry in hits_list:
            hit_id = entry.get("id")
            needs_rewrite = isinstance(hit_id, str) and not hit_id.startswith(
                "index:"
            )
            if needs_rewrite:
                # NOTE: mutates the caller's hit dict, matching the original.
                entry["id"] = f"index:cluster/{self.next_node_num}/{hit_id}"
                self.next_node_num += 1
            converted.append(entry)
        return converted

    def is_successful(self):
        return True
3402+
3403+
def test_compute_recall_internal_ids(self):
    """Recall matching falls back to the id part of internal "index:" ids."""
    exact_hits = [{"id": str(num)} for num in range(1, 6)]
    response_exact = self.InternalIDResponse(exact_hits, first_node_id=0)
    # Comparing a result set with itself must give full recall.
    self.assertAlmostEqual(
        self.recall_evaluator._compute_recall(response_exact, response_exact),
        1.0,
        delta=0.0001,
    )

    # A different starting node index (first_node_id=1) must not break
    # id matching; one missing hit out of five -> recall of 0.8.
    approx_hits = [{"id": str(num)} for num in range(1, 5)]
    response_approx = self.InternalIDResponse(approx_hits, first_node_id=1)
    self.assertAlmostEqual(
        self.recall_evaluator._compute_recall(response_exact, response_approx),
        0.8,
        delta=0.0001,
    )
3422+
33483423
def test_run(self):
33493424
class MockVespaApp:
33503425
def __init__(self, first_mock_responses, second_mock_responses):

vespa/deployment.py

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -925,32 +925,31 @@ def check_production_build_status(self, build_no: Optional[int]) -> dict:
925925
vespa_cloud = VespaCloud(...)
926926
build_no = vespa_cloud.deploy_to_prod()
927927
status = vespa_cloud.check_production_build_status(build_no)
928-
# This can yield one of three responses:
929-
# 1. If the revision (build_no), or higher, has successfully converged everywhere, and nothing older has then been deployed on top of that again. Nothing more will happen in this case.
930-
# {
931-
# "deployed": True,
932-
# "status": "done"
933-
# }
934-
935-
# 2. If the revision (build_no), or newer, has not yet converged, but the system is (most likely) still trying to deploy it. There is a point in polling again later when this is the response.
936-
# {
937-
# "deployed": False,
938-
# "status": "deploying"
939-
# }
940-
# 3. If the revision, or newer, has not yet converged everywhere, and it's never going to, because it was similar to the previous build, or marked obsolete by a user. There is no point in asking again for this revision.
941-
# {
942-
# "deployed": False,
943-
# "status": "done"
944-
# }
928+
# The response contains:
929+
# - "deployed" (bool): True if the build has converged everywhere.
930+
# - "status" (str): "deploying" or "done".
931+
# - "skipReason" (str, optional): Why the build was skipped, e.g. "no-changes" or "cancelled".
932+
# - "jobs" (list): Per-zone deployment details, each with "jobName" and "runStatus".
933+
#
934+
# Example responses:
935+
# 1. Successfully deployed everywhere:
936+
# {"deployed": True, "status": "done", "jobs": [{"jobName": "production-us-east-3", "runStatus": "success"}]}
937+
#
938+
# 2. Still deploying:
939+
# {"deployed": False, "status": "deploying", "jobs": [{"jobName": "production-us-east-3", "runStatus": "running"}]}
940+
#
941+
# 3. Skipped (no changes to deploy):
942+
# {"deployed": False, "status": "done", "skipReason": "no-changes", "jobs": []}
943+
#
944+
# 4. A job failed:
945+
# {"deployed": False, "status": "deploying", "jobs": [{"jobName": "production-us-east-3", "runStatus": "deploymentFailed"}]}
945946
```
946947
947948
Args:
948949
build_no (int): The build number to check.
949950
950951
Returns:
951-
dict: A dictionary with the aggregated status of all deployment jobs for the given build number. The dictionary contains:
952-
- "deployed" (bool): Whether the build has successfully converged.
953-
- "status" (str): The current status of the build ("done", "deploying").
952+
dict: The build status response from the API. See example responses above for the full shape.
954953
955954
Raises:
956955
RuntimeError: If there are issues with retrieving the status of the build.
@@ -993,23 +992,27 @@ def wait_for_prod_deployment(
993992
poll_interval (int, optional): Polling interval in seconds. Default is 5 seconds.
994993
995994
Returns:
996-
bool: True if the deployment is done and converged, False if the deployment has failed.
995+
bool: True if the build was deployed to all production zones, False if it completed
996+
without deploying (e.g. no changes).
997997
998998
Raises:
999+
RuntimeError: If any production job failed (e.g. deploymentFailed, installationFailed).
9991000
TimeoutError: If the deployment did not finish within `max_wait` seconds.
10001001
"""
10011002
start_time = time.time()
10021003
while time.time() - start_time < max_wait:
10031004
status = self.check_production_build_status(build_no)
1005+
failed_jobs = [
1006+
job for job in status.get("jobs", [])
1007+
if job["runStatus"] not in ("success", "running")
1008+
]
1009+
if failed_jobs:
1010+
failures = ", ".join(
1011+
f"{job['jobName']}: {job['runStatus']}" for job in failed_jobs
1012+
)
1013+
raise RuntimeError(f"Deployment failed: {failures}")
10041014
if status["status"] == "done":
10051015
return status["deployed"]
1006-
if "detailed-status" in status and status["detailed-status"] not in [
1007-
"success",
1008-
"running",
1009-
]:
1010-
raise RuntimeError(
1011-
f"The build failed with status code: {status['detailed-status']}"
1012-
)
10131016
time.sleep(poll_interval)
10141017
raise TimeoutError(f"Deployment did not finish within {max_wait} seconds. ")
10151018

vespa/evaluation/_base.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,6 +1885,7 @@ class VespaNNRecallEvaluator:
18851885
hits (int): Number of hits to use. Should match the parameter targetHits in the used ANN queries.
18861886
app (Vespa): An instance of the Vespa application.
18871887
query_limit (int): Maximum number of queries to determine the recall for. Defaults to 20.
1888+
id_field (str): Name of the field containing a unique id. Defaults to "id".
18881889
**kwargs (dict, optional): Additional HTTP request parameters. See: <https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters>.
18891890
"""
18901891

@@ -1894,12 +1895,14 @@ def __init__(
18941895
hits: int,
18951896
app: Vespa,
18961897
query_limit: int = 20,
1898+
id_field: str = "id",
18971899
**kwargs,
18981900
):
18991901
self.queries = queries
19001902
self.hits = hits
19011903
self.app = app
19021904
self.query_limit = query_limit
1905+
self.id_field = id_field
19031906
self.parameters = kwargs
19041907

19051908
def _compute_recall(
@@ -1924,8 +1927,39 @@ def _compute_recall(
19241927
except KeyError:
19251928
results_approx = []
19261929

1927-
ids_exact = list(map(lambda x: x["id"], results_exact))
1928-
ids_approx = list(map(lambda x: x["id"], results_approx))
1930+
def extract_id(hit: dict, id_field: str) -> Tuple[str, str]:
1931+
"""Extract document ID from a Vespa hit."""
1932+
1933+
# id as specified by field
1934+
fields = hit.get("fields", {})
1935+
if id_field in fields:
1936+
return fields[id_field], "id_field"
1937+
1938+
# document id
1939+
id = hit.get("id", "")
1940+
if "::" in id:
1941+
return id, "document_id"
1942+
1943+
# internal id
1944+
if id.startswith(
1945+
"index:"
1946+
): # id is an internal id of the form index:[source]/[node-index]/[hex-gid], return hex-gid
1947+
return id.split("/", 2)[2], "internal_id"
1948+
1949+
raise ValueError(f"Could not extract a document id from hit: {hit}")
1950+
1951+
ids_exact = list(map(lambda x: extract_id(x, self.id_field)[0], results_exact))
1952+
ids_approx = list(
1953+
map(lambda x: extract_id(x, self.id_field)[0], results_approx)
1954+
)
1955+
1956+
id_types = set()
1957+
id_types.update(map(lambda x: extract_id(x, self.id_field)[1], results_exact))
1958+
id_types.update(map(lambda x: extract_id(x, self.id_field)[1], results_approx))
1959+
if len(id_types) > 1:
1960+
print(
1961+
f"Warning: Multiple id types obtained for hits: {id_types}. The recall computation will not be reliable. Please specify id_field correctly."
1962+
)
19291963

19301964
if len(ids_exact) != self.hits:
19311965
print(
@@ -2145,6 +2179,7 @@ class VespaNNParameterOptimizer:
21452179
benchmark_time_limit (int): Time in milliseconds to spend per bucket benchmark. Defaults to 5000.
21462180
recall_query_limit(int): Number of queries per bucket to compute the recall for. Defaults to 20.
21472181
max_concurrent(int): Number of queries to execute concurrently during benchmark/recall calculation. Defaults to 10.
2182+
id_field (str): Name of the field containing a unique id for recall computation. Defaults to "id".
21482183
"""
21492184

21502185
def __init__(
@@ -2157,6 +2192,7 @@ def __init__(
21572192
benchmark_time_limit: int = 5000,
21582193
recall_query_limit: int = 20,
21592194
max_concurrent: int = 10,
2195+
id_field: str = "id",
21602196
):
21612197
self.app = app
21622198
self.queries = queries
@@ -2170,6 +2206,7 @@ def __init__(
21702206
self.benchmark_time_limit = benchmark_time_limit
21712207
self.recall_query_limit = recall_query_limit
21722208
self.max_concurrent = max_concurrent
2209+
self.id_field = id_field
21732210

21742211
self.searchable_copies = None
21752212

@@ -2535,7 +2572,12 @@ def compute_average_recalls(self, **kwargs) -> BucketedMetricResults:
25352572
end="",
25362573
)
25372574
recall_evaluator = VespaNNRecallEvaluator(
2538-
bucket, self.hits, self.app, self.recall_query_limit, **kwargs
2575+
bucket,
2576+
self.hits,
2577+
self.app,
2578+
self.recall_query_limit,
2579+
self.id_field,
2580+
**kwargs,
25392581
)
25402582
recall_list = recall_evaluator.run()
25412583
results.append(recall_list)

0 commit comments

Comments
 (0)