Skip to content

Commit ba6bc01

Browse files
committed
fix(SUP-2586): do not return annotations if not asked
1 parent 07bf0cc commit ba6bc01

File tree

7 files changed

+104
-131
lines changed

7 files changed

+104
-131
lines changed

src/kili/adapters/kili_api_gateway/asset/operations.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,3 @@ def get_assets_query(fragment: str) -> str:
3030
external_ids: filterExistingAssets(projectID: $projectID, externalIDs: $externalIDs)
3131
}
3232
"""
33-
34-
GQL_COUNT_ASSET_ANNOTATIONS = """
35-
query countAssetAnnotations($where: AssetWhere!) {
36-
data: countAssetAnnotations(where: $where)
37-
}
38-
"""

src/kili/adapters/kili_api_gateway/asset/operations_mixin.py

Lines changed: 15 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,11 @@
22

33
from collections.abc import Generator
44

5-
from kili_formats.tool.annotations_to_json_response import (
6-
AnnotationsToJsonResponseConverter,
7-
)
8-
95
from kili.adapters.kili_api_gateway.asset.formatters import (
106
load_asset_json_fields,
117
)
128
from kili.adapters.kili_api_gateway.asset.mappers import asset_where_mapper
139
from kili.adapters.kili_api_gateway.asset.operations import (
14-
GQL_COUNT_ASSET_ANNOTATIONS,
1510
GQL_COUNT_ASSETS,
1611
GQL_CREATE_UPLOAD_BUCKET_SIGNED_URLS,
1712
GQL_FILTER_EXISTING_ASSETS,
@@ -23,18 +18,11 @@
2318
QueryOptions,
2419
fragment_builder,
2520
)
26-
from kili.adapters.kili_api_gateway.label.common import get_annotation_fragment
2721
from kili.adapters.kili_api_gateway.project.common import get_project
2822
from kili.core.graphql.operations.asset.mutations import GQL_SET_ASSET_CONSENSUS
2923
from kili.domain.asset import AssetFilters
3024
from kili.domain.types import ListOrTuple
3125

32-
# Threshold for batching based on number of annotations
33-
# This is used to determine whether to use a single batch or multiple batches
34-
# when fetching assets. If the number of annotations counted exceeds this threshold,
35-
# the asset fetch will be done in multiple smaller batches to avoid performance issues.
36-
THRESHOLD_FOR_BATCHING = 200
37-
3826

3927
class AssetOperationMixin(BaseOperationMixin):
4028
"""Mixin extending Kili API Gateway class with Assets related operations."""
@@ -46,9 +34,6 @@ def list_assets(
4634
options: QueryOptions,
4735
) -> Generator[dict, None, None]:
4836
"""List assets with given options."""
49-
has_labels_url = "labels.jsonResponseUrl" in fields
50-
has_latest_label_url = "latestLabel.jsonResponseUrl" in fields
51-
5237
if "labels.jsonResponse" in fields or "latestLabel.jsonResponse" in fields:
5338
# Check if we can get the jsonResponse of if we need to rebuild it.
5439
project_info = get_project(
@@ -61,10 +46,7 @@ def list_assets(
6146
"LLM_STATIC",
6247
"GEOSPATIAL",
6348
}:
64-
fetch_annotations = not (has_labels_url or has_latest_label_url)
65-
yield from self.list_assets_split(
66-
filters, fields, options, project_info, fetch_annotations
67-
)
49+
yield from self.list_assets_split(filters, fields, options, project_info)
6850
return
6951

7052
fragment = fragment_builder(fields)
@@ -90,35 +72,24 @@ def list_assets_split(
9072
fields: ListOrTuple[str],
9173
options: QueryOptions,
9274
project_info,
93-
fetch_annotations: bool,
9475
) -> Generator[dict, None, None]:
9576
"""List assets with given options."""
96-
nb_annotations = self.count_assets_annotations(filters)
9777
assets_batch_max_amount = 10 if project_info["inputType"] == "VIDEO" else 50
9878
batch_size_to_use = min(options.batch_size, assets_batch_max_amount)
99-
batch_size = (
100-
1 if nb_annotations / batch_size_to_use > THRESHOLD_FOR_BATCHING else batch_size_to_use
101-
)
10279

103-
options = QueryOptions(options.disable_tqdm, options.first, options.skip, batch_size)
104-
105-
static_fragments = {}
106-
if fetch_annotations:
107-
inner_annotation_fragment = get_annotation_fragment()
108-
annotation_fragment = f"""
109-
annotations {{
110-
{inner_annotation_fragment}
111-
}}
112-
"""
113-
static_fragments = {"labels": annotation_fragment, "latestLabel": annotation_fragment}
114-
115-
required_fields = {"content", "jsonContent", "resolution.width", "resolution.height"}
116-
fields = list(fields)
117-
for field in required_fields:
118-
if field not in fields:
119-
fields.append(field)
120-
121-
fragment = fragment_builder(fields, static_fragments if static_fragments else None)
80+
options = QueryOptions(options.disable_tqdm, options.first, options.skip, batch_size_to_use)
81+
82+
required_fields = {"content", "jsonContent", "resolution.width", "resolution.height"}
83+
if "labels.jsonResponse" in fields:
84+
required_fields.add("labels.jsonResponseUrl")
85+
if "latestLabel.jsonResponse" in fields:
86+
required_fields.add("latestLabel.jsonResponseUrl")
87+
fields = list(fields)
88+
for field in required_fields:
89+
if field not in fields:
90+
fields.append(field)
91+
92+
fragment = fragment_builder(fields)
12293
query = get_assets_query(fragment)
12394
where = asset_where_mapper(filters)
12495
assets_gen = PaginatedGraphQLQuery(self.graphql_client).execute_query_from_paginated_call(
@@ -128,28 +99,7 @@ def list_assets_split(
12899
load_asset_json_fields(asset, fields, self.http_client) for asset in assets_gen
129100
)
130101

131-
if fetch_annotations:
132-
converter = AnnotationsToJsonResponseConverter(
133-
json_interface=project_info["jsonInterface"],
134-
project_input_type=project_info["inputType"],
135-
)
136-
is_requesting_annotations = any("annotations." in element for element in fields)
137-
for asset in assets_gen:
138-
if "latestLabel.jsonResponse" in fields and asset.get("latestLabel"):
139-
converter.patch_label_json_response(
140-
asset, asset["latestLabel"], asset["latestLabel"]["annotations"]
141-
)
142-
if not is_requesting_annotations:
143-
asset["latestLabel"].pop("annotations")
144-
145-
if "labels.jsonResponse" in fields:
146-
for label in asset.get("labels", []):
147-
converter.patch_label_json_response(asset, label, label["annotations"])
148-
if not is_requesting_annotations:
149-
label.pop("annotations")
150-
yield asset
151-
else:
152-
yield from assets_gen
102+
yield from assets_gen
153103

154104
def count_assets(self, filters: AssetFilters) -> int:
155105
"""Send a GraphQL request calling countIssues resolver."""
@@ -176,14 +126,6 @@ def filter_existing_assets(self, project_id: str, assets_external_ids: ListOrTup
176126
external_id_response = self.graphql_client.execute(GQL_FILTER_EXISTING_ASSETS, payload)
177127
return external_id_response["external_ids"]
178128

179-
def count_assets_annotations(self, filters: AssetFilters) -> int:
180-
"""Count the number of annotations for assets matching the filters."""
181-
where = asset_where_mapper(filters)
182-
payload = {"where": where}
183-
count_result = self.graphql_client.execute(GQL_COUNT_ASSET_ANNOTATIONS, payload)
184-
count: int = count_result["data"]
185-
return count
186-
187129
def update_asset_consensus(
188130
self,
189131
project_id: str,

src/kili/adapters/kili_api_gateway/label/operations_mixin.py

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
from collections.abc import Generator
55
from typing import Optional
66

7-
from kili_formats.tool.annotations_to_json_response import AnnotationsToJsonResponseConverter
8-
97
from kili.adapters.kili_api_gateway.base import BaseOperationMixin
108
from kili.adapters.kili_api_gateway.helpers.queries import (
119
PaginatedGraphQLQuery,
@@ -21,7 +19,6 @@
2119
from kili.domain.types import ListOrTuple
2220
from kili.utils.tqdm import tqdm
2321

24-
from .common import get_annotation_fragment, get_asset
2522
from .formatters import load_label_json_fields
2623
from .mappers import append_label_data_mapper, append_to_labels_data_mapper, label_where_mapper
2724
from .operations import (
@@ -52,11 +49,7 @@ def list_labels(
5249
options: QueryOptions,
5350
) -> Generator[dict, None, None]:
5451
"""List labels."""
55-
has_json_response_url = "jsonResponseUrl" in fields
56-
5752
if "jsonResponse" in fields:
58-
if "labelOf" not in fields:
59-
fields = [*list(fields), "assetId"]
6053
project_info = get_project(
6154
self.graphql_client, filters.project_id, ("inputType", "jsonInterface")
6255
)
@@ -67,10 +60,7 @@ def list_labels(
6760
"LLM_INSTR_FOLLOWING",
6861
"LLM_STATIC",
6962
}:
70-
fetch_annotations = not has_json_response_url
71-
yield from self.list_labels_split(
72-
filters, fields, options, project_info, fetch_annotations
73-
)
63+
yield from self.list_labels_split(filters, fields, options, project_info)
7464
return
7565

7666
fragment = fragment_builder(fields)
@@ -90,57 +80,27 @@ def list_labels_split(
9080
fields: ListOrTuple[str],
9181
options: QueryOptions,
9282
project_info,
93-
fetch_annotations: bool,
9483
) -> Generator[dict, None, None]:
9584
"""List labels."""
9685
if project_info["inputType"] == "VIDEO":
9786
options = QueryOptions(
9887
options.disable_tqdm, options.first, options.skip, min(options.batch_size, 20)
9988
)
10089

101-
fragment = fragment_builder(fields)
90+
fields = list(fields)
91+
if "jsonResponse" in fields and "jsonResponseUrl" not in fields:
92+
fields.append("jsonResponseUrl")
10293

103-
if fetch_annotations:
104-
inner_annotation_fragment = get_annotation_fragment()
105-
full_fragment = f"""
106-
{fragment}
107-
annotations {{
108-
{inner_annotation_fragment}
109-
}}
110-
"""
111-
else:
112-
full_fragment = fragment
113-
114-
query = get_labels_query(full_fragment)
94+
fragment = fragment_builder(fields)
95+
query = get_labels_query(fragment)
11596
where = label_where_mapper(filters)
11697
labels_gen = PaginatedGraphQLQuery(self.graphql_client).execute_query_from_paginated_call(
11798
query, where, options, "Retrieving labels", GQL_COUNT_LABELS
11899
)
119100
labels_gen = (
120101
load_label_json_fields(label, fields, self.http_client) for label in labels_gen
121102
)
122-
123-
if "jsonResponse" in fields and fetch_annotations:
124-
converter = AnnotationsToJsonResponseConverter(
125-
json_interface=project_info["jsonInterface"],
126-
project_input_type=project_info["inputType"],
127-
)
128-
for label in labels_gen:
129-
asset = None
130-
if project_info["inputType"] == "VIDEO":
131-
asset = get_asset(
132-
self.graphql_client,
133-
self.http_client,
134-
label["assetId"],
135-
["content", "jsonContent", "resolution.width", "resolution.height"],
136-
)
137-
converter.patch_label_json_response(asset, label, label["annotations"])
138-
if not any("annotations." in element for element in fields):
139-
label.pop("annotations")
140-
yield label
141-
142-
else:
143-
yield from labels_gen
103+
yield from labels_gen
144104

145105
def delete_labels(
146106
self, ids: ListOrTuple[LabelId], disable_tqdm: Optional[bool]

src/kili/llm/services/export/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
"externalId",
5959
"jsonMetadata",
6060
"labels.jsonResponse",
61-
"labels.jsonResponseUrl",
6261
"labels.author.id",
6362
"labels.author.email",
6463
"labels.author.firstname",

src/kili/services/export/tools.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
DEFAULT_FIELDS = [
3636
*COMMON_FIELDS,
3737
"labels.jsonResponse",
38-
"labels.jsonResponseUrl",
3938
"labels.author.id",
4039
"labels.author.email",
4140
"labels.author.firstname",
@@ -49,7 +48,6 @@
4948
LATEST_LABEL_FIELDS = [
5049
*COMMON_FIELDS,
5150
"latestLabel.jsonResponse",
52-
"latestLabel.jsonResponseUrl",
5351
"latestLabel.author.id",
5452
"latestLabel.author.email",
5553
"latestLabel.author.firstname",

tests/integration/adapters/kili_api_gateway/test_asset.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,90 @@
66
from kili.domain.project import ProjectId
77

88

9+
def test_given_video_project_only_latest_label_json_response_requested_annotations_not_fetched_for_labels(
10+
graphql_client, http_client
11+
):
12+
"""For VIDEO projects, when only latestLabel.jsonResponse is requested (not labels.jsonResponse).
13+
14+
The annotations fragment should only be injected into latestLabel in the GraphQL query,
15+
not into labels.
16+
"""
17+
captured_queries = []
18+
19+
def mock_graphql_execute(query, *_args, **_kwargs) -> dict:
20+
captured_queries.append(query)
21+
22+
if "countAssetAnnotations" in query:
23+
return {"data": 1}
24+
25+
if "query countAssets" in query:
26+
return {"data": 1}
27+
28+
if "projects(" in query:
29+
return {
30+
"data": [
31+
{
32+
"id": "project_id",
33+
"inputType": "VIDEO",
34+
"jsonInterface": '{"jobs": {}}',
35+
}
36+
]
37+
}
38+
39+
if "query assets" in query:
40+
return {
41+
"data": [
42+
{
43+
"id": "fake_asset_id",
44+
"labels": [{"id": "label_id"}],
45+
"latestLabel": {
46+
"jsonResponseUrl": "https://storage.example.com/label.json",
47+
},
48+
"content": "",
49+
"jsonContent": "",
50+
"resolution": {"width": 100, "height": 100},
51+
}
52+
]
53+
}
54+
55+
return None
56+
57+
mock_http_response = http_client.get.return_value
58+
mock_http_response.json.return_value = {"jobs": {"OBJECT_DETECTION_JOB": {}}}
59+
60+
graphql_client.execute.side_effect = mock_graphql_execute
61+
62+
asset_operations = AssetOperationMixin()
63+
asset_operations.graphql_client = graphql_client
64+
asset_operations.http_client = http_client
65+
filters = AssetFilters(project_id=ProjectId("project_id"))
66+
fields = ["id", "labels.id", "latestLabel.jsonResponse"]
67+
68+
# when
69+
assets = list(
70+
asset_operations.list_assets(filters, fields, options=QueryOptions(disable_tqdm=None))
71+
)
72+
73+
# then
74+
assert len(assets) == 1
75+
assert assets[0]["id"] == "fake_asset_id"
76+
# jsonResponse was downloaded from jsonResponseUrl and jsonResponseUrl was removed
77+
assert assets[0]["latestLabel"]["jsonResponse"] == {"jobs": {"OBJECT_DETECTION_JOB": {}}}
78+
assert "jsonResponseUrl" not in assets[0]["latestLabel"]
79+
# annotations were never fetched: not in latestLabel, not in labels
80+
assert "annotations" not in assets[0]["latestLabel"]
81+
assert "annotations" not in assets[0]["labels"][0]
82+
83+
# then: the GraphQL query fetches jsonResponseUrl for latestLabel, not annotations
84+
assets_query = next(q for q in captured_queries if "query assets" in q)
85+
assert "jsonResponseUrl" in assets_query # latestLabel has the jsonResponseUrl fragment
86+
assert "annotations" not in assets_query # annotations fragment was not added
87+
assert "labels{ id}" in assets_query # labels block only has id
88+
89+
990
def test_given_a_query_returning_serialized_json_it_parses_json_fields(graphql_client, http_client):
1091
# mocking
11-
def mock_graphql_execute(query, variables, **kwargs):
92+
def mock_graphql_execute(query, variables, **kwargs) -> dict | None:
1293
if "query assets" in query:
1394
label = {"jsonResponse": "{}"}
1495
return {

tests/unit/services/export/test_export.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,6 @@ def test_export_with_asset_filter_kwargs(mocker):
858858
"resolution.height",
859859
"resolution.width",
860860
"latestLabel.jsonResponse",
861-
"latestLabel.jsonResponseUrl",
862861
"latestLabel.author.id",
863862
"latestLabel.author.email",
864863
"latestLabel.author.firstname",

0 commit comments

Comments
 (0)