Skip to content

Commit 707412c

Browse files
github-actions[bot]FannyGaudinBlueGrizzliBear
authored
chore: merge release/2.158.3 into main (#1755)
Co-authored-by: FannyGaudin <[email protected]> Co-authored-by: Clément Bussière <[email protected]>
1 parent 691a625 commit 707412c

File tree

10 files changed

+145
-19
lines changed

10 files changed

+145
-19
lines changed

.github/scripts/upload_test_stats_datadog.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from datadog import api, initialize
1616
from tqdm import tqdm
1717

18+
from kili.core.helpers import get_response_json, log_raise_for_status
19+
1820
# https://docs.datadoghq.com/developers/guide/what-best-practices-are-recommended-for-naming-metrics-and-tags/#rules-and-best-practices-for-naming-metrics
1921
# map the test name to the metrics name on datadog
2022
TESTS_TO_PLOT_ON_DATADOG_MAP = {
@@ -77,7 +79,9 @@ def get_workflow_runs_from_github() -> List[Dict]:
7779
while True:
7880
print("Fetching page", page, "...")
7981
response = requests.get(url + f"&page={page}", headers=HEADERS, timeout=30)
80-
response_json = response.json()
82+
log_raise_for_status(response)
83+
84+
response_json = get_response_json(response)
8185
for workflow_run in response_json["workflow_runs"]:
8286
updated_at = datetime.strptime(workflow_run["updated_at"], r"%Y-%m-%dT%H:%M:%SZ")
8387

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
33

44
[project]
55
name = "kili"
6-
version = "2.158.2"
6+
version = "2.158.3"
77
description = "Python client for Kili Technology labeling tool"
88
readme = "README.md"
99
authors = [{ name = "Kili Technology", email = "[email protected]" }]

src/kili/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Kili Python SDK."""
22

3-
__version__ = "2.158.2"
3+
__version__ = "2.158.3"

src/kili/adapters/kili_api_gateway/asset/formatters.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Dict
55

66
from kili.adapters.http_client import HttpClient
7-
from kili.core.helpers import is_url
7+
from kili.core.helpers import get_response_json, is_url, log_raise_for_status
88
from kili.domain.types import ListOrTuple
99

1010

@@ -13,10 +13,9 @@ def load_json_from_link(link: str, http_client: HttpClient) -> Dict:
1313
if link == "" or not is_url(link):
1414
return {}
1515

16-
try:
17-
return http_client.get(link, timeout=30).json()
18-
except json.JSONDecodeError:
19-
return {}
16+
response = http_client.get(link, timeout=30)
17+
log_raise_for_status(response)
18+
return get_response_json(response)
2019

2120

2221
def load_asset_json_fields(asset: Dict, fields: ListOrTuple[str], http_client: HttpClient) -> Dict:

src/kili/core/helpers.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import functools
44
import glob
5+
import json
56
import mimetypes
67
import os
78
import re
@@ -10,11 +11,13 @@
1011
from typing import Any, Callable, Dict, List, Optional, Type, TypeVar, Union
1112

1213
import pyparsing as pp
14+
import requests
1315
import tenacity
1416
from typing_extensions import get_args, get_origin
1517

1618
from kili.adapters.http_client import HttpClient
1719
from kili.core.constants import mime_extensions_for_IV2
20+
from kili.log.logging import logger
1821

1922
T = TypeVar("T")
2023

@@ -342,3 +345,29 @@ def is_empty_list_with_warning(method_name: str, argument_name: str, argument_va
342345
)
343346
return True
344347
return False
348+
349+
350+
def log_raise_for_status(response: requests.Response) -> None:
    """Raise for an unsuccessful HTTP response, logging the failure first.

    Delegates to ``response.raise_for_status()``. When that call raises an
    ``HTTPError``, the error is logged (with its traceback) through the
    module-level ``logger`` and then re-raised unchanged.

    Args:
        response: a requests.Response

    Raises:
        requests.exceptions.HTTPError: propagated from
            ``response.raise_for_status()`` after being logged.
    """
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        message = "An error occurred while processing the response: %s"
        logger.exception(message, http_error)
        # Bare raise keeps the original exception and traceback for callers.
        raise
361+
362+
363+
def get_response_json(response: requests.Response) -> dict:
    """Decode the JSON body of a requests.Response, best effort.

    Args:
        response: a requests.Response

    Returns:
        The decoded JSON payload, or an empty dict when the body cannot be
        decoded as JSON (the failure is logged with its traceback).
    """
    try:
        return response.json()
    # ``Response.json()`` does not always raise the stdlib
    # ``json.JSONDecodeError``: depending on the installed requests version
    # and on whether simplejson is present, the error may be
    # ``requests.exceptions.JSONDecodeError`` or ``simplejson.JSONDecodeError``.
    # All of them derive from ``ValueError``, so catch that instead.
    except ValueError:
        logger.exception("An error occurred while decoding the json response")
        return {}

src/kili/log/logging.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import logging
2+
import sys
3+
4+
# Package-wide logger for the SDK; only WARNING and above are emitted.
logger = logging.getLogger("kili")
logger.setLevel(logging.WARNING)
# Records are handled here only and are not forwarded to ancestor loggers.
logger.propagate = False
# Attach the stderr handler only once: re-executing this module (for example
# through importlib.reload) would otherwise stack a new handler each time and
# print every record several times.
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stderr
    for handler in logger.handlers
):
    logger.addHandler(logging.StreamHandler(stream=sys.stderr))

src/kili/services/asset_import/video.py

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from concurrent.futures import ThreadPoolExecutor
55
from enum import Enum
66
from itertools import repeat
7+
from json import JSONDecodeError, loads
78
from typing import List
89

910
from kili.core.helpers import get_mime_type, is_url
@@ -162,20 +163,90 @@ def get_data_type(self, assets):
162163
return VideoDataType.HOSTED_FILE
163164
return VideoDataType.LOCAL_FILE
164165

166+
@staticmethod
167+
def are_native_videos(assets) -> bool:
168+
"""Determine if assets should be imported asynchronously and cut into frames."""
169+
should_use_native_video_array = []
170+
for asset in assets:
171+
# json_metadata stringification is done later on the call
172+
json_metadata_ = asset.get("json_metadata", {})
173+
processing_parameters = json_metadata_.get("processingParameters", {})
174+
should_use_native_video_array.append(
175+
processing_parameters.get("shouldUseNativeVideo", True)
176+
)
177+
if all(should_use_native_video_array):
178+
return True
179+
if all(not b for b in should_use_native_video_array):
180+
return False
181+
raise ImportValidationError(
182+
"""
183+
Cannot upload videos to split into frames
184+
and video to keep as native in the same time.
185+
Please separate the assets into 2 calls
186+
"""
187+
)
188+
189+
@staticmethod
190+
def has_complete_processing_parameters(asset) -> bool:
191+
"""Determine if assets should be imported asynchronously and cut into frames."""
192+
try:
193+
json_metadata = asset.get("jsonMetadata")
194+
if not json_metadata:
195+
return False
196+
197+
processing_parameters = loads(json_metadata).get("processingParameters")
198+
if not processing_parameters:
199+
return False
200+
201+
required_keys = [
202+
"codec",
203+
"delayDueToMinPts",
204+
"framesPlayedPerSecond",
205+
"numberOfFrames",
206+
"startTime",
207+
]
208+
required_types = [str, int, float, int, float]
209+
210+
for key, required_type in zip(required_keys, required_types):
211+
value = processing_parameters.get(key)
212+
if value is None or not isinstance(value, required_type):
213+
return False
214+
215+
return True
216+
except JSONDecodeError:
217+
return False
218+
219+
def videos_have_complete_processing_parameters(self, assets) -> bool:
    """Check that every asset passes ``has_complete_processing_parameters``.

    Args:
        assets: iterable of asset dicts to inspect.

    Returns:
        True when all assets pass the check (also for an empty iterable),
        False as soon as one asset fails it.
    """
    return all(self.has_complete_processing_parameters(video) for video in assets)
225+
165226
def import_assets(self, assets: List[AssetLike]):
166227
"""Import video assets into Kili."""
167228
self._check_upload_is_allowed(assets)
168229
data_type = self.get_data_type(assets)
169230
assets = self.filter_duplicate_external_ids(assets)
170231
if data_type == VideoDataType.LOCAL_FILE:
171232
assets = self.filter_local_assets(assets, self.raise_error)
172-
batch_params = BatchParams(is_hosted=False, is_asynchronous=True)
233+
are_native_videos = self.are_native_videos(assets)
234+
videos_have_complete_processing_parameters = (
235+
self.videos_have_complete_processing_parameters(assets)
236+
)
237+
is_synchronous = are_native_videos and videos_have_complete_processing_parameters
238+
batch_params = BatchParams(is_hosted=False, is_asynchronous=not is_synchronous)
173239
batch_importer = VideoContentBatchImporter(
174240
self.kili, self.project_params, batch_params, self.pbar
175241
)
176242
batch_size = IMPORT_BATCH_SIZE
177243
elif data_type == VideoDataType.HOSTED_FILE:
178-
batch_params = BatchParams(is_hosted=True, is_asynchronous=True)
244+
are_native_videos = self.are_native_videos(assets)
245+
videos_have_complete_processing_parameters = (
246+
self.videos_have_complete_processing_parameters(assets)
247+
)
248+
is_synchronous = are_native_videos and videos_have_complete_processing_parameters
249+
batch_params = BatchParams(is_hosted=True, is_asynchronous=not is_synchronous)
179250
batch_importer = VideoContentBatchImporter(
180251
self.kili, self.project_params, batch_params, self.pbar
181252
)

src/kili/services/export/repository.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Any, Dict, Iterator, List
55

66
from kili.adapters.http_client import HttpClient
7+
from kili.core.helpers import get_response_json, log_raise_for_status
78

89
from .exceptions import DownloadError
910

@@ -48,8 +49,11 @@ def get_frames(self, content_url: str) -> List[str]:
4849
frames: List[str] = []
4950
json_content_resp = self.http_client.get(content_url, timeout=30)
5051

52+
log_raise_for_status(json_content_resp)
53+
json_response = get_response_json(json_content_resp)
54+
5155
if json_content_resp.ok:
52-
frames = list(json_content_resp.json().values())
56+
frames = list(json_response.values())
5357
return frames
5458

5559
def get_content_stream(self, content_url: str, block_size: int) -> Iterator[Any]:

src/kili/services/export/tools.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
from kili.adapters.http_client import HttpClient
77
from kili.adapters.kili_api_gateway.helpers.queries import QueryOptions
88
from kili.core.constants import QUERY_BATCH_SIZE
9-
from kili.core.helpers import validate_category_search_query
9+
from kili.core.helpers import (
10+
get_response_json,
11+
log_raise_for_status,
12+
validate_category_search_query,
13+
)
1014
from kili.core.utils.pagination import batcher
1115
from kili.domain.asset import AssetFilters, AssetId
1216
from kili.domain.project import ProjectId
@@ -187,7 +191,8 @@ def is_geotiff_asset_with_lat_lon_coords(asset: Dict, http_client: HttpClient) -
187191

188192
if isinstance(asset["jsonContent"], str) and asset["jsonContent"].startswith("http"):
189193
response = http_client.get(asset["jsonContent"], timeout=30)
190-
json_content = response.json()
194+
log_raise_for_status(response)
195+
json_content = get_response_json(response)
191196

192197
else:
193198
json_content = asset["jsonContent"]

src/kili/use_cases/asset/media_downloader.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from tenacity.wait import wait_random
1313

1414
from kili.adapters.http_client import HttpClient
15+
from kili.core.helpers import get_response_json, log_raise_for_status
1516
from kili.domain.asset import AssetExternalId
1617
from kili.domain.project import ProjectId
1718
from kili.domain.types import ListOrTuple
@@ -125,8 +126,10 @@ def download_assets(self, assets: List[Dict]) -> List[Dict]:
125126
def download_single_asset(self, asset: Dict) -> Dict[str, Any]:
126127
"""Download single asset on disk and modify asset attributes."""
127128
if "ocrMetadata" in asset and str(asset["ocrMetadata"]).startswith("http"):
128-
response = self.http_client.get(asset["ocrMetadata"], timeout=20).json()
129-
asset["ocrMetadata"] = response
129+
response = self.http_client.get(asset["ocrMetadata"], timeout=20)
130+
log_raise_for_status(response)
131+
json_content = get_response_json(response)
132+
asset["ocrMetadata"] = json_content
130133

131134
if "jsonContent" in asset and str(asset["jsonContent"]).startswith("http"):
132135
# richtext
@@ -137,8 +140,10 @@ def download_single_asset(self, asset: Dict) -> Dict[str, Any]:
137140

138141
# video frames
139142
elif self.project_input_type == "VIDEO":
140-
response = self.http_client.get(asset["jsonContent"], timeout=20).json()
141-
urls = tuple(response.values())
143+
response = self.http_client.get(asset["jsonContent"], timeout=20)
144+
log_raise_for_status(response)
145+
json_content = get_response_json(response)
146+
urls = tuple(json_content.values())
142147
nbr_char_zfill = len(str(len(urls)))
143148
img_names = (
144149
f'{asset["externalId"]}_{f"{i+1}".zfill(nbr_char_zfill)}'
@@ -158,8 +163,10 @@ def download_single_asset(self, asset: Dict) -> Dict[str, Any]:
158163
# big images
159164
elif self.project_input_type == "IMAGE":
160165
# the "jsonContent" contains some information but not the image
161-
response = self.http_client.get(asset["jsonContent"], timeout=20).json()
162-
asset["jsonContent"] = response
166+
response = self.http_client.get(asset["jsonContent"], timeout=20)
167+
log_raise_for_status(response)
168+
json_content = get_response_json(response)
169+
asset["jsonContent"] = json_content
163170

164171
else:
165172
raise NotImplementedError(

0 commit comments

Comments
 (0)