Skip to content

Commit a00f3e6

Browse files
feat(LAB-3459): import geospatial image through SDK (#1863)
Co-authored-by: paulruelle <paul.ruelle@kili-technology.com>
1 parent bce7fde commit a00f3e6

File tree

11 files changed

+259 additions, -29 deletions (lines changed)

src/kili/core/constants.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
"application/vnd.nitf, image/jp2,image/jpeg,image/png,image/bmp,image/gif,image/webp,"
1313
"image/x-icon,image/tiff,image/vnd.microsoft.icon,image/svg+xml,image/avif,image/apng"
1414
),
15+
"Geospatial": ("application/vnd.nitf, image/jp2, image/tiff"),
1516
"Pdf": "application/pdf",
1617
"Text": "text/plain",
1718
"TimeSeries": "text/csv",
@@ -21,6 +22,7 @@
2122
mime_extensions_for_IV2 = {
2223
"AUDIO": mime_extensions["Audio"],
2324
"IMAGE": mime_extensions["Image"],
25+
"GEOSPATIAL": mime_extensions["Geospatial"],
2426
"NA": "",
2527
"PDF": mime_extensions["Pdf"],
2628
"TEXT": mime_extensions["Text"],

src/kili/domain/project.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
ProjectId = NewType("ProjectId", str)
1313
InputType = Literal[
14-
"IMAGE", "PDF", "TEXT", "VIDEO", "LLM_RLHF", "LLM_INSTR_FOLLOWING", "LLM_STATIC"
14+
"IMAGE", "GEOSPATIAL", "PDF", "TEXT", "VIDEO", "LLM_RLHF", "LLM_INSTR_FOLLOWING", "LLM_STATIC"
1515
]
1616

1717

src/kili/services/asset_import/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,13 @@ def import_assets( # pylint: disable=too-many-arguments
5252

5353
if input_type not in importer_by_type:
5454
raise NotImplementedError(f"There is no importer for the input type: {input_type}")
55-
if input_type != "IMAGE" and any(asset.get("multi_layer_content") for asset in assets):
55+
if input_type not in ["IMAGE", "GEOSPATIAL"] and any(
56+
asset.get("multi_layer_content") for asset in assets
57+
):
5658
raise ImportValidationError(
5759
f"Import of multi-layer assets is not supported for input type: {input_type}"
5860
)
5961
asset_importer = importer_by_type[input_type](*importer_params)
6062
casted_assets = cast(List[AssetLike], assets)
6163
asset_importer.check_asset_contents(casted_assets)
62-
return asset_importer.import_assets(assets=casted_assets)
64+
return asset_importer.import_assets(assets=casted_assets, input_type=input_type)

src/kili/services/asset_import/base.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,6 @@
5353

5454
FILTER_EXISTING_BATCH_SIZE = 1000
5555

56-
5756
if TYPE_CHECKING:
5857
from kili.client import Kili
5958

@@ -63,6 +62,7 @@ class BatchParams(NamedTuple):
6362

6463
is_asynchronous: bool
6564
is_hosted: bool
65+
input_type: Optional[InputType] = None
6666

6767

6868
class ProcessingParams(NamedTuple):
@@ -103,7 +103,9 @@ def __init__(
103103
self.logger = logging.getLogger(__name__)
104104
self.logger.setLevel(logging.INFO)
105105

106-
def import_batch(self, assets: ListOrTuple[AssetLike], verify: bool) -> List[str]:
106+
def import_batch( # pylint: disable=unused-argument
107+
self, assets: ListOrTuple[AssetLike], verify: bool, input_type: Optional[InputType] = None
108+
) -> List[str]:
107109
"""Base actions to import a batch of asset.
108110
109111
Returns:
@@ -236,7 +238,7 @@ def are_native_videos(assets) -> bool:
236238

237239
def _async_import_to_kili(self, assets: List[KiliResolverAsset]):
238240
"""Import assets with asynchronous resolver."""
239-
if self.input_type == "IMAGE":
241+
if self.input_type in ["IMAGE", "GEOSPATIAL"]:
240242
upload_type = "GEO_SATELLITE"
241243
elif self.input_type in ("VIDEO", "VIDEO_LEGACY"):
242244
upload_type = "NATIVE_VIDEO" if self.are_native_videos(assets) else "FRAME_VIDEO"
@@ -297,7 +299,9 @@ def import_to_kili(self, assets: List[KiliResolverAsset]):
297299
class ContentBatchImporter(BaseBatchImporter):
298300
"""Class defining the methods to import a batch of assets with content."""
299301

300-
def import_batch(self, assets: List[AssetLike], verify: bool):
302+
def import_batch(
303+
self, assets: List[AssetLike], verify: bool, input_type: Optional[InputType] = None
304+
):
301305
"""Method to import a batch of asset with content."""
302306
assets = self.add_ids(assets)
303307
if not self.is_hosted:
@@ -312,7 +316,7 @@ def import_batch(self, assets: List[AssetLike], verify: bool):
312316
if not asset.get("content") and not asset.get("multi_layer_content")
313317
]
314318
if len(assets_with_content) > 0:
315-
assets += self.upload_local_content_to_bucket(assets_with_content)
319+
assets += self.upload_local_content_to_bucket(assets_with_content, input_type)
316320
return super().import_batch(assets, verify)
317321

318322
def get_content_type_and_data_from_content(
@@ -332,7 +336,9 @@ def get_type_and_data_from_content_array(
332336
"""Returns the data of the content (path) and its content type for each element in the array."""
333337
return list(map(self.get_content_type_and_data_from_content, content_array))
334338

335-
def upload_local_content_to_bucket(self, assets: List[AssetLike]):
339+
def upload_local_content_to_bucket(
340+
self, assets: List[AssetLike], input_type: Optional[InputType] = None
341+
):
336342
"""Upload local content to a bucket."""
337343
project_bucket_path = self.generate_project_bucket_path()
338344
# tuple containing (bucket_path, file_path, asset_index, content_index)
@@ -343,12 +349,14 @@ def upload_local_content_to_bucket(self, assets: List[AssetLike]):
343349
if multi_layer_content:
344350
for j, item in enumerate(multi_layer_content):
345351
bucket_path = BaseBatchImporter.build_url_from_parts(
346-
project_bucket_path, asset_id, "content", str(j)
352+
project_bucket_path, asset_id, "content", f"{j!s}.tif"
347353
)
348354
to_upload.append((bucket_path, item.get("path"), i, j))
349355
else:
350356
bucket_path = BaseBatchImporter.build_url_from_parts(
351-
project_bucket_path, asset_id, "content"
357+
project_bucket_path,
358+
asset_id,
359+
"content.tif" if input_type == "GEOSPATIAL" else "content",
352360
)
353361
to_upload.append((bucket_path, asset.get("content"), i, None))
354362
signed_urls = bucket.request_signed_urls(
@@ -418,7 +426,9 @@ def upload_json_content_to_bucket(self, assets: List[AssetLike]):
418426
)
419427
return [AssetLike(**{**asset, "json_content": url}) for asset, url in zip(assets, url_gen)] # type: ignore
420428

421-
def import_batch(self, assets: List[AssetLike], verify: bool):
429+
def import_batch(
430+
self, assets: List[AssetLike], verify: bool, input_type: Optional[InputType] = None
431+
):
422432
"""Method to import a batch of asset with json content."""
423433
assets = self.add_ids(assets)
424434
assets = self.loop_on_batch(self.stringify_json_content)(assets)
@@ -443,7 +453,7 @@ def __init__(
443453
self.pbar = tqdm(disable=logger_params.disable_tqdm)
444454

445455
@abc.abstractmethod
446-
def import_assets(self, assets: List[AssetLike]) -> List[str]:
456+
def import_assets(self, assets: List[AssetLike], input_type: InputType) -> List[str]:
447457
"""Import assets into Kili.
448458
449459
Returns:
@@ -611,6 +621,7 @@ def import_assets_by_batch(
611621
assets: List[AssetLike],
612622
batch_importer: BaseBatchImporter,
613623
batch_size=IMPORT_BATCH_SIZE,
624+
input_type: Optional[InputType] = None,
614625
):
615626
"""Split assets by batch and import them with a given batch importer."""
616627
batch_generator = batcher(assets, batch_size)
@@ -622,5 +633,5 @@ def import_assets_by_batch(
622633
for i, batch_assets in enumerate(batch_generator):
623634
# check last batch only
624635
verify = i == (nb_batch - 1) and self.verify
625-
created_asset_ids += batch_importer.import_batch(batch_assets, verify)
636+
created_asset_ids += batch_importer.import_batch(batch_assets, verify, input_type)
626637
return created_asset_ids

src/kili/services/asset_import/image.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,17 @@
55

66
from kili.core.constants import mime_extensions_that_need_post_processing
77
from kili.core.helpers import get_mime_type
8+
from kili.domain.project import InputType
89

910
from .base import BaseAbstractAssetImporter, BatchParams, ContentBatchImporter
1011
from .constants import LARGE_IMAGE_THRESHOLD_SIZE
1112
from .types import AssetLike
1213

1314

1415
class ImageDataImporter(BaseAbstractAssetImporter):
15-
"""Class for importing assets into an IMAGE project."""
16+
"""Class for importing assets into an IMAGE or GEOSPATIAL project."""
1617

17-
def import_assets(self, assets: List[AssetLike]):
18+
def import_assets(self, assets: List[AssetLike], input_type: InputType):
1819
"""Import IMAGE assets into Kili."""
1920
self._check_upload_is_allowed(assets)
2021
is_hosted = self.is_hosted_content(assets)
@@ -24,17 +25,25 @@ def import_assets(self, assets: List[AssetLike]):
2425
sync_assets, async_assets = self.split_asset_by_upload_type(assets, is_hosted)
2526
created_asset_ids: List[str] = []
2627
if len(sync_assets) > 0:
27-
sync_batch_params = BatchParams(is_hosted=is_hosted, is_asynchronous=False)
28+
sync_batch_params = BatchParams(
29+
is_hosted=is_hosted, is_asynchronous=False, input_type=input_type
30+
)
2831
batch_importer = ContentBatchImporter(
2932
self.kili, self.project_params, sync_batch_params, self.pbar
3033
)
31-
created_asset_ids += self.import_assets_by_batch(sync_assets, batch_importer)
34+
created_asset_ids += self.import_assets_by_batch(
35+
sync_assets, batch_importer, input_type=input_type
36+
)
3237
if len(async_assets) > 0:
33-
async_batch_params = BatchParams(is_hosted=is_hosted, is_asynchronous=True)
38+
async_batch_params = BatchParams(
39+
is_hosted=is_hosted, is_asynchronous=True, input_type=input_type
40+
)
3441
batch_importer = ContentBatchImporter(
3542
self.kili, self.project_params, async_batch_params, self.pbar
3643
)
37-
created_asset_ids += self.import_assets_by_batch(async_assets, batch_importer)
44+
created_asset_ids += self.import_assets_by_batch(
45+
async_assets, batch_importer, input_type=input_type
46+
)
3847
return created_asset_ids
3948

4049
@staticmethod

src/kili/services/asset_import/llm.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from typing import List, Optional, Tuple
77

88
from kili.core.helpers import is_url
9+
from kili.domain.project import InputType
910

1011
from .base import (
1112
BaseAbstractAssetImporter,
@@ -71,7 +72,7 @@ def transform_asset_content(asset_content, json_metadata):
7172

7273
return transformed_asset_content, changed_json_metadata
7374

74-
def import_assets(self, assets: List[AssetLike]):
75+
def import_assets(self, assets: List[AssetLike], input_type: InputType):
7576
"""Import LLM assets into Kili."""
7677
self._check_upload_is_allowed(assets)
7778
data_type = self.get_data_type(assets)

src/kili/services/asset_import/pdf.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22

33
from typing import List
44

5+
from kili.domain.project import InputType
6+
57
from .base import BaseAbstractAssetImporter, BatchParams, ContentBatchImporter
68
from .types import AssetLike
79

810

911
class PdfDataImporter(BaseAbstractAssetImporter):
1012
"""Class for importing data into a PDF project."""
1113

12-
def import_assets(self, assets: List[AssetLike]):
14+
def import_assets(self, assets: List[AssetLike], input_type: InputType):
1315
"""Import PDF assets into Kili."""
1416
self._check_upload_is_allowed(assets)
1517
is_hosted = self.is_hosted_content(assets)

src/kili/services/asset_import/text.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from typing import List, Optional, Tuple
66

77
from kili.core.helpers import is_url
8+
from kili.domain.project import InputType
89

910
from .base import (
1011
BaseAbstractAssetImporter,
@@ -62,7 +63,7 @@ def get_data_type(assets: List[AssetLike]) -> TextDataType:
6263
return TextDataType.HOSTED_FILE
6364
return TextDataType.RAW_TEXT
6465

65-
def import_assets(self, assets: List[AssetLike]):
66+
def import_assets(self, assets: List[AssetLike], input_type: InputType):
6667
"""Import TEXT assets into Kili."""
6768
self._check_upload_is_allowed(assets)
6869
data_type = self.get_data_type(assets)

src/kili/services/asset_import/video.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,10 @@
55
from enum import Enum
66
from itertools import repeat
77
from json import JSONDecodeError
8-
from typing import List
8+
from typing import List, Optional
99

1010
from kili.core.helpers import get_mime_type, is_url
11+
from kili.domain.project import InputType
1112
from kili.services.asset_import.base import (
1213
BaseAbstractAssetImporter,
1314
BaseBatchImporter,
@@ -74,7 +75,9 @@ def add_video_processing_parameters(self, asset):
7475
json_metadata = {**json_metadata, "processingParameters": processing_parameters}
7576
return AssetLike(**{**asset, "json_metadata": json_metadata}) # type: ignore
7677

77-
def import_batch(self, assets: List[AssetLike], verify: bool):
78+
def import_batch(
79+
self, assets: List[AssetLike], verify: bool, input_type: Optional[InputType] = None
80+
):
7881
"""Import a batch of video assets from content into Kili."""
7982
assets = self.loop_on_batch(self.add_video_processing_parameters)(assets)
8083
return super().import_batch(assets, verify)
@@ -90,7 +93,9 @@ def add_video_processing_parameters(self, asset):
9093
json_metadata = {**json_metadata, "processingParameters": processing_parameters}
9194
return AssetLike(**{**asset, "json_metadata": json_metadata}) # type: ignore
9295

93-
def import_batch(self, assets: List[AssetLike], verify: bool):
96+
def import_batch(
97+
self, assets: List[AssetLike], verify: bool, input_type: Optional[InputType] = None
98+
):
9499
"""Import a batch of video assets from frames."""
95100
assets = self.add_ids(assets)
96101
if not self.is_hosted:
@@ -234,7 +239,7 @@ def videos_have_complete_processing_parameters(self, assets) -> bool:
234239
return False
235240
return True
236241

237-
def import_assets(self, assets: List[AssetLike]):
242+
def import_assets(self, assets: List[AssetLike], input_type: InputType):
238243
"""Import video assets into Kili."""
239244
self._check_upload_is_allowed(assets)
240245
data_type = self.get_data_type(assets)

tests/integration/adapters/kili_api_gateway/test_paginated_graphql_query.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,8 +156,8 @@ def test_given_a_query_and_a_number_of_elements_to_query_i_have_a_progress_bar(
156156

157157
# then
158158
captured = capsys.readouterr()
159-
assert "0%| | 0/250" in captured.err
160-
assert "100%|██████████| 250/250" in captured.err
159+
assert "0%" in captured.err and "0/250" in captured.err
160+
assert "100%" in captured.err and "250/250" in captured.err
161161

162162

163163
def test_given_a_query_without_a_count_query_I_do_not_have_a_progress_bar(graphql_client, capsys):

0 commit comments

Comments
 (0)