Skip to content

Commit 08bc415

Browse files
authored
Merge pull request #47 from rediscovery-io/dataset_info
dataset info method
2 parents 99fea31 + 6525d6f commit 08bc415

File tree

1 file changed

+67
-23
lines changed

1 file changed

+67
-23
lines changed

remo/domain/dataset.py

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from typing import List, TypeVar, Callable
2+
from typing import List, TypeVar
33

44
from .annotation import Annotation
55
from .image import Image
@@ -27,11 +27,31 @@ def __init__(self, id: int = None, name: str = None, quantity: int = 0, **kwargs
2727
self.n_images = quantity
2828

2929
def __str__(self):
    """
    Returns a one-line summary of the dataset: ID, quoted name and image count.

    The ID is right-aligned to at least two characters, the quoted name is
    padded to at least five characters and the image count is printed with a
    thousands separator, e.g. ``Dataset  3 - 'abc' - 1,234 images``.
    """
    # The ``d`` format code only accepts integers, but ``__init__`` allows
    # ``id=None``. Fall back to plain text in that case so that printing a
    # dataset without an ID does not raise.
    if isinstance(self.id, int):
        id_text = "{:2d}".format(self.id)
    else:
        id_text = "{!s:>2}".format(self.id)

    return "Dataset {id} - {name:5s} - {n_images:,} images".format(
        id=id_text, name="'{}'".format(self.name), n_images=self.n_images
    )
3133

3234
def __repr__(self):
    """
    Returns the same text as ``__str__``, so the interactive representation
    of a dataset matches its printed form.
    """
    return self.__str__()
3436

37+
def info(self):
    """
    Prints basic info about the dataset:

    - Dataset name
    - Dataset ID
    - Number of images contained in the dataset
    - Number of annotation sets contained in the dataset

    The number of annotation sets is obtained by calling
    ``self.annotation_sets()`` and counting the result.
    """
    # Built line by line so the printed text never picks up the indentation
    # of the surrounding source code.
    template = "\n".join(
        [
            "Dataset name: {name}",
            "Dataset ID: {id}",
            "Images: {n_images}",
            "Annotation Sets: {n_annotation_sets}",
        ]
    )
    print(
        template.format(
            id=self.id,
            name=self.name,
            n_images=self.n_images,
            n_annotation_sets=len(self.annotation_sets()),
        )
    )
54+
3555
def add_data(
3656
self,
3757
local_files: List[str] = None,
@@ -41,7 +61,7 @@ def add_data(
4161
folder_id: int = None,
4262
annotation_set_id: int = None,
4363
class_encoding=None,
44-
wait_for_complete=True
64+
wait_for_complete=True,
4565
) -> dict:
4666
"""
4767
Adds images and/or annotations to the dataset.
@@ -115,7 +135,7 @@ def add_data(
115135
folder_id=folder_id,
116136
annotation_set_id=annotation_set_id,
117137
class_encoding=class_encoding,
118-
wait_for_complete=wait_for_complete
138+
wait_for_complete=wait_for_complete,
119139
)
120140

121141
def fetch(self):
@@ -134,7 +154,12 @@ def annotation_sets(self) -> List[AnnotationSet]:
134154
"""
135155
return self.sdk.list_annotation_sets(self.id)
136156

137-
def add_annotations(self, annotations: List[Annotation], annotation_set_id: int = None, create_new_annotation_set: bool = False):
157+
def add_annotations(
158+
self,
159+
annotations: List[Annotation],
160+
annotation_set_id: int = None,
161+
create_new_annotation_set: bool = False,
162+
):
138163
"""
139164
Fast upload of annotations to the Dataset.
140165
@@ -173,29 +198,37 @@ def add_annotations(self, annotations: List[Annotation], annotation_set_id: int
173198
174199
"""
175200
if annotation_set_id and create_new_annotation_set:
176-
raise Exception("You passed an annotation set but also set create_new_annotation_set = True. You can't have both.")
201+
raise Exception(
202+
"You passed an annotation set but also set create_new_annotation_set = True. You can't have both."
203+
)
177204

178205
if annotation_set_id:
179206
annotation_set = self.get_annotation_set(annotation_set_id)
180207
else:
181208
annotation_sets = self.annotation_sets()
182-
if len(annotation_sets)>0:
209+
if len(annotation_sets) > 0:
183210
annotation_set = self.get_annotation_set()
184211
annotation_set_id = annotation_set.id
185212

186213
temp_path, list_of_classes = create_tempfile(annotations)
187214

188-
189215
if create_new_annotation_set or (not annotation_set_id):
190216
n_annotation_sets = len(self.annotation_sets())
191-
self.create_annotation_set(annotation_task=annotations[0].task, name='my_ann_set_{}'.format(n_annotation_sets+1),
192-
classes = list_of_classes, path_to_annotation_file = temp_path)
217+
self.create_annotation_set(
218+
annotation_task=annotations[0].task,
219+
name='my_ann_set_{}'.format(n_annotation_sets + 1),
220+
classes=list_of_classes,
221+
path_to_annotation_file=temp_path,
222+
)
193223

194224
else:
195-
self.add_data(annotation_task = annotation_set.task, annotation_set_id =annotation_set.id,
196-
paths_to_upload = [temp_path])
225+
self.add_data(
226+
annotation_task=annotation_set.task,
227+
annotation_set_id=annotation_set.id,
228+
paths_to_upload=[temp_path],
229+
)
197230

198-
#TODO ALR: removing the temp_path doesn't work on Windows, hence the try except as a temp fix
231+
# TODO ALR: removing the temp_path doesn't work on Windows, hence the try except as a temp fix
199232

200233
try:
201234
os.remove(temp_path)
@@ -304,7 +337,6 @@ def create_annotation_set(
304337

305338
return annotation_set
306339

307-
308340
def get_annotation_set(self, annotation_set_id: int = None) -> AnnotationSet:
309341
"""
310342
Retrieves annotation set with given id.
@@ -323,7 +355,11 @@ def get_annotation_set(self, annotation_set_id: int = None) -> AnnotationSet:
323355
if annotation_set and annotation_set.dataset_id == self.id:
324356
return annotation_set
325357
else:
326-
raise Exception('Annotation set with ID = {} is not part of dataset {}. You can check the list of annotation sets in your dataset using dataset.annotation_sets()'.format(annotation_set_id, self.__str__()))
358+
raise Exception(
359+
'Annotation set with ID = {} is not part of dataset {}. You can check the list of annotation sets in your dataset using dataset.annotation_sets()'.format(
360+
annotation_set_id, self.__str__()
361+
)
362+
)
327363

328364
def default_annotation_set(self) -> AnnotationSet:
329365
"""
@@ -332,11 +368,20 @@ def default_annotation_set(self) -> AnnotationSet:
332368
"""
333369
annotation_sets = self.annotation_sets()
334370

335-
if len(annotation_sets)>1:
336-
raise Exception('Define which annotation set you want to use. ' + self.__str__() + ' has ' + str(len(annotation_sets)) + ' annotation sets. You can see them with `my_dataset.annotation_sets()`')
371+
if len(annotation_sets) > 1:
372+
raise Exception(
373+
'Define which annotation set you want to use. '
374+
+ self.__str__()
375+
+ ' has '
376+
+ str(len(annotation_sets))
377+
+ ' annotation sets. You can see them with `my_dataset.annotation_sets()`'
378+
)
337379

338-
elif len(annotation_sets) ==0:
339-
raise Exception(self.__str__() + " doesn't have any annotations. You can check the list of annotation sets with `my_dataset.annotation_sets()`")
380+
elif len(annotation_sets) == 0:
381+
raise Exception(
382+
self.__str__()
383+
+ " doesn't have any annotations. You can check the list of annotation sets with `my_dataset.annotation_sets()`"
384+
)
340385

341386
return annotation_sets[0]
342387

@@ -415,7 +460,7 @@ def images(self, limit: int = None, offset: int = None) -> List[Image]:
415460
"""
416461
return self.sdk.list_dataset_images(self.id, limit=limit, offset=offset)
417462

418-
def image(self, img_filename = None, img_id = None) -> Image:
463+
def image(self, img_filename=None, img_id=None) -> Image:
419464
"""
420465
Returns the :class:`remo.Image` with matching img_filename or img_id.
421466
Pass either img_filename or img_id.
@@ -427,13 +472,12 @@ def image(self, img_filename = None, img_id = None) -> Image:
427472
Returns:
428473
:class:`remo.Image`
429474
"""
430-
#TODO ALR: do we need to raise an error if no image is found?
431-
#TODO ALR: we have a sdk.get_image by img_id. Should we implement get_image by img_name in the server for faster processing?
475+
# TODO ALR: do we need to raise an error if no image is found?
476+
# TODO ALR: we have a sdk.get_image by img_id. Should we implement get_image by img_name in the server for faster processing?
432477

433478
if (img_filename) and (img_id):
434479
raise Exception("You passed both img_filename and img_id. Pass only one of the two")
435480

436-
437481
if img_filename:
438482
list_of_images = self.images()
439483
for i_image in list_of_images:

0 commit comments

Comments
 (0)