
Commit 89fc2f2

Merge pull request #2 from Clarifai/DEVX-371-export_update
[DEVX-371]: Export from Clarifai Platform to other formats
2 parents f19fbe8 + 26d50c6 · commit 89fc2f2
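
For orientation, a minimal end-to-end sketch of the workflow this commit enables, assembled from the README changes below; the user/app/dataset IDs and paths are placeholders.

```python
# export CLARIFAI_PAT={your personal access token}  # set PAT as env variable
from clarifai.client.dataset import Dataset
from clarifai_datautils import ImageAnnotations

# Export a dataset from the Clarifai Platform as a zip archive.
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.export(save_path='output.zip', split='train')

# Extract the zip, then load the extracted folder with the new 'clarifai' format.
clarifai_dataset = ImageAnnotations.import_from(path='folder_path', format='clarifai')

# Convert to any other supported annotation format.
clarifai_dataset.export_to(path='output_path', format='coco_detection', save_images=True)
```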

File tree

9 files changed: +174 -17 lines changed


README.md

Lines changed: 5 additions & 3 deletions

@@ -67,9 +67,11 @@ from clarifai_datautils import ImageAnnotations
 #import from folder
 coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')
 
-#clarifai dataset loader object
-coco_dataset.dataloader
-
+#Using clarifai SDK to upload to Clarifai Platform
+#export CLARIFAI_PAT={your personal access token} # set PAT as env variable
+from clarifai.client.dataset import Dataset
+dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
+dataset.upload_dataset(dataloader=coco_dataset.dataloader)
 
 #info about loaded dataset
 coco_dataset.get_info()

clarifai_datautils/constants/annotations.py

Lines changed: 3 additions & 1 deletion

@@ -1,6 +1,7 @@
 IMAGE_ANNOTATION_FORMATS = [
     'coco_segmentation', 'voc_detection', 'yolo', 'cifar', 'coco_detection', 'cvat', 'imagenet',
-    'kitti', 'label_me', 'mnist', 'open_images', 'vgg_face2', 'lfw', 'cityscapes', 'ade20k2017'
+    'kitti', 'label_me', 'mnist', 'open_images', 'vgg_face2', 'lfw', 'cityscapes', 'ade20k2017',
+    'clarifai'
 ]
 
 IMAGE_ANNOTATION_TASKS = ['visual_classification', 'visual_detection', 'visual_segmentation']
@@ -11,6 +12,7 @@
     'mnist': 'visual_classification',
     'vgg_face2': 'visual_classification',
     'lfw': 'visual_classification',
+    'clarifai': 'visual_detection',
     'voc_detection': 'visual_detection',
     'yolo': 'visual_detection',
     'coco_detection': 'visual_detection',

clarifai_datautils/image/annotation_conversion/README.md

Lines changed: 20 additions & 4 deletions

@@ -11,9 +11,6 @@ from clarifai_datautils import ImageAnnotations
 #import from folder
 coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')
 
-#clarifai dataset loader object
-coco_dataset.dataloader
-
 
 #info about loaded dataset
 coco_dataset.get_info()
@@ -24,7 +21,7 @@ coco_dataset.export_to('voc_detection')
 ```
 
 
-### With Clarifai Python SDK
+### Upload using Clarifai Python SDK
 ```python
 from clarifai_datautils import ImageAnnotations
 coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')
@@ -38,6 +35,24 @@ dataset.upload_dataset(dataloader=coco_dataset.dataloader)
 ```
 
 
+### Export to other formats from Clarifai Platform
+```python
+
+#clarifai SDK
+#export CLARIFAI_PAT={your personal access token} # set PAT as env variable
+from clarifai.client.dataset import Dataset
+dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
+dataset.export(save_path='output.zip',split='train')
+
+#Extract the zip file and pass the folder to ImageAnnotations
+from clarifai_datautils import ImageAnnotations
+clarifai_dataset = ImageAnnotations.import_from(path='folder_path',format= 'clarifai')
+
+#export to other formats
+clarifai_dataset.export_to(path='output_path',format='coco_detection',save_images=True)
+
+```
+
 ## Supported Formats
 
 | Annotation format | Format | TASK |
@@ -54,6 +69,7 @@ dataset.upload_dataset(dataloader=coco_dataset.dataloader)
 | [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) | kitti | detection |
 | [LabelMe](http://labelme.csail.mit.edu/Release3.0) | label_me | detection |
 | [Open Images](https://storage.googleapis.com/openimages/web/download.html) | open_images | detection |
+| [Clarifai](https://github.com/Clarifai/examples/tree/main/Data_Utils) | clarifai | detection |
 | [COCO(segmentation)](http://cocodataset.org/#format-data) | coco_segmentation | segmentation |
 | [Cityscapes](https://www.cityscapes-dataset.com/) | cityscapes | segmentation |
 | [ADE](https://www.cityscapes-dataset.com/) | ade20k2017 | segmentation |

clarifai_datautils/image/annotation_conversion/annotations.py

Lines changed: 30 additions & 7 deletions

@@ -12,6 +12,7 @@
 from clarifai_datautils.image.annotation_conversion.loaders import (ClassificationDataLoader,
                                                                     DetectionDataLoader,
                                                                     SegmentationDataLoader)
+from clarifai_datautils.image.annotation_conversion.utils import Clarifai_to_Datumaro
 
 
 class ImageAnnotations():
@@ -56,11 +57,15 @@ def import_from(cls, path: str, format: str) -> Dataset:
     #task of the dataset
     task = IMAGE_ANNOTATION_FORMATS_TO_TASKS[format]
 
-    try:
-      format_name = IMAGE_FORMAT_MAP[format]
-      dataset = Dataset.import_from(path, format_name)
-    except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex:
-      raise AnnotationsDatasetError(ex)
+    #import dataset
+    if format == 'clarifai':
+      dataset = Clarifai_to_Datumaro(path).convert()
+    else:
+      try:
+        format_name = IMAGE_FORMAT_MAP[format]
+        dataset = Dataset.import_from(path, format_name)
+      except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex:
+        raise AnnotationsDatasetError(ex)
 
     return ImageAnnotations(dataset, format, task)
 
@@ -84,12 +89,13 @@ def get_info(self,) -> Dict[str, Any]:
         'categories': list(self._dataset.get_categories_info())
     }
 
-  def export_to(self, path: str, format: str) -> None:
+  def export_to(self, path: str, format: str, save_images: bool = False) -> None:
     """Exports a dataset to a given path and format.
 
     Args:
        path (str): The path to the dataset.
        format (str): The format of the dataset.
+       save_images (bool): Whether to save the images or not.
 
     Example:
        >>> from clarifai_datautils import ImageAnnotations
@@ -99,9 +105,13 @@ def export_to(self, path: str, format: str) -> None:
     if format not in IMAGE_ANNOTATION_FORMATS:
       raise AnnotationsFormatError('Invalid format')
 
+    if format == 'clarifai':
+      raise AnnotationsFormatError(
+          'Cannot export to clarifai format. Use clarifai SDK to upload the dataset.')
+
     try:
       format_name = IMAGE_FORMAT_MAP[format]
-      self._dataset.export(path, format_name)
+      self._dataset.export(path, format_name, save_media=save_images)
     except Exception as ex:
       raise AnnotationsDatasetError(ex)
 
@@ -130,6 +140,19 @@ def detect_format(path: str) -> str:
       raise AnnotationsFormatError('Given folder does not contain a supported dataset format')
     return dataset_format
 
+  @staticmethod
+  def list_formats() -> list:
+    """Lists the supported formats.
+
+    Returns:
+        A list of supported formats.
+
+    Example:
+        >>> from clarifai_datautils import ImageAnnotations
+        >>> ImageAnnotations.list_formats()
+    """
+    return IMAGE_ANNOTATION_FORMATS
+
   @property
   def dataloader(self) -> ClarifaiDataLoader:
     """Returns a Clarifai Dataloader Object to pass to SDK Dataset Upload Functionality.
clarifai_datautils/image/annotation_conversion/utils.py

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+import json
+import os
+from typing import Tuple
+
+import PIL
+from datumaro.components.annotation import Bbox
+from datumaro.components.dataset import Dataset
+from datumaro.components.dataset_base import DatasetItem
+from datumaro.components.media import Image
+
+from clarifai_datautils.errors import AnnotationsDatasetError, AnnotationsFormatError
+
+
+class Clarifai_to_Datumaro():
+
+  def __init__(
+      self,
+      main_path: str,
+  ):
+    """Converts a clarifai dataset to a Datumaro dataset.
+
+    Args:
+        path (str): The path to the clarifai dataset.
+
+    """
+    self.main_path = main_path
+    self.image_list = os.listdir(os.path.join(self.main_path, 'inputs'))
+    self.annotations_list = os.listdir(os.path.join(self.main_path, 'annotations'))
+    self.label_map = {}
+
+  def convert(self) -> Dataset:
+    """Check folder format and creates a Datumaro Dataset.
+
+    Returns:
+        A Datumaro dataset object.
+    """
+    self.check_folder()
+    # create a dataset
+    dataset = Dataset.from_iterable(
+        iterable=[self.create_item(path) for path in self.image_list],
+        media_type=Image,
+        categories=list(self.label_map.keys()))
+
+    return dataset
+
+  def create_item(self, image_path: str) -> DatasetItem:
+    """Creates a Datumaro item from an image path."""
+    image_full_path = os.path.join(self.main_path, 'inputs', image_path)
+    image_data = Image.from_file(image_full_path)
+    width, height = PIL.Image.open(image_full_path).size
+    try:
+      with open(
+          os.path.join(self.main_path, 'annotations', image_path.split('.png')[0] + '.json'),
+          'r') as file:
+        item_data = json.load(file)
+        # create annotations
+        annotations = []
+        for annot in item_data:
+          #check if the annotation has a bounding box
+          if 'regionInfo' in annot.keys() and 'boundingBox' in annot['regionInfo'].keys():
+            x, y, w, h = self.clarifai_bbox_to_datumaro_bbox(annot['regionInfo']['boundingBox'],
+                                                             width, height)
+            label = annot['data']['concepts'][0]['name']
+            value = self.label_map.get(label, len(self.label_map))
+            self.label_map[label] = value
+            annotations.append(Bbox(x=x, y=y, w=w, h=h, label=value))
+
+    except FileNotFoundError:
+      annotations = []
+
+    return DatasetItem(id=image_path.split('.png')[0], media=image_data, annotations=annotations)
+
+  def clarifai_bbox_to_datumaro_bbox(self, clarifai_bbox, width, height) -> Tuple[int]:
+    left_col = clarifai_bbox['leftCol'] * width
+    top_row = clarifai_bbox['topRow'] * height
+    right_col = clarifai_bbox['rightCol'] * width
+    bottom_row = clarifai_bbox['bottomRow'] * height
+
+    obj_box = (left_col, top_row, right_col - left_col, bottom_row - top_row)
+    return obj_box
+
+  def check_folder(self):
+    """Checks the clarifai folder format."""
+    if not os.path.exists(self.main_path):
+      raise AnnotationsDatasetError(f'Folder not found at {self.main_path}')
+
+    if not os.path.exists(os.path.join(self.main_path, 'inputs')):
+      raise AnnotationsFormatError(
+          f'Folder does not contain an "inputs" folder at {self.main_path}')
+    if not os.path.exists(os.path.join(self.main_path, 'annotations')):
+      raise AnnotationsFormatError(
+          f'Folder does not contain an "annotations" folder at {self.main_path}')
+
+    if not all(img.endswith('.png') for img in self.image_list):
+      raise AnnotationsFormatError(f'Folder should only contain images at {self.main_path}/inputs')
+    if not all(img.endswith('.json') for img in self.annotations_list):
+      raise AnnotationsFormatError(
+          f'Folder should only contain annotations at {self.main_path}/annotations')
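
To make the coordinate conversion in clarifai_bbox_to_datumaro_bbox concrete: Clarifai regions store boxes as fractions of the image (topRow, leftCol, bottomRow, rightCol), while Datumaro's Bbox takes pixel-space (x, y, w, h). A standalone sketch of the same arithmetic, using the first region from the sample annotation file added in this commit and an assumed 640x500 image size (the real test image's dimensions are not visible in this view):

```python
# First region of the sample annotation asset; image size is assumed for illustration.
clarifai_bbox = {"topRow": 0.502, "leftCol": 0.18666667, "bottomRow": 0.626, "rightCol": 0.5733333}
width, height = 640, 500  # hypothetical pixel dimensions

x = clarifai_bbox['leftCol'] * width        # ~119.47
y = clarifai_bbox['topRow'] * height        # 251.0
w = clarifai_bbox['rightCol'] * width - x   # ~247.47
h = clarifai_bbox['bottomRow'] * height - y # 62.0

print((x, y, w, h))  # the pixel-space tuple handed to datumaro's Bbox(x=..., y=..., w=..., h=...)
```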

tests/annotations/test_clarifai_loader.py

Lines changed: 9 additions & 2 deletions

@@ -14,6 +14,7 @@
 KITTI_PATH = get_asset_path('kitti_detection')
 LABEL_ME_PATH = get_asset_path('labelme_dataset')
 OPEN_IMAGES_PATH = get_asset_path('openimages_dataset')
+CLARIFAI_PATH = get_asset_path('clarifai_dataset')
 
 COCO_SEGMENTATION_PATH = get_asset_path('coco_segmentation')
 CITYSCAPES_PATH = get_asset_path('cityscapes_dataset')
@@ -83,8 +84,6 @@ def test_coco_detection_loader(self,):
     dataloader = annotation_object.dataloader
     assert dataloader.task == 'visual_detection'
     assert len(dataloader) == 2
-    assert dataloader[0].labels == ['b']
-    assert dataloader[0].id == 'a'
     assert isinstance(dataloader[0].image_bytes, bytes)
 
   def test_cvat_loader(self,):
@@ -121,6 +120,14 @@ def test_open_images_loader(self,):
     assert dataloader[1].id == 'aa'
     assert isinstance(dataloader[0].image_bytes, bytes)
 
+  def test_clarifai_loader(self,):
+    annotation_object = ImageAnnotations.import_from(path=CLARIFAI_PATH, format='clarifai')
+    dataloader = annotation_object.dataloader
+    assert dataloader.task == 'visual_detection'
+    assert len(dataloader) == 1
+    assert dataloader[0].id == '000464'
+    assert isinstance(dataloader[0].image_bytes, bytes)
+
   def test_coco_segmentation_loader(self,):
     annotation_object = ImageAnnotations.import_from(
         path=COCO_SEGMENTATION_PATH, format='coco_segmentation')
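
The clarifai_dataset asset referenced by CLARIFAI_PATH is not expanded in this view, but check_folder() above implies its layout: an inputs/ folder of .png files and an annotations/ folder of matching .json files. A hedged sketch of building such a fixture; everything here is illustrative except that layout and the '000464' id the test asserts:

```python
import json
import os

from PIL import Image

root = 'clarifai_dataset'  # placeholder location for the fixture
os.makedirs(os.path.join(root, 'inputs'), exist_ok=True)
os.makedirs(os.path.join(root, 'annotations'), exist_ok=True)

# A dummy image named after the id asserted in test_clarifai_loader ('000464').
Image.new('RGB', (640, 480)).save(os.path.join(root, 'inputs', '000464.png'))

# A matching annotation file with a single bounding-box region (illustrative values).
regions = [{
    "id": "example-region-id",
    "regionInfo": {"boundingBox": {"topRow": 0.1, "leftCol": 0.1, "bottomRow": 0.5, "rightCol": 0.5}},
    "data": {"concepts": [{"id": "id-cow", "name": "cow", "value": 1.0}]}
}]
with open(os.path.join(root, 'annotations', '000464.json'), 'w') as f:
  json.dump(regions, f)
```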

tests/annotations/test_import_formats.py

Lines changed: 8 additions & 0 deletions

@@ -16,6 +16,7 @@
 KITTI_PATH = get_asset_path('kitti_detection')
 LABEL_ME_PATH = get_asset_path('labelme_dataset')
 OPEN_IMAGES_PATH = get_asset_path('openimages_dataset')
+CLARIFAI_PATH = get_asset_path('clarifai_dataset')
 
 COCO_SEGMENTATION_PATH = get_asset_path('coco_segmentation')
 CITYSCAPES_PATH = get_asset_path('cityscapes_dataset')
@@ -118,6 +119,13 @@ def test_open_images_import(self,):
     assert annotation_object.task == 'visual_detection'
     assert len(annotation_object._dataset._data) == 2  # 2 images
 
+  def test_clarifai_import(self,):
+    annotation_object = ImageAnnotations.import_from(path=CLARIFAI_PATH, format='clarifai')
+    assert annotation_object.annotation_format == 'clarifai'
+    assert annotation_object.task == 'visual_detection'
+    assert len(annotation_object._dataset._data) == 1  # 1 images
+    assert annotation_object._dataset.get_annotations() == 2  # 2 annotations
+
   def test_coco_segmentation_import(self,):
     annotation_object = ImageAnnotations.import_from(
         path=COCO_SEGMENTATION_PATH, format='coco_segmentation')
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+[{"id": "c72ffee676ad6ae88acfc72791bdae14", "regionInfo": {"boundingBox": {"topRow": 0.502, "leftCol": 0.18666667, "bottomRow": 0.626, "rightCol": 0.5733333}}, "data": {"concepts": [{"id": "id-cow", "name": "cow", "value": 1.0, "appId": "demo_train_1402"}]}}, {"id": "b75d9daca8d6b1f07d2e4a39f36bb4c7", "regionInfo": {"boundingBox": {"topRow": 0.402, "leftCol": 0.152, "bottomRow": 0.588, "rightCol": 0.64}}, "data": {"concepts": [{"id": "id-cow", "name": "cow", "value": 1.0, "appId": "demo_train_1402"}]}}]

(Binary image file, 328 KB, not shown.)
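
The one-line JSON above is the new annotation asset for the test dataset; its filename is not shown in this view. A quick hedged check of how create_item() would read it, with the filename assumed from the loader's annotations/<image id>.json convention:

```python
import json

# Assumed filename; the commit view does not display the asset's path.
with open('000464.json', 'r') as f:
  regions = json.load(f)

# Regions that create_item() would turn into Bbox annotations.
boxes = [r for r in regions if 'regionInfo' in r and 'boundingBox' in r['regionInfo']]
labels = {r['data']['concepts'][0]['name'] for r in boxes}

print(len(boxes), labels)  # expected: 2 {'cow'}, matching test_clarifai_import's annotation count
```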

Comments (0)