Skip to content

Commit 54ed643

Browse files
committed
Added input_id for elements
1 parent 454b76a commit 54ed643

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

clarifai_datautils/multimodal/pipeline/loaders.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import base64
2+
import uuid
23

34
from clarifai_datautils.constants.base import DATASET_UPLOAD_TASKS
45

@@ -27,7 +28,10 @@ def __getitem__(self, index: int):
2728
meta.pop('coordinates', None)
2829
meta.pop('detection_class_prob', None)
2930
image_data = meta.pop('image_base64', None)
30-
id = meta.get('input_id', None)
31+
try:
32+
id = self.elements[index].element_id[:8]
33+
except (IndexError, AttributeError, TypeError):
34+
id = str(uuid.uuid4())[:8]
3135
if image_data is not None:
3236
# Ensure image_data is already bytes before encoding
3337
image_data = base64.b64decode(image_data)
@@ -39,7 +43,8 @@ def __getitem__(self, index: int):
3943
if self.elements[index].to_dict()['type'] == 'Table':
4044
meta['type'] = 'table'
4145

42-
return MultiModalFeatures(text=text, image_bytes=image_data, metadata=meta, id=id)
46+
return MultiModalFeatures(
47+
text=text, image_bytes=image_data, labels=[self.pipeline_name], metadata=meta, id=id)
4348

4449
def __len__(self):
4550
return len(self.elements)
@@ -64,7 +69,10 @@ def __getitem__(self, index: int):
6469
id = self.elements[index].to_dict().get('element_id', None)
6570
id = id[:48] if id is not None else None
6671
return TextFeatures(
67-
text=self.elements[index].text, metadata=self.elements[index].metadata.to_dict(), id=id)
72+
text=self.elements[index].text,
73+
labels=self.pipeline_name,
74+
metadata=self.elements[index].metadata.to_dict(),
75+
id=id)
6876

6977
def __len__(self):
7078
return len(self.elements)

0 commit comments

Comments
 (0)