11import base64
2+ import uuid
23
34from clarifai_datautils .constants .base import DATASET_UPLOAD_TASKS
45
@@ -27,7 +28,10 @@ def __getitem__(self, index: int):
2728 meta .pop ('coordinates' , None )
2829 meta .pop ('detection_class_prob' , None )
2930 image_data = meta .pop ('image_base64' , None )
30- id = meta .get ('input_id' , None )
31+ try :
32+ id = self .elements [index ].element_id [:8 ]
33+ except (IndexError , AttributeError , TypeError ):
34+ id = str (uuid .uuid4 ())[:8 ]
3135 if image_data is not None :
3236 # Ensure image_data is already bytes before encoding
3337 image_data = base64 .b64decode (image_data )
@@ -39,7 +43,8 @@ def __getitem__(self, index: int):
3943 if self .elements [index ].to_dict ()['type' ] == 'Table' :
4044 meta ['type' ] = 'table'
4145
42- return MultiModalFeatures (text = text , image_bytes = image_data , metadata = meta , id = id )
46+ return MultiModalFeatures (
47+ text = text , image_bytes = image_data , labels = [self .pipeline_name ], metadata = meta , id = id )
4348
# Length hook: returns the number of entries held in self.elements.
# NOTE(review): the leading "4449" / "4550" look like old/new line numbers fused in
# from a diff view (this hunk is headed "@@ -39,7 +43,8 @@"); they are not part of
# the method and must be stripped before this text can run as Python.
4449 def __len__ (self ):
# Delegates directly to len() on the stored elements; no filtering is applied here.
4550 return len (self .elements )
@@ -64,7 +69,10 @@ def __getitem__(self, index: int):
6469 id = self .elements [index ].to_dict ().get ('element_id' , None )
6570 id = id [:48 ] if id is not None else None
6671 return TextFeatures (
67- text = self .elements [index ].text , metadata = self .elements [index ].metadata .to_dict (), id = id )
72+ text = self .elements [index ].text ,
73+ labels = self .pipeline_name ,
74+ metadata = self .elements [index ].metadata .to_dict (),
75+ id = id )
6876
# Length hook: returns the number of entries held in self.elements.
# NOTE(review): the leading "6977" / "7078" look like old/new line numbers fused in
# from a diff view (this hunk is headed "@@ -64,7 +69,10 @@"); they are not part of
# the method and must be stripped before this text can run as Python.
6977 def __len__ (self ):
# Delegates directly to len() on the stored elements; no filtering is applied here.
7078 return len (self .elements )
0 commit comments