Skip to content

Commit 9e024de

Browse files
[DEVX-876]: Fixed Issue with Filename as Invalid Input ID (#501)
* [DEVX-876]: Fixed Issue with Filename as Invalid Input ID * [DEVX-876]: Fixed Issue with Filename as Invalid Input ID * [WFP-902]: Fixed Deployment Tests (#502) * [WFP-902]: Fixed Deployment Tests * [WFP-902]: Fixed Deployment Tests
1 parent 7a69cf1 commit 9e024de

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

clarifai/client/input.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from clarifai.constants.input import MAX_UPLOAD_BATCH_SIZE
2323
from clarifai.errors import UserError
2424
from clarifai.utils.logging import logger
25-
from clarifai.utils.misc import BackoffIterator, Chunker
25+
from clarifai.utils.misc import BackoffIterator, Chunker, clean_input_id
2626

2727

2828
class Inputs(Lister, BaseClient):
@@ -282,7 +282,7 @@ def get_image_inputs_from_folder(folder_path: str, dataset_id: str = None,
282282
for filename in os.listdir(folder_path):
283283
if filename.split('.')[-1] not in ['jpg', 'jpeg', 'png', 'tiff', 'webp']:
284284
continue
285-
input_id = filename.split('.')[0]
285+
input_id = clean_input_id(filename.split('.')[0])
286286
image_pb = resources_pb2.Image(base64=open(os.path.join(folder_path, filename), 'rb').read())
287287
input_protos.append(
288288
Inputs._get_proto(
@@ -473,7 +473,7 @@ def get_text_inputs_from_folder(folder_path: str, dataset_id: str = None,
473473
for filename in os.listdir(folder_path):
474474
if filename.split('.')[-1] != 'txt':
475475
continue
476-
input_id = filename.split('.')[0]
476+
input_id = clean_input_id(filename.split('.')[0])
477477
text_pb = resources_pb2.Text(raw=open(os.path.join(folder_path, filename), 'rb').read())
478478
input_protos.append(
479479
Inputs._get_proto(

clarifai/utils/misc.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import re
23
import uuid
34
from typing import Any, Dict, List
45

@@ -75,3 +76,13 @@ def concept_relations_accumulation(relations_dict: Dict[str, Any], subject_conce
7576
def get_uuid(val: int) -> str:
    """Return the first ``val`` hex characters of a freshly generated UUID4.

    Args:
        val: Number of leading hexadecimal characters to keep.

    Returns:
        A lowercase hexadecimal string. ``uuid4().hex`` is 32 characters
        long, so the result is never longer than that.
    """
    hex_digits = uuid.uuid4().hex
    return hex_digits[:val]
79+
80+
81+
def clean_input_id(input_id: str) -> str:
    """Clean an arbitrary string (e.g. a file stem) into a valid input ID.

    The result contains only lowercase ASCII letters, digits, ``_`` and
    ``-``; it has no repeated ``_`` or repeated ``-`` and does not start or
    end with either of them.

    Args:
        input_id: Raw identifier candidate.

    Returns:
        The sanitized identifier. This can be an empty string when
        ``input_id`` contains no allowed characters at all, so callers that
        need a non-empty ID must check for that.
    """
    # Dots, commas, spaces and slashes act as word separators.
    cleaned = re.sub(r'[., /]+', '_', input_id.lower())
    # Drop disallowed characters *before* collapsing and stripping; removing
    # them afterwards could re-expose leading/trailing separators or create
    # new runs such as "a_$_b" -> "a__b".
    cleaned = re.sub(r'[^a-z0-9_-]+', '', cleaned)
    cleaned = re.sub(r'_+', '_', cleaned)
    cleaned = re.sub(r'-+', '-', cleaned)
    return cleaned.strip('_-')

0 commit comments

Comments
 (0)