Skip to content

Commit 66bbe0d

Browse files
authored
Merge pull request #323 from roboflow/import-paligemma-format
Import paligemma format into text-image-pairs project
2 parents f0656cb + 009e322 commit 66bbe0d

19 files changed

+82
-15
lines changed

roboflow/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from roboflow.models import CLIPModel, GazeModel # noqa: F401
1616
from roboflow.util.general import write_line
1717

18-
__version__ = "1.1.44"
18+
__version__ = "1.1.45"
1919

2020

2121
def check_key(api_key, model, notebook, num_retries=0):

roboflow/roboflowpy.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ def download(args):
4747

4848

4949
def import_dataset(args):
50-
rf = roboflow.Roboflow()
50+
api_key = load_roboflow_api_key(args.workspace)
51+
rf = roboflow.Roboflow(api_key)
5152
workspace = rf.workspace(args.workspace)
5253
workspace.upload_dataset(
5354
dataset_path=args.folder,

roboflow/util/folderparser.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .image_utils import load_labelmap
99

1010
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"}
11-
ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv"}
11+
ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv", ".jsonl"}
1212
LABELMAPS_EXTENSIONS = {".labels", ".yaml", ".yml"}
1313

1414

@@ -107,13 +107,14 @@ def _map_annotations_to_images_1tomany(images, annotationFiles):
107107
dirname = image["dirname"]
108108
annotationsInSameDir = annotationsByDirname.get(dirname, [])
109109
if annotationsInSameDir:
110-
if len(annotationsInSameDir) > 1:
111-
print(f"warning: found multiple annotation files on dir {dirname}")
112-
annotationFile = annotationsInSameDir[0]
113-
format = annotationFile["parsedType"]
114-
image["annotationfile"] = _filterIndividualAnnotations(
115-
image, annotationFile, format, imgRefMap, annotationMap
116-
)
110+
for annotationFile in annotationsInSameDir:
111+
format = annotationFile["parsedType"]
112+
filtered_annotations = _filterIndividualAnnotations(
113+
image, annotationFile, format, imgRefMap, annotationMap
114+
)
115+
if filtered_annotations:
116+
image["annotationfile"] = filtered_annotations
117+
break
117118

118119

119120
def _build_image_and_annotation_maps(annotationFiles):
@@ -182,11 +183,16 @@ def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotatio
182183
return _annotation
183184
else:
184185
return None
186+
elif format == "jsonl":
187+
jsonlLines = [json.dumps(line) for line in parsed if line["image"] == image["name"]]
188+
if jsonlLines:
189+
_annotation = {"name": "annotation.jsonl", "rawText": "\n".join(jsonlLines)}
190+
return _annotation
185191
return None
186192

187193

188194
def _loadAnnotations(folder, annotations):
189-
valid_extensions = {".json", ".csv"}
195+
valid_extensions = {".json", ".csv", ".jsonl"}
190196
annotations = [a for a in annotations if a["extension"] in valid_extensions]
191197
for ann in annotations:
192198
extension = ann["extension"]
@@ -197,12 +203,29 @@ def _loadAnnotations(folder, annotations):
197203
if parsedType:
198204
ann["parsed"] = parsed
199205
ann["parsedType"] = parsedType
206+
elif extension == ".jsonl":
207+
ann["parsed"] = _read_jsonl(f"{folder}{ann['file']}")
208+
ann["parsedType"] = "jsonl"
200209
elif extension == ".csv":
201210
ann["parsedType"] = "csv"
202211
ann["parsed"] = _parseAnnotationCSV(f"{folder}{ann['file']}")
203212
return annotations
204213

205214

215+
def _read_jsonl(path):
    """Parse a JSON Lines file into a list of decoded objects.

    Each non-blank line of the file is decoded independently with
    ``json.loads``. Blank or whitespace-only lines are skipped silently.
    Lines that are not valid JSON are skipped with a warning printed to
    stdout that includes the file path and the 1-based line number.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per valid line, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    data = []
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding (which is not UTF-8 on every system).
    with open(path, encoding="utf-8") as file:
        for linenum, line in enumerate(file, 1):
            # Lines yielded by file iteration keep their trailing newline, so
            # the emptiness check must run on the stripped text; otherwise
            # blank lines fall through and are reported as invalid JSON.
            line = line.strip()
            if not line:
                continue
            try:
                json_object = json.loads(line)
            except json.JSONDecodeError:
                print(f"Warning: Skipping invalid JSON line in {path}:{linenum}")
            else:
                data.append(json_object)
    return data
227+
228+
206229
def _parseAnnotationCSV(filename):
207230
# TODO: use a proper CSV library?
208231
with open(filename) as f:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# ChartQA > 2024-08-28 7:21pm
2+
https://universe.roboflow.com/roboflow-jvuqo/chartqa-c9zny
3+
4+
Provided by a Roboflow user
5+
License: CC BY 4.0
39.3 KB
Loading
33.8 KB
Loading
34.9 KB
Loading
18.7 KB
Loading
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"}
2+
{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"}
3+
{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"}
4+
{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"}
5+
{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2013?","suffix":"Retail"}
6+
{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2014?","suffix":"Electronics"}
7+
{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"}
8+
{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"}
9+
{"image":"eaab023f1ce380c4c9163415facc3c0d_png.rf.01c5a1f19653c056bbb3b0c8fc2d752d.jpg","prefix":"What's the percentage value of leftmost bar?","suffix":"24"}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"}
2+
{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"}
3+
{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"}
4+
{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"}

0 commit comments

Comments
 (0)