Skip to content

Commit ec7e1de

Browse files
authored
Merge pull request #9 from ImageMarkup/support-metadata-types
2 parents 5c91b2b + 373ca41 commit ec7e1de

File tree

4 files changed

+22
-6
lines changed

4 files changed

+22
-6
lines changed

isic_cli/cli/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,17 @@ def _extract_metadata(
4444
metadata_fields = set()
4545

4646
for image in images:
47-
metadata_fields |= set(image.get('metadata', {}).keys())
47+
metadata_fields |= set(image['metadata']['acquisition'].keys())
48+
metadata_fields |= set(image['metadata']['clinical'].keys())
4849
metadata.append(
4950
{
5051
**{
5152
'isic_id': image['isic_id'],
5253
'attribution': image['attribution'],
5354
'copyright_license': image['copyright_license'],
5455
},
55-
**image['metadata'],
56+
**image['metadata']['acquisition'],
57+
**image['metadata']['clinical'],
5658
}
5759
)
5860

isic_cli/io/http.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,18 @@ def get_num_images(
123123
before_sleep=before_sleep_log(logger, logging.DEBUG),
124124
)
125125
def download_image(image: dict, to: Path, progress, task) -> None:
126+
dest_path = to / f'{image["isic_id"]}.JPG'
127+
128+
# Avoid re-downloading the image if one of the same name/size exists. This is a decent
129+
# enough proxy for detecting file differences without going through a hashing mechanism.
130+
if dest_path.exists() and dest_path.stat().st_size == image['files']['full']['size']:
131+
progress.update(task, advance=1)
132+
return
133+
126134
# intentionally don't pass auth headers, since these are s3 signed urls that
127135
# already contain credentials.
128136
with IsicCliSession() as session:
129-
r = session.get(image['urls']['full'], stream=True)
137+
r = session.get(image['files']['full']['url'], stream=True)
130138
r.raise_for_status()
131139

132140
temp_file_name = None
@@ -135,6 +143,6 @@ def download_image(image: dict, to: Path, progress, task) -> None:
135143
for chunk in r.iter_content(1024 * 1024 * 5):
136144
outfile.write(chunk)
137145

138-
shutil.move(temp_file_name, to / f'{image["isic_id"]}.JPG')
146+
shutil.move(temp_file_name, dest_path)
139147

140148
progress.update(task, advance=1)

tests/test_cli_image.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ def _download_image_side_effect(*args, **kwargs):
3636
'isic_id': 'ISIC_0000000',
3737
'copyright_license': 'CC-0',
3838
'attribution': 'some-institution',
39-
'metadata': {'sex': 'male', 'diagnosis': 'melanoma'},
39+
'metadata': {
40+
'acquisition': {},
41+
'clinical': {'sex': 'male', 'diagnosis': 'melanoma'},
42+
},
4043
}
4144
]
4245
),

tests/test_cli_metadata.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ def test_metadata_download(cli_run, mocker):
2323
'isic_id': 'ISIC_0000000',
2424
'attribution': 'Foo',
2525
'copyright_license': 'CC-0',
26-
'metadata': {'sex': 'male', 'diagnosis': 'melanoma'},
26+
'metadata': {
27+
'acquisition': {},
28+
'clinical': {'sex': 'male', 'diagnosis': 'melanoma'},
29+
},
2730
}
2831
]
2932
),

0 commit comments

Comments
 (0)