Skip to content

Commit 3cccaea

Browse files
committed
Translate invalid content types in HCA files (#7193, #7629)
1 parent 669b7b3 commit 3cccaea

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

src/azul/plugins/metadata/hca/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -545,6 +545,7 @@ def from_metadata(cls,
545545
else:
546546
assert ';' not in content_type, R(
547547
'Unexpected MIME parameter in content type', content_type)
548+
content_type = cls._content_type_corrections.get(content_type, content_type)
548549
return cls(uuid=json_str(descriptor['file_id']),
549550
name=json_str(json_mapping(metadata['file_core'])['file_name']),
550551
version=json_str(descriptor['file_version']),
@@ -566,3 +567,15 @@ def to_manifest_entry(self) -> JSON:
566567
entry['content-type'] = entry.pop('content_type')
567568
entry['indexed'] = False
568569
return entry
570+
571+
# Replacements for invalid content type values found in HCA file metadata.
# Each key is a value that is not a valid MIME type and each value is the
# MIME type substituted for it. `from_metadata` looks the content type up
# here via `.get(content_type, content_type)`, so any content type without
# an entry is passed through unchanged.
_content_type_corrections = {
572+
'.gz': 'application/gzip',
573+
'csv': 'text/csv',
574+
'fastq.gz': 'application/gzip',
575+
'h5': 'application/octet-stream',
576+
'h5ad': 'application/octet-stream',
577+
'txt': 'text/plain',
578+
# Only correct for files created with Excel 2007 or later. Hopefully we
579+
# don't have any files older than that.
580+
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
581+
}

0 commit comments

Comments
 (0)