@@ -216,6 +216,9 @@ def create_distributions_from_metadata(metadata):
216216 # Expect a SHA-256 hex digest (64 chars). Reject others.
217217 if not isinstance (checksum , str ) or len (checksum ) != 64 :
218218 raise ValueError (f"Invalid checksum for { filename } : expected SHA-256 hex (64 chars), got '{ checksum } '" )
219+ # Known compression extensions
220+ COMPRESSION_EXTS = {"gz" , "bz2" , "xz" , "zip" , "7z" , "tar" , "lz" , "zst" }
221+
219222 parts = filename .split ("." )
220223 if len (parts ) == 1 :
221224 file_format = "none"
@@ -224,8 +227,18 @@ def create_distributions_from_metadata(metadata):
224227 file_format = parts [- 1 ]
225228 compression = "none"
226229 else :
227- file_format = parts [- 2 ]
228- compression = parts [- 1 ]
230+ # Check if last part is a known compression
231+
232+ if parts [- 1 ] in COMPRESSION_EXTS :
233+ compression = parts [- 1 ]
234+ # Handle compound extensions like .tar.gz
235+ if len (parts ) > 2 and parts [- 2 ] in COMPRESSION_EXTS :
236+ file_format = parts [- 3 ] if len (parts ) > 3 else "file"
237+ else :
238+ file_format = parts [- 2 ]
239+ else :
240+ file_format = parts [- 1 ]
241+ compression = "none"
229242
230243 distributions .append (
231244 create_distribution (
@@ -688,7 +701,7 @@ def __download_list__(urls: List[str],
def __get_databus_id_parts__(uri: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Split a URI into exactly six slash-separated components.

    A leading ``https://`` or ``http://`` scheme is removed, surrounding
    slashes are stripped, and the remainder is split on ``/``.

    Args:
        uri: The URI (with or without an http/https scheme) to split.

    Returns:
        A 6-tuple of path components in order of appearance. Missing
        trailing components are ``None``; components beyond the sixth are
        discarded. An empty input yields ``("", None, None, None, None, None)``
        because splitting an empty string produces one empty segment.

    NOTE(review): the six positions presumably correspond to
    host / account / group / artifact / version / file -- confirm against
    the callers.
    """
    uri = uri.removeprefix("https://").removeprefix("http://")
    parts = uri.strip("/").split("/")
    # Pad with None if there are fewer than 6 parts; a negative repeat count
    # yields an empty list, so longer inputs are left untouched here.
    parts += [None] * (6 - len(parts))
    return tuple(parts[:6])  # return only the first 6 parts
693706
694707
0 commit comments