@@ -15,7 +15,7 @@ using SHA: sha256
1515using CEnum: @cenum
1616using UUIDs: UUID
1717using Printf: @printf
18- using CodecBzip2: Bzip2Compressor, Bzip2Decompressor
18+ using CodecBzip2: Bzip2Compressor, Bzip2Decompressor, Bzip2DecompressorStream
1919
2020
2121# # enums
@@ -338,48 +338,16 @@ tag_value_io["DEPF"] = (
338338tag_value_io[" SARC" ] = (
339339 # Source archive; an identifier (null-terminated ASCII) and BZip2-compressed tarball
340340 @NamedTuple {id:: String , archive:: Vector{UInt8} },
(io, nb) -> begin
    # Reader for the SARC tag payload: `io` is positioned at the start of the
    # payload and `nb` is the payload size in bytes. Returns `(; id, archive)`
    # where `archive` holds the decompressed tarball bytes.

    # The identifier is null-terminated; `readuntil` consumes the terminator
    # but does not include it in the returned bytes.
    id = String(readuntil(io, UInt8(0)))

    # Everything after the identifier and its terminator is the archive.
    compressed = read(io, nb - sizeof(id) - 1)

    # These archives are padded to multiples of 16KB, which a plain
    # `transcode(Bzip2Decompressor, ...)` rejects. With `stop_on_end=true`
    # decompression stops at the end of the BZip2 stream, so the trailing
    # padding is never fed to the decoder.
    stream = Bzip2DecompressorStream(IOBuffer(compressed); stop_on_end=true)
    archive = try
        read(stream)
    finally
        # Release the decompressor state deterministically, also on error.
        close(stream)
    end

    (; id, archive)
end,
0 commit comments