Skip to content

Commit b949b14

Browse files
authored
Metal library parsing: using CodecBzip2 feature to ignore padding. (#504)
1 parent 1820957 commit b949b14

File tree

2 files changed

+8
-40
lines changed

2 files changed

+8
-40
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Adapt = "4"
3838
Artifacts = "1"
3939
BFloat16s = "0.5"
4040
CEnum = "0.4, 0.5"
41-
CodecBzip2 = "0.8"
41+
CodecBzip2 = "0.8.5"
4242
ExprTools = "0.1"
4343
GPUArrays = "11.1"
4444
GPUCompiler = "0.26, 0.27, 1"

src/compiler/library.jl

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ using SHA: sha256
1515
using CEnum: @cenum
1616
using UUIDs: UUID
1717
using Printf: @printf
18-
using CodecBzip2: Bzip2Compressor, Bzip2Decompressor
18+
using CodecBzip2: Bzip2Compressor, Bzip2Decompressor, Bzip2DecompressorStream
1919

2020

2121
## enums
@@ -338,48 +338,16 @@ tag_value_io["DEPF"] = (
338338
tag_value_io["SARC"] = (
339339
# Source archive; an identifier (null-terminated ASCII) and BZip2-compressed tarball
340340
@NamedTuple{id::String, archive::Vector{UInt8}},
341-
(io, nb) -> begin
341+
(io, nb) -> begin
342342
id = String(readuntil(io, UInt8(0)))
343343
compressed = read(io, nb - sizeof(id) - 1)
344344

345-
# Bzip2Decompressor doesn't support padding (JuliaIO/CodecBzip2.jl#31),
346-
# so find the end marker and truncate the archive (in a naive way,
347-
# but these source code archives are expected to be small)
348-
compressed[1:2] == [0x42, 0x5a] || error("Not a BZip2 archive")
349-
trailing_bytes = 10 # end marker + checksum
350-
end_marker = 0x177245385090
351-
i = 1
352-
while i < length(compressed) - trailing_bytes
353-
# aligned match
354-
current_value = UInt64(compressed[i]) << 40 |
355-
UInt64(compressed[i+1]) << 32 |
356-
UInt64(compressed[i+2]) << 24 |
357-
UInt64(compressed[i+3]) << 16 |
358-
UInt64(compressed[i+4]) << 8 |
359-
UInt64(compressed[i+5])
360-
if current_value == end_marker
361-
break
362-
end
363-
364-
# misaligned match
365-
current_value = UInt64(current_value) << 8 | compressed[i+6]
366-
if end_marker in ((current_value >> 1) & 0xffffffffffff,
367-
(current_value >> 2) & 0xffffffffffff,
368-
(current_value >> 3) & 0xffffffffffff,
369-
(current_value >> 4) & 0xffffffffffff,
370-
(current_value >> 5) & 0xffffffffffff,
371-
(current_value >> 6) & 0xffffffffffff,
372-
(current_value >> 7) & 0xffffffffffff)
373-
i += 1
374-
break
375-
end
376-
377-
i += 1
378-
end
379-
compressed = compressed[1:i+trailing_bytes-1]
380-
381345
# decompress the archive
382-
archive = transcode(Bzip2Decompressor, compressed)
346+
#archive = transcode(Bzip2Decompressor, compressed)
347+
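# a decompressor stream is used (instead of `transcode`) so that `stop_on_end=true` can be set:
348+
# these archives are padded to multiples of 16KB, and decoding has to stop at the end of the BZip2 data rather than run into the padding.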
349+
stream = Bzip2DecompressorStream(IOBuffer(compressed); stop_on_end=true)
350+
archive = read(stream, typemax(Int))
383351

384352
(; id, archive)
385353
end,

0 commit comments

Comments
 (0)