Skip to content

Commit 970434d

Browse files
committed
Extract files as chunks.
1 parent 28b2db4 commit 970434d

File tree

1 file changed

+111
-87
lines changed

1 file changed

+111
-87
lines changed

util/UNZIP.LUA

Lines changed: 111 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,30 @@
11
#!/usr/bin/env lua
22

33
-- nbs = new bit stream
4-
local function nbs(data)
4+
local function nbs(reader)
55

6+
-- b = buffer
67
-- p = position
78
-- bb = bit buffer
89
-- bc = bit count
9-
local p, bb, bc = 1, 0, 0
10+
local b, p, bb, bc = "", 1, 0, 0
11+
12+
-- fill = make sure the chunk buffer has an unread byte at p.
-- Pulls the next chunk from reader() only when the current one is used up.
-- Returns true when a byte is available, false when reader() yields nil or
-- an empty string (treated as end of input).
local function fill()
  if p <= #b then return true end
  b = reader()
  if b and #b ~= 0 then
    p = 1
    return true
  end
  return false
end
1020

1121
-- nb = need bits
1222
local function nb(n)
1323
while bc < n do
14-
bb = bb + ((data:byte(p) or 0) << bc)
24+
if p > #b then
25+
if not fill() then error("Unexpected EOF") end
26+
end
27+
bb = bb + (b:byte(p) << bc)
1528
p = p + 1
1629
bc = bc + 8
1730
end
@@ -28,13 +41,23 @@ local function nbs(data)
2841
-- ab = align byte
2942
-- Drops every bit still held in the bit buffer, so the next read starts at
-- the next unread byte of the chunk buffer.
local function ab()
  bb = 0
  bc = 0
end
3043

31-
-- s = skip
32-
local function s(n) p = p + n end
33-
34-
-- TODO: inflate() currently expects bs.p() callable, change that
35-
local function gp() return p end
44+
-- raw = read raw bytes
45+
-- Aligns to a byte boundary, then returns exactly n bytes as one string,
-- stitching them together across chunk boundaries.
-- Raises "Unexpected EOF" when the input ends first.
local function raw(n)
  ab()
  -- parts = pieces collected so far
  -- left  = bytes still wanted
  local parts, left = {}, n
  while left > 0 do
    if p > #b and not fill() then error("Unexpected EOF") end
    -- stop = last index to take from the current chunk
    local stop = math.min(#b, p + left - 1)
    parts[#parts + 1] = b:sub(p, stop)
    left = left - (stop - p + 1)
    p = stop + 1
  end
  return table.concat(parts)
end
3659

37-
return { r = rb, a = ab, p = gp, s = s }
60+
return { r = rb, a = ab, raw = raw }
3861
end
3962

4063
-- rv = revbits
@@ -101,23 +124,40 @@ local function rh(bs, h)
101124
error("invalid Huffman code")
102125
end
103126

104-
local function inflate(data)
127+
local function inflate(reader, writer)
105128

106129
-- bs = bit stream
107-
-- ob = output byte stream
108130
-- op = output position
109131
-- kg = keep going (in a loop)
110-
local bs, ob, op, kg = nbs(data), {}, 0, true
132+
-- win = sliding window (32KB circular buffer)
133+
-- wp = window pointer
134+
-- out = output chunk buffer
135+
local bs, op, kg, win, wp, out = nbs(reader), 0, true, {}, 0, {}
136+
137+
-- flush = write to sink
138+
-- Hands the pending output chunk to writer() as a single string and starts
-- a fresh chunk; does nothing when no bytes are pending.
local function flush()
  if #out == 0 then return end
  writer(table.concat(out))
  out = {}
end
111144

112145
-- ab = append byte
113-
local function ab(byte) op = op + 1 ob[op] = string.char(byte) end
146+
-- Emits one decoded byte (given as a number): counts it in op, queues it
-- for the sink, and records it in the 32768-slot circular history window.
local function ab(byte)
  -- n = size of the pending chunk once this byte is queued
  local n = #out + 1
  out[n] = string.char(byte)
  op = op + 1

  -- hand the chunk to the sink once 4096 bytes are pending
  if n >= 4096 then flush() end

  -- remember the byte so later matches can copy it back
  win[wp] = byte
  wp = (wp + 1) % 32768
end
120157

158+
-- as = append string
159+
-- Feeds every byte of s, in order, through ab().
local function as(s)
  local i, n = 1, #s
  while i <= n do
    ab(s:byte(i))
    i = i + 1
  end
end
160+
121161
while kg do
122162

123163
-- f = final
@@ -126,14 +166,10 @@ local function inflate(data)
126166

127167
if t == 0 then
128168
-- uncompressed block: align, then read LEN, NLEN from byte stream
129-
bs.a()
130-
local p = bs.p()
131-
local function le16(s, i) local a,b = s:byte(i,i+1) return a + b*256 end
132-
local len, _ = le16(data, p), le16(data, p + 2)
133-
bs.s(4) -- consume LEN and NLEN
134-
local s = data:sub(bs.p(), bs.p() + len - 1)
169+
-- h    = 4-byte stored-block header
-- len  = LEN, number of literal bytes that follow (little-endian)
-- nlen = NLEN, which must be the one's complement of LEN
local h = bs.raw(4)
local len = h:byte(1) + h:byte(2) * 256
local nlen = h:byte(3) + h:byte(4) * 256

-- a header whose two halves disagree means the stream is corrupt or
-- misaligned; stop instead of copying garbage to the output
if (len ~ nlen) ~= 0xFFFF then
  error("Invalid stored block length")
end

as(bs.raw(len))
136-
bs.s(len)
137173

138174
elseif t == 1 or t == 2 then
139175
local ll, dl, h, d = {}, {}
@@ -235,9 +271,11 @@ local function inflate(data)
235271
error("Invalid distance " .. tostring(dv) .. " (outpos=" .. tostring(op) .. ")")
236272
end
237273

238-
-- bi = base index
239-
local bi = op - dv
240-
for i = 1, l do ab(ob[bi + i]:byte()) end
274+
-- Read from window history
275+
-- Copy l bytes from dv positions back in the history window. ab() moves
-- wp forward on every call, so the source slot is recomputed each time;
-- this is what lets a match overlap the bytes it is producing.
for _ = 1, l do
  ab(win[(wp - dv) % 32768] or 0)
end
241279
end
242280
end
243281
else
@@ -247,14 +285,15 @@ local function inflate(data)
247285
if f == 1 then kg = false end
248286
end
249287

250-
return table.concat(ob)
288+
flush()
251289
end
252290

253-
local function crc32(s)
291+
local function crc32(s, c)
254292

255293
-- x = maximum
256294
-- c = crc number
257-
local x, c = 0xFFFFFFFF c = x
295+
local x = 0xFFFFFFFF
296+
c = (c or 0) ~ x
258297

259298
for i = 1, #s do
260299
c = c ~ s:byte(i)
@@ -285,26 +324,6 @@ local function unzip(path)
285324
-- le16 = read a little-endian 16-bit integer through R.
-- Returns the value, or nil plus a message when fewer than 2 bytes could be
-- read, so that callers' `if not v then break end` checks can fire.
-- Assumes R returns the bytes read, or nil plus an error - TODO confirm.
local function le16()
  local d, e = R(2)
  if not d or #d < 2 then return nil, e or "Unexpected EOF" end
  local a, b = d:byte(1, 2)
  return a + b * 256
end
286325
-- le32 = read a little-endian 32-bit integer through R.
-- Returns the value, or nil plus a message when fewer than 4 bytes could be
-- read, so that callers' `if not v then break end` checks can fire.
-- Assumes R returns the bytes read, or nil plus an error - TODO confirm.
local function le32()
  local d, e = R(4)
  if not d or #d < 4 then return nil, e or "Unexpected EOF" end
  local a, b, c, x = d:byte(1, 4)
  return a + b * 256 + c * 65536 + x * 16777216
end
287326

288-
-- RB = read n bytes chunked
289-
local function RB(n)
290-
291-
-- p = parts
292-
-- g = got
293-
-- w = want
294-
-- d = data
295-
local p, g, w, d = {}, 0
296-
297-
while g < n do
298-
w = math.min(C, n - g)
299-
d, E = R(w)
300-
if E then return nil, E end
301-
p[#p + 1] = d
302-
g = g + #d
303-
end
304-
305-
return table.concat(p)
306-
end
307-
308327
while true do
309328

310329
-- s = signature
@@ -316,16 +335,19 @@ local function unzip(path)
316335
-- us = Uncompressed size
317336
-- nl = Length of file name
318337
-- el = Extra length in the file name
319-
-- cd = A buffer of the files data
320338
-- os = The output stream
321-
local s, v, fl, cm, ok, cr, cs, us, nl, el, fn, cd, os = f:read(4)
339+
-- di = directory
340+
-- of = output file
341+
-- re = remaining bytes
342+
-- cr = crc32 checksum
343+
-- `os` is deliberately absent from this list: a local of that name would
-- hide the standard os library from the rest of the loop body. `cr` is
-- declared once only.
local s, v, fl, cm, cr, cs, us, nl, el, fn, di, of, re = f:read(4)
322344
if not s or #s < 4 then break end
323345
if s ~= "PK\3\4" then break end
324346

325-
v, E = le16() if not v then break end -- version
347+
v , E = le16() if not v then break end -- version
326348
fl, E = le16() if not fl then break end -- flags
327349
cm, E = le16() if not cm then break end -- method
328-
S(4) -- modification time and date
350+
S(4) -- skip modification time and date
329351
cr, E = le32() if not cr then break end -- crc32
330352
cs, E = le32() if not cs then break end -- compressed size
331353
us, E = le32() if not us then break end -- uncompressed size
@@ -337,47 +359,49 @@ local function unzip(path)
337359

338360
print("Extracting: " .. fn)
339361

340-
cd, E = RB(cs)
341-
if not cd then
342-
print(e)
343-
break
344-
end
362+
-- ac = actual crc32 of the bytes written so far
-- wn = number of bytes written so far
-- dn = whether the entry's data was decoded (false for unsupported methods)
-- ol = the os library, fetched through _G so that a local named `os` in the
--      enclosing scope cannot hide it
-- cr keeps the expected crc32 read from the header; it is only compared.
re = cs
local ac, wn, dn, ol = 0, 0, true, _G.os

-- reader = read compressed data in chunks
-- Returns the next chunk of at most C bytes, or nil once the entry's cs
-- compressed bytes are used up or the file ends early.
local function reader()
  if re <= 0 then return nil end
  local chunk = f:read(math.min(C, re))
  if chunk then re = re - #chunk end
  return chunk
end

-- Setup output file
-- NOTE(review): fn presumably comes straight from the archive; a name with
-- `..` components or a leading `/` ends up outside the current directory.
-- Consider rejecting such names.
di = fn:match("(.+)/")
if di and ol and ol.execute then
  -- The name is untrusted: wrap it in single quotes for a POSIX shell,
  -- escape embedded single quotes, and use `--` so a leading dash is not
  -- taken as an option.
  ol.execute("mkdir -p -- '" .. (di:gsub("'", "'\\''")) .. "'")
end

of, E = io.open(fn, "wb")
if not of then
  print(E)
else
  -- writer = write decompressed data and update CRC and byte count
  local function writer(chunk)
    ac = crc32(chunk, ac)
    wn = wn + #chunk
    of:write(chunk)
  end

  if cm == 0 then
    -- no compression
    while re > 0 do
      local chunk = reader()
      if not chunk then break end
      writer(chunk)
    end
  elseif cm == 8 then
    -- deflate
    inflate(reader, writer)
  else
    dn = false
    print("Unsupported compression")
  end

  of:close()

  -- only judge the result when a decoder actually ran
  if dn then
    if wn ~= us then print("Size mismatch") end
    if ac ~= cr then print("CRC mismatch") end
  end
end

-- Skip any compressed bytes nobody consumed (open failure, unsupported
-- method, or a deflate stream that ended early) so the next header is read
-- from the right offset.
while reader() do end
382406
end
383407

0 commit comments

Comments
 (0)