Skip to content

Commit 94da877

Browse files
authored
Improve decompression matching performance (#105)
1 parent eeea00d commit 94da877

File tree

6 files changed: 34 additions and 12 deletions.

dissect/util/compression/lz4.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,11 @@ def decompress(
8080
if len(dst) + match_len > uncompressed_size > 0:
8181
raise CorruptDataError("Decompressed size exceeds uncompressed_size")
8282

83-
for _ in range(match_len):
84-
dst.append(dst[-offset])
83+
remaining = match_len
84+
while remaining > 0:
85+
match_size = min(remaining, offset)
86+
dst += dst[-offset : (-offset + match_size) or None]
87+
remaining -= match_size
8588

8689
if len(dst) >= uncompressed_size > 0:
8790
break

dissect/util/compression/lzfse.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -386,8 +386,12 @@ def _decode_lmd(
386386
raise ValueError("Invalid match distance")
387387

388388
dst += lit.read(L)
389-
for _ in range(M):
390-
dst.append(dst[-D])
389+
390+
remaining = M
391+
while remaining > 0:
392+
match_size = min(remaining, D)
393+
dst += dst[-D : (-D + match_size) or None]
394+
remaining -= match_size
391395

392396
symbols -= 1
393397

dissect/util/compression/lzo.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,11 @@ def decompress(src: bytes | BinaryIO, header: bool = True, buflen: int = -1) ->
100100
length = 0
101101
dist = (src.read(1)[0] << 2) + (val >> 2) + 1
102102

103-
for _ in range(length + 2):
104-
dst.append(dst[-dist])
103+
remaining = length + 2
104+
while remaining > 0:
105+
match_size = min(remaining, dist)
106+
dst += dst[-dist : (-dist + match_size) or None]
107+
remaining -= match_size
105108

106109
# State is often encoded in the last 2 bits of the value, and used in subsequent iterations
107110
state = length = val & 3

dissect/util/compression/lzvn.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -225,8 +225,12 @@ def decompress(src: bytes | BinaryIO) -> bytes:
225225
if M is not None:
226226
if len(dst) < D or D == 0:
227227
raise ValueError("Invalid match distance")
228-
for _ in range(M):
229-
dst.append(dst[-D])
228+
229+
remaining = M
230+
while remaining > 0:
231+
match_size = min(remaining, D)
232+
dst += dst[-D : (-D + match_size) or None]
233+
remaining -= match_size
230234

231235
return bytes(dst)
232236

dissect/util/compression/lzxpress.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,10 @@ def decompress(src: bytes | BinaryIO) -> bytes:
7171
match_length += 7
7272
match_length += 3
7373

74-
for _ in range(match_length):
75-
dst.append(dst[-match_offset])
74+
remaining = match_length
75+
while remaining > 0:
76+
match_size = min(remaining, match_offset)
77+
dst += dst[-match_offset : (-match_offset + match_size) or None]
78+
remaining -= match_size
7679

7780
return bytes(dst)

dissect/util/compression/lzxpress_huffman.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,13 @@ def decompress(src: bytes | BinaryIO) -> bytes:
172172
bitstring.skip(symbol)
173173

174174
length += 3
175-
for _ in range(length):
176-
dst.append(dst[-offset])
175+
176+
remaining = length
177+
while remaining > 0:
178+
match_size = min(remaining, offset)
179+
dst += dst[-offset : (-offset + match_size) or None]
180+
remaining -= match_size
181+
177182
chunk_size += length
178183

179184
return bytes(dst)

0 commit comments

Comments
 (0)