Skip to content

Commit 3d0c3e2

Browse files
committed
fix: free memory when block is fully read
1 parent a4fc7d3 commit 3d0c3e2

File tree

4 files changed

+118
-2
lines changed

4 files changed

+118
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ adheres to [Semantic Versioning](https://semver.org/).
1111

1212
### :bug: Fixes
1313

14+
- Free memory after a block is fully read
1415
- Typing: use `BinaryIO` instead of `IO[bytes]`
1516

1617
## [0.3.1] - 2021-12-26

src/xz/block.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,15 @@ def _read(self, size: int) -> bytes:
148148

149149
# read data
150150
try:
151-
return self.operation.decompress(self._pos, size)
151+
data = self.operation.decompress(self._pos, size)
152152
except LZMAError as ex:
153153
raise XZError(f"block: error while decompressing: {ex}") from ex
154154

155+
if self._pos + len(data) == self._length:
156+
self.operation = None # free memory
157+
158+
return data
159+
155160
def writable(self) -> bool:
    """Tell whether this block can be written to.

    Returns:
        True when the current operation is a ``BlockWrite``, or when
        ``self._length`` is falsy; False otherwise.
    """
    if isinstance(self.operation, BlockWrite):
        return True
    return not self._length
157162

@@ -174,7 +179,7 @@ def _write_after(self) -> None:
174179
self.unpadded_size, uncompressed_size = self.operation.finish()
175180
if uncompressed_size != self.uncompressed_size:
176181
raise XZError("block: compressor uncompressed size")
177-
self.operation = None
182+
self.operation = None # free memory
178183

179184
def _truncate(self, size: int) -> None:
180185
# thanks to the writable method, we are sure that length is zero
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from io import DEFAULT_BUFFER_SIZE
2+
from lzma import compress
3+
from pathlib import Path
4+
from random import seed
5+
import sys
6+
from typing import BinaryIO, Callable, Iterator, Optional, cast
7+
8+
import pytest
9+
10+
from xz import XZFile
11+
from xz.common import create_xz_index_footer, parse_xz_footer, parse_xz_index
12+
from xz.io import IOCombiner, IOStatic
13+
14+
if sys.version_info < (3, 9):
    from random import getrandbits

    def randbytes(length: int) -> bytes:
        """Return *length* random bytes.

        Fallback used when the interpreter is older than 3.9 and
        ``random.randbytes`` cannot be imported.
        """
        bit_count = length * 8
        return getrandbits(bit_count).to_bytes(length, "little")

else:
    from random import randbytes
21+
22+
23+
@pytest.fixture
def ram_usage() -> Iterator[Callable[[], int]]:
    """Yield a callable reporting memory usage as seen by ``tracemalloc``.

    The callable returns the second item of
    ``tracemalloc.get_traced_memory()``. Tracing is started before the
    callable is handed to the test and is always stopped afterwards.
    The test is skipped when ``tracemalloc`` cannot be imported.
    """
    try:
        import tracemalloc  # pylint: disable=import-outside-toplevel
    except ImportError:  # e.g. PyPy
        pytest.skip("tracemalloc module not available")

    def traced_memory() -> int:
        # index 1 of the (current, peak) pair documented by tracemalloc
        return tracemalloc.get_traced_memory()[1]

    try:
        tracemalloc.start()
        yield traced_memory
    finally:
        # stop tracing even if the test (or start itself) raised
        tracemalloc.stop()
35+
36+
37+
BLOCK_SIZE = 1_000_000
38+
39+
40+
@pytest.fixture
def fileobj() -> BinaryIO:
    """Build xz raw data composed of many identical blocks.

    One ``BLOCK_SIZE`` chunk of seeded random data is compressed once.
    Its first 12 bytes, its last 12 bytes and the index in between are
    split off, and the remaining block bytes are repeated ``nb_blocks``
    times behind a rebuilt index and footer.
    """
    nb_blocks = 50

    seed(0)  # deterministic payload
    stream = compress(randbytes(BLOCK_SIZE))

    header = stream[:12]
    footer = stream[-12:]
    check, backward_size = parse_xz_footer(footer)

    # the index sits right before the 12-byte footer
    index_start = -12 - backward_size
    block = stream[12:index_start]
    records = parse_xz_index(stream[index_start:-12])
    index_footer = create_xz_index_footer(check, records * nb_blocks)

    parts = [IOStatic(header)]
    # a single IOStatic instance is reused for every block
    parts.extend([IOStatic(block)] * nb_blocks)
    parts.append(IOStatic(index_footer))

    return cast(BinaryIO, IOCombiner(*parts))
62+
63+
64+
def test_read_linear(
    # pylint: disable=redefined-outer-name
    fileobj: BinaryIO,
    ram_usage: Callable[[], int],
) -> None:
    """Check that reading the whole file linearly keeps memory bounded.

    The reference value is the memory reported after reading almost one
    block; every later read must stay below twice that value.
    """
    with XZFile(fileobj) as xz_file:
        # read almost one block to get the reference memory usage
        xz_file.read(BLOCK_SIZE - 1)
        # should not use much more memory, take 2 as error margin
        memory_limit = ram_usage() * 2

        # read all the file
        while True:
            chunk = xz_file.read(DEFAULT_BUFFER_SIZE)
            if not chunk:
                break
            assert (
                ram_usage() < memory_limit
            ), f"Consumes too much RAM (at {100 * xz_file.tell() / len(xz_file):.0f}%)"
81+
82+
83+
def test_write(
    tmp_path: Path,
    # pylint: disable=redefined-outer-name
    ram_usage: Callable[[], int],
) -> None:
    """Check that writing many blocks keeps memory bounded.

    Writes ``nb_blocks`` blocks of ``BLOCK_SIZE`` random bytes to a
    temporary archive. The memory reported after the first block is the
    reference; after every later block it must stay below twice that.
    """
    nb_blocks = 10

    seed(0)  # deterministic payload

    one_block_memory: Optional[int] = None

    with XZFile(tmp_path / "archive.xz", "w") as xz_file:
        for i in range(nb_blocks):
            xz_file.change_block()
            xz_file.write(randbytes(BLOCK_SIZE))

            if one_block_memory is None:
                # first block: record the reference memory usage
                one_block_memory = ram_usage()
            else:
                assert (
                    # should not use much more memory, take 2 as error margin
                    ram_usage()
                    < one_block_memory * 2
                    # scale the fraction to a percentage, otherwise the
                    # message would always report 0% or 1%
                ), f"Consumes too much RAM (at {100 * i / nb_blocks:.0f}%)"

tests/unit/test_block.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ def test_read_all(
8080
call.seek(90, SEEK_SET),
8181
call.read(17),
8282
]
83+
fileobj.method_calls.clear()
84+
85+
assert block.read() == b""
86+
assert not fileobj.method_calls
8387

8488

8589
def test_read_seek_forward(

0 commit comments

Comments
 (0)