
Commit f1cdb9e

Actually use LZ4 (#84)

* actually use lz4 and lz4hc
* fix tests
* dry

1 parent b5c8f8a


4 files changed (+77 -23 lines)


.github/workflows/python-module.yml

Lines changed: 1 addition & 0 deletions
@@ -145,6 +145,7 @@ jobs:
       - build_mac
       - build_win
     runs-on: ubuntu-latest
+    if: startsWith(github.event.ref, 'refs/tags')
     steps:
       - uses: actions/checkout@v3
         with:
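The new `if:` gates this job on the pushed ref being a tag: `github.event.ref` is checked against the `refs/tags` prefix, so the job (presumably a release/publish step; its name sits above this hunk) no longer runs on ordinary branch pushes.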

python/tests/test_wkw.py

Lines changed: 44 additions & 20 deletions
@@ -65,7 +65,31 @@ def test_readwrite():
             path.getsize(path.join("tests/tmp", "z0", "y0", "x0.wkw"))
             == np.prod(SIZE) * (dataset.header.file_len**3) + header_size
         )
-        assert np.all(dataset.read(POSITION, SIZE) == test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE), test_data)
+
+
+def test_readwrite_lz4():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
+
+
+def test_readwrite_lz4hc():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4HC, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression():
@@ -87,13 +111,13 @@ def test_readwrite_live_compression():
         )
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression_should_enforce_full_file_write():
     with pytest.raises(Exception):
         with wkw.Dataset.create(
-            "tests/tmp", wkw.Header(np.uint8, block_type=BLOCK_TYPE_LZ4)
+            "tests/tmp", wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4)
         ) as dataset:
             test_data = generate_test_data(dataset.header.voxel_type)
             dataset.write(POSITION, test_data)
@@ -113,7 +137,7 @@ def test_readwrite_live_compression_should_not_allow_inconsistent_writes():
             dataset.write(POSITION, test_data)
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE129) == empty_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE129), empty_data)
 
 
 def test_readwrite_live_compression_should_truncate():
@@ -138,7 +162,7 @@ def test_readwrite_live_compression_should_truncate():
         assert empty_compressed_size < random_compressed_size
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == ones_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), ones_data)
 
 
 def test_compress():
@@ -157,7 +181,7 @@ def test_compress():
             path.getsize(path.join("tests/tmp2", "z0", "y0", "x0.wkw"))
             < np.prod(SIZE) * (dataset2.header.file_len**3) + header_size
         )
-        assert np.all(dataset2.read(POSITION, SIZE) == test_data)
+        assert np.array_equiv(dataset2.read(POSITION, SIZE), test_data)
 
 
 def test_row_major_order():
@@ -167,15 +191,15 @@ def test_row_major_order():
         dataset.write((0, 0, 0), data)
         read_data = dataset.read((0, 0, 0), data_shape)
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
     with wkw.Dataset.create("tests/tmp2", wkw.Header(np.uint8)) as dataset:
         fortran_data = np.asfortranarray(data)
         dataset.write((0, 0, 0), fortran_data)
         fortran_read_data = dataset.read((0, 0, 0), data_shape)
 
-        assert np.all(fortran_read_data == read_data)
-        assert np.all(fortran_read_data == fortran_data)
+        assert np.array_equiv(fortran_read_data, read_data)
+        assert np.array_equiv(fortran_read_data, fortran_data)
 
 
 def test_row_major_order_with_offset():
@@ -185,7 +209,7 @@ def test_row_major_order_with_offset():
         dataset.write((15, 2, 0), data)
         read_data = dataset.read((15, 2, 0), data_shape)
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_different_voxel_size():
@@ -195,7 +219,7 @@ def test_row_major_order_with_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape)
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels():
@@ -207,7 +231,7 @@ def test_row_major_order_with_channels():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels_and_different_voxel_size():
@@ -219,7 +243,7 @@ def test_row_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_column_major_order_with_channels_and_different_voxel_size():
@@ -231,7 +255,7 @@ def test_column_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_view_on_np_array():
@@ -242,7 +266,7 @@ def test_view_on_np_array():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data.shape)
 
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_not_too_much_data_is_written():
@@ -257,9 +281,9 @@ def write_and_test_in_given_order(wkw_path, order):
         before = dataset.read((0, 0, 0), (1, 2, 3))
         after = dataset.read((0, 0, 38), (35, 35, 26))
 
-        assert np.all(data == read_data)
-        assert np.all(before == 1)
-        assert np.all(after == 1)
+        assert np.array_equiv(data, read_data)
+        assert np.array_equiv(before, 1)
+        assert np.array_equiv(after, 1)
 
     write_and_test_in_given_order("tests/tmp", "F")
     write_and_test_in_given_order("tests/tmp2", "C")
@@ -281,7 +305,7 @@ def test_multiple_writes_and_reads():
         ] = data
 
         read_data = dataset.read((0, 0, 0), (200, 200, 200))
-        assert np.all(mem_buffer == read_data)
+        assert np.array_equiv(mem_buffer, read_data)
 
 
 def test_multi_channel_column_major_order():
@@ -295,7 +319,7 @@ def test_multi_channel_column_major_order():
         dataset.write(offset, data)
 
         read_data = dataset.read(offset, data_shape[1:])
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_big_read():
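Throughout the tests, `np.all(a == b)` is replaced by `np.array_equiv(a, b)`. The two agree on equal-shaped inputs, but `array_equiv` broadcasts one argument against the other and simply returns `False` for shape-inconsistent inputs (the behavior of `a == b` on mismatched shapes has varied across NumPy versions), which also makes scalar assertions like `np.array_equiv(before, 1)` read naturally. A standalone sketch of the semantics, plain NumPy, independent of this repo:

```python
import numpy as np

a = np.ones((2, 3), dtype=np.uint8)

# Same shape, equal contents: both forms agree.
assert np.all(a == a) and np.array_equiv(a, a)

# array_equiv broadcasts: a scalar or a broadcastable row works directly.
assert np.array_equiv(a, 1)
assert np.array_equiv(a, np.ones((1, 3), dtype=np.uint8))

# Shape-inconsistent inputs are reported as unequal rather than erroring.
assert not np.array_equiv(a, np.ones((2, 4), dtype=np.uint8))
```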

rust/src/file.rs

Lines changed: 13 additions & 3 deletions
@@ -302,7 +302,8 @@ impl File {
 
         let result = match self.header.block_type {
             BlockType::Raw => self.write_block_raw(buf),
-            BlockType::LZ4 | BlockType::LZ4HC => self.write_block_lz4(buf),
+            BlockType::LZ4 => self.write_block_lz4(buf, BlockType::LZ4),
+            BlockType::LZ4HC => self.write_block_lz4(buf, BlockType::LZ4HC),
         };
 
         // advance
@@ -328,10 +329,19 @@ impl File {
         }
     }
 
-    fn write_block_lz4(&mut self, buf: &[u8]) -> Result<usize> {
+    fn write_block_lz4(&mut self, buf: &[u8], block_type: BlockType) -> Result<usize> {
         // compress data
         let mut buf_lz4 = &mut *self.disk_block_buf.as_mut().unwrap();
-        let len_lz4 = lz4::compress_hc(buf, &mut buf_lz4)?;
+        let len_lz4 = match block_type {
+            BlockType::LZ4 => lz4::compress(buf, &mut buf_lz4)?,
+            BlockType::LZ4HC => lz4::compress_hc(buf, &mut buf_lz4)?,
+            _ => {
+                return Err(format!(
+                    "Invalid block_type {:?} for compression.",
+                    block_type
+                ));
+            }
+        };
 
         // write data
         self.file
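The user-visible effect of this change: `BLOCK_TYPE_LZ4` and `BLOCK_TYPE_LZ4HC` now select genuinely different codecs (`LZ4_compress_default` versus the HC path) instead of both compressing with HC. A sketch mirroring the new tests, assuming the `wkw` Python package; the `tmp_lz4*` paths are illustrative, not from the repo:

```python
import numpy as np
import wkw

data = np.random.randint(0, 4, (128, 128, 128), dtype=np.uint8)

# Write the same data once per block type. LZ4 favors compression speed;
# LZ4HC usually trades write time for smaller files. Sizes are data-dependent.
for path, block_type in [
    ("tmp_lz4", wkw.Header.BLOCK_TYPE_LZ4),
    ("tmp_lz4hc", wkw.Header.BLOCK_TYPE_LZ4HC),
]:
    with wkw.Dataset.create(
        path, wkw.Header(np.uint8, block_type=block_type, file_len=4)
    ) as dataset:
        dataset.write((0, 0, 0), data)
```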

rust/src/lz4.rs

Lines changed: 19 additions & 0 deletions
@@ -6,6 +6,25 @@ pub fn compress_bound(input_size: usize) -> usize {
     unsafe { liblz4::LZ4_compressBound(input_size as i32) as usize }
 }
 
+pub fn compress(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
+    let src_size = src_buf.len() as i32;
+    let dst_capacity = dst_buf.len() as i32;
+
+    let dst_len = unsafe {
+        liblz4::LZ4_compress_default(
+            std::mem::transmute::<&[u8], &[i8]>(src_buf).as_ptr(),
+            std::mem::transmute::<&mut [u8], &mut [i8]>(dst_buf).as_mut_ptr(),
+            src_size,
+            dst_capacity,
+        )
+    };
+
+    match dst_len == 0 {
+        true => Err(String::from("Error in LZ4_compress_default")),
+        false => Ok(dst_len as usize),
+    }
+}
+
 pub fn compress_hc(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
     let src_size = src_buf.len() as i32;
     let dst_capacity = dst_buf.len() as i32;
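Like the existing HC binding, `LZ4_compress_default` returns the number of bytes written to the destination, or 0 on failure (for example, a destination smaller than `compress_bound` requires), which is exactly what the `dst_len == 0` match checks. For intuition about the fast-versus-HC trade-off outside the Rust shim, the third-party `lz4` Python package exposes both modes; a hedged sketch assuming `pip install lz4` (not a dependency of this repo):

```python
import lz4.block

# A repetitive payload so both codecs have something to work with.
payload = bytes(range(256)) * 4096  # 1 MiB

fast = lz4.block.compress(payload, mode="default")
hc = lz4.block.compress(payload, mode="high_compression", compression=9)

# HC typically produces the smaller result at a higher CPU cost; as with
# the C API, the exact ratio depends entirely on the input.
print(len(fast), len(hc))
```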
