Skip to content

Commit 1feaa7e

Browse files
michielp1807 authored and folkertdev committed
ZDICT_analyzePos: make buffer a slice
1 parent 1d395bd commit 1feaa7e

File tree

1 file changed

+33
-36
lines changed

1 file changed

+33
-36
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 33 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ use libc::{free, malloc, memcpy, size_t};
55
use crate::lib::common::bits::{ZSTD_NbCommonBytes, ZSTD_highbit32};
66
use crate::lib::common::error_private::{ERR_getErrorName, ERR_isError, Error};
77
use crate::lib::common::huf::{HUF_CElt, HUF_CTABLE_WORKSPACE_SIZE_U32, HUF_WORKSPACE_SIZE};
8-
use crate::lib::common::mem::{MEM_read16, MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32};
8+
use crate::lib::common::mem::{MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32};
99
use crate::lib::common::xxhash::ZSTD_XXH64;
1010
use crate::lib::common::zstd_internal::{
1111
repStartValue, LLFSELog, MLFSELog, MaxLL, MaxML, OffFSELog, ZSTD_REP_NUM,
@@ -151,12 +151,11 @@ unsafe fn ZDICT_analyzePos(
151151
doneMarks: &mut [bool],
152152
suffix_slice: &[u32],
153153
mut start: u32,
154-
buffer: *const core::ffi::c_void,
154+
buffer: &[u8],
155155
minRatio: u32,
156156
notificationLevel: u32,
157157
) -> DictItem {
158158
let mut lengthList = [0u32; LLIMIT];
159-
let b = buffer as *const u8;
160159
let mut maxLength = LLIMIT;
161160

162161
// The C implementation maps index `len` and `-1` to the length of the suffix array.
@@ -175,23 +174,18 @@ unsafe fn ZDICT_analyzePos(
175174
doneMarks[pos] = true;
176175

177176
// trivial repetition cases
178-
if MEM_read16(b.add(pos) as *const core::ffi::c_void) as core::ffi::c_int
179-
== MEM_read16(b.add(pos).add(2) as *const core::ffi::c_void) as core::ffi::c_int
180-
|| MEM_read16(b.add(pos).add(1) as *const core::ffi::c_void) as core::ffi::c_int
181-
== MEM_read16(b.add(pos).add(3) as *const core::ffi::c_void) as core::ffi::c_int
182-
|| MEM_read16(b.add(pos).add(2) as *const core::ffi::c_void) as core::ffi::c_int
183-
== MEM_read16(b.add(pos).add(4) as *const core::ffi::c_void) as core::ffi::c_int
177+
if buffer[pos..pos + 2] == buffer[pos + 2..pos + 4]
178+
|| buffer[pos + 1..pos + 3] == buffer[pos + 3..pos + 5]
179+
|| buffer[pos + 2..pos + 4] == buffer[pos + 4..pos + 6]
184180
{
185181
// skip and mark segment
186-
let pattern16 = MEM_read16(b.add(pos).add(4) as *const core::ffi::c_void);
182+
let pattern16 = &buffer[pos + 4..pos + 6];
187183
let mut patternEnd = 6usize;
188-
while MEM_read16(b.add(pos).add(patternEnd) as *const core::ffi::c_void) == pattern16 {
189-
patternEnd = patternEnd.wrapping_add(2);
184+
while buffer[pos + patternEnd..pos + patternEnd + 2] == *pattern16 {
185+
patternEnd += 2;
190186
}
191-
if *b.add(pos.wrapping_add(patternEnd as size_t))
192-
== *b.add(pos.wrapping_add(patternEnd as size_t).wrapping_sub(1))
193-
{
194-
patternEnd = patternEnd.wrapping_add(1);
187+
if buffer[pos + patternEnd] == buffer[pos + patternEnd - 1] {
188+
patternEnd += 1;
195189
}
196190
doneMarks[pos..][1..patternEnd].fill(true);
197191
return solution;
@@ -202,8 +196,8 @@ unsafe fn ZDICT_analyzePos(
202196
loop {
203197
end = end.wrapping_add(1);
204198
length = ZDICT_count(
205-
b.add(pos) as *const core::ffi::c_void,
206-
b.offset(suffix(end as usize) as isize) as *const core::ffi::c_void,
199+
buffer[pos..].as_ptr() as *const core::ffi::c_void,
200+
buffer[suffix(end as usize) as usize..].as_ptr() as *const core::ffi::c_void,
207201
);
208202
if length < MINMATCHLENGTH {
209203
break;
@@ -214,8 +208,9 @@ unsafe fn ZDICT_analyzePos(
214208
let mut length_0: size_t = 0;
215209
loop {
216210
length_0 = ZDICT_count(
217-
b.add(pos) as *const core::ffi::c_void,
218-
b.offset(suffix((start as usize).wrapping_sub(1)) as isize) as *const core::ffi::c_void,
211+
buffer[pos..].as_ptr() as *const core::ffi::c_void,
212+
buffer[suffix((start as usize).wrapping_sub(1)) as usize..].as_ptr()
213+
as *const core::ffi::c_void,
219214
);
220215
if length_0 >= MINMATCHLENGTH {
221216
start = start.wrapping_sub(1);
@@ -258,13 +253,13 @@ unsafe fn ZDICT_analyzePos(
258253
let mut selectedID = currentID;
259254

260255
for id in refinedStart..refinedEnd {
261-
if *b.offset((suffix(id as usize)).wrapping_add(mml) as isize) != currentChar {
256+
if buffer[(suffix(id as usize) + mml) as usize] != currentChar {
262257
if currentCount > selectedCount {
263258
selectedCount = currentCount;
264259
selectedID = currentID;
265260
}
266261
currentID = id;
267-
currentChar = *b.offset((suffix(id as usize)).wrapping_add(mml) as isize);
262+
currentChar = buffer[(suffix(id as usize) + mml) as usize];
268263
currentCount = 0;
269264
}
270265
currentCount = currentCount.wrapping_add(1);
@@ -291,8 +286,8 @@ unsafe fn ZDICT_analyzePos(
291286
loop {
292287
end = end.wrapping_add(1);
293288
let mut length = ZDICT_count(
294-
b.add(pos) as *const core::ffi::c_void,
295-
b.offset(suffix(end as usize) as isize) as *const core::ffi::c_void,
289+
buffer[pos..].as_ptr() as *const core::ffi::c_void,
290+
buffer[suffix(end as usize) as usize..].as_ptr() as *const core::ffi::c_void,
296291
);
297292
if length >= LLIMIT {
298293
length = LLIMIT - 1;
@@ -307,8 +302,9 @@ unsafe fn ZDICT_analyzePos(
307302
let mut length_2 = MINMATCHLENGTH;
308303
while (length_2 >= MINMATCHLENGTH) as core::ffi::c_int & (start > 0) as core::ffi::c_int != 0 {
309304
length_2 = ZDICT_count(
310-
b.add(pos) as *const core::ffi::c_void,
311-
b.offset(suffix(start.wrapping_sub(1) as usize) as isize) as *const core::ffi::c_void,
305+
buffer[pos..].as_ptr() as *const core::ffi::c_void,
306+
buffer[suffix(start.wrapping_sub(1) as usize) as usize..].as_ptr()
307+
as *const core::ffi::c_void,
312308
);
313309
if length_2 >= LLIMIT {
314310
length_2 = LLIMIT - 1;
@@ -340,10 +336,8 @@ unsafe fn ZDICT_analyzePos(
340336

341337
// reduce maxLength in case of final into repetitive data
342338
let mut l = maxLength as u32;
343-
let c = *b.add(pos.wrapping_add(maxLength).wrapping_sub(1));
344-
while *b.add(pos.wrapping_add(l as size_t).wrapping_sub(2)) as core::ffi::c_int
345-
== c as core::ffi::c_int
346-
{
339+
let c = buffer[pos + maxLength - 1];
340+
while buffer[pos + l as usize - 2] == c {
347341
l = l.wrapping_sub(1);
348342
}
349343
maxLength = l as size_t;
@@ -383,8 +377,8 @@ unsafe fn ZDICT_analyzePos(
383377
length_3 = solution.length;
384378
} else {
385379
length_3 = ZDICT_count(
386-
b.add(pos) as *const core::ffi::c_void,
387-
b.offset(testedPos as isize) as *const core::ffi::c_void,
380+
buffer[pos..].as_ptr() as *const core::ffi::c_void,
381+
buffer[testedPos as usize..].as_ptr() as *const core::ffi::c_void,
388382
) as u32;
389383
if length_3 > solution.length {
390384
length_3 = solution.length;
@@ -568,7 +562,7 @@ unsafe fn ZDICT_dictSize(dictList: *const DictItem) -> u32 {
568562
unsafe fn ZDICT_trainBuffer_legacy(
569563
dictList: *mut DictItem,
570564
dictListSize: u32,
571-
buffer: *const core::ffi::c_void,
565+
buffer: &[u8],
572566
mut bufferSize: size_t,
573567
fileSizes: *const size_t,
574568
mut nbFiles: core::ffi::c_uint,
@@ -609,7 +603,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
609603
}
610604
let mut suffix = vec![0u32; bufferSize];
611605
let divSuftSortResult = divsufsort(
612-
core::slice::from_raw_parts(buffer as *const u8, bufferSize),
606+
&buffer[..bufferSize],
613607
std::mem::transmute::<&mut [u32], &mut [i32]>(&mut suffix),
614608
false,
615609
);
@@ -660,7 +654,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
660654
continue;
661655
}
662656

663-
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
657+
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer.as_ptr().cast());
664658
cursor += solution.length as usize;
665659

666660
if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate {
@@ -1350,11 +1344,14 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy(
13501344

13511345
dictList.as_mut().unwrap().init();
13521346

1347+
// The samples must be followed by the noise band.
1348+
debug_assert!(samples.len() >= samplesBuffSize + NOISELENGTH);
1349+
13531350
// build dictionary
13541351
ZDICT_trainBuffer_legacy(
13551352
dictList,
13561353
dictListSize,
1357-
samples.as_ptr() as *mut core::ffi::c_void,
1354+
samples,
13581355
samplesBuffSize,
13591356
samplesSizes.as_ptr(),
13601357
nbSamples,

0 commit comments

Comments
 (0)