Skip to content

Commit 1d6b32c

Browse files
michielp1807 authored and folkertdev committed
ZDICT_trainBuffer_legacy: make suffix a vec
1 parent 6c63e77 commit 1d6b32c

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ const LLIMIT: usize = 64;
150150
const MINMATCHLENGTH: usize = 7;
151151
unsafe fn ZDICT_analyzePos(
152152
doneMarks: *mut u8,
153-
suffix: *const core::ffi::c_uint,
153+
suffix_slice: &[u32],
154154
mut start: u32,
155155
buffer: *const core::ffi::c_void,
156156
minRatio: u32,
@@ -161,7 +161,17 @@ unsafe fn ZDICT_analyzePos(
161161
let mut savings = [0u32; LLIMIT];
162162
let b = buffer as *const u8;
163163
let mut maxLength = LLIMIT;
164-
let mut pos = *suffix.offset(start as isize) as size_t;
164+
165+
// The C implementation keeps sentinel entries at indices `-1` and `len`, both holding the length of the suffix array; emulate them here.
166+
let suffix = |index| {
167+
if index == usize::MAX || index == suffix_slice.len() {
168+
suffix_slice.len() as u32
169+
} else {
170+
suffix_slice[index]
171+
}
172+
};
173+
174+
let mut pos = suffix(start as usize) as size_t;
165175
let mut end = start;
166176
let mut solution = DictItem::default();
167177
*doneMarks.add(pos) = 1;
@@ -198,7 +208,7 @@ unsafe fn ZDICT_analyzePos(
198208
end = end.wrapping_add(1);
199209
length = ZDICT_count(
200210
b.add(pos) as *const core::ffi::c_void,
201-
b.offset(*suffix.offset(end as isize) as isize) as *const core::ffi::c_void,
211+
b.offset(suffix(end as usize) as isize) as *const core::ffi::c_void,
202212
);
203213
if length < MINMATCHLENGTH {
204214
break;
@@ -208,7 +218,7 @@ unsafe fn ZDICT_analyzePos(
208218
loop {
209219
length_0 = ZDICT_count(
210220
b.add(pos) as *const core::ffi::c_void,
211-
b.offset(*suffix.offset(start as isize).sub(1) as isize) as *const core::ffi::c_void,
221+
b.offset(suffix((start as usize).wrapping_sub(1)) as isize) as *const core::ffi::c_void,
212222
);
213223
if length_0 >= MINMATCHLENGTH {
214224
start = start.wrapping_sub(1);
@@ -221,7 +231,7 @@ unsafe fn ZDICT_analyzePos(
221231
let mut idx: u32 = 0;
222232
idx = start;
223233
while idx < end {
224-
*doneMarks.offset(*suffix.offset(idx as isize) as isize) = 1;
234+
*doneMarks.offset(suffix(idx as usize) as isize) = 1;
225235
idx = idx.wrapping_add(1);
226236
}
227237
return solution;
@@ -250,16 +260,15 @@ unsafe fn ZDICT_analyzePos(
250260
let mut selectedID = currentID;
251261
id = refinedStart;
252262
while id < refinedEnd {
253-
if *b.offset((*suffix.offset(id as isize)).wrapping_add(mml) as isize)
254-
as core::ffi::c_int
263+
if *b.offset((suffix(id as usize)).wrapping_add(mml) as isize) as core::ffi::c_int
255264
!= currentChar as core::ffi::c_int
256265
{
257266
if currentCount > selectedCount {
258267
selectedCount = currentCount;
259268
selectedID = currentID;
260269
}
261270
currentID = id;
262-
currentChar = *b.offset((*suffix.offset(id as isize)).wrapping_add(mml) as isize);
271+
currentChar = *b.offset((suffix(id as usize)).wrapping_add(mml) as isize);
263272
currentCount = 0;
264273
}
265274
currentCount = currentCount.wrapping_add(1);
@@ -277,7 +286,7 @@ unsafe fn ZDICT_analyzePos(
277286
mml = mml.wrapping_add(1);
278287
}
279288
start = refinedStart;
280-
pos = *suffix.offset(refinedStart as isize) as size_t;
289+
pos = suffix(refinedStart as usize) as size_t;
281290
end = start;
282291
ptr::write_bytes(
283292
lengthList.as_mut_ptr() as *mut u8,
@@ -289,7 +298,7 @@ unsafe fn ZDICT_analyzePos(
289298
end = end.wrapping_add(1);
290299
length_1 = ZDICT_count(
291300
b.add(pos) as *const core::ffi::c_void,
292-
b.offset(*suffix.offset(end as isize) as isize) as *const core::ffi::c_void,
301+
b.offset(suffix(end as usize) as isize) as *const core::ffi::c_void,
293302
);
294303
if length_1 >= LLIMIT {
295304
length_1 = LLIMIT - 1;
@@ -304,8 +313,7 @@ unsafe fn ZDICT_analyzePos(
304313
while (length_2 >= MINMATCHLENGTH) as core::ffi::c_int & (start > 0) as core::ffi::c_int != 0 {
305314
length_2 = ZDICT_count(
306315
b.add(pos) as *const core::ffi::c_void,
307-
b.offset(*suffix.offset(start.wrapping_sub(1) as isize) as isize)
308-
as *const core::ffi::c_void,
316+
b.offset(suffix(start.wrapping_sub(1) as usize) as isize) as *const core::ffi::c_void,
309317
);
310318
if length_2 >= LLIMIT {
311319
length_2 = LLIMIT - 1;
@@ -379,7 +387,7 @@ unsafe fn ZDICT_analyzePos(
379387
let mut p: u32 = 0;
380388
let mut pEnd: u32 = 0;
381389
let mut length_3: u32 = 0;
382-
let testedPos = *suffix.offset(id_0 as isize);
390+
let testedPos = suffix(id_0 as usize);
383391
if testedPos as size_t == pos {
384392
length_3 = solution.length;
385393
} else {
@@ -585,12 +593,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
585593
mut minRatio: core::ffi::c_uint,
586594
notificationLevel: u32,
587595
) -> size_t {
588-
let suffix0 = malloc(
589-
bufferSize
590-
.wrapping_add(2)
591-
.wrapping_mul(::core::mem::size_of::<core::ffi::c_uint>()),
592-
) as *mut core::ffi::c_uint;
593-
let suffix = suffix0.add(1);
596+
let mut suffix = vec![0u32; bufferSize];
594597
let reverseSuffix = malloc(bufferSize.wrapping_mul(::core::mem::size_of::<u32>())) as *mut u32;
595598
let doneMarks = malloc(
596599
bufferSize
@@ -607,7 +610,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
607610
if notificationLevel >= 2 {
608611
eprintln!("\r{:70 }\r", ""); // clean display line
609612
}
610-
if suffix0.is_null() || reverseSuffix.is_null() || doneMarks.is_null() || filePos.is_null() {
613+
if reverseSuffix.is_null() || doneMarks.is_null() || filePos.is_null() {
611614
result = Error::memory_allocation.to_error_code();
612615
} else {
613616
if minRatio < MINRATIO {
@@ -637,20 +640,17 @@ unsafe fn ZDICT_trainBuffer_legacy(
637640
}
638641
let divSuftSortResult = divsufsort(
639642
core::slice::from_raw_parts(buffer as *const u8, bufferSize),
640-
core::slice::from_raw_parts_mut(suffix as *mut i32, bufferSize),
643+
std::mem::transmute::<&mut [u32], &mut [i32]>(&mut suffix[..]),
641644
false,
642645
);
643646
if divSuftSortResult != 0 {
644647
result = Error::GENERIC.to_error_code();
645648
} else {
646-
*suffix.add(bufferSize) = bufferSize as core::ffi::c_uint;
647-
*suffix0 = bufferSize as core::ffi::c_uint;
648-
649649
// build reverse suffix sort
650650
let mut pos: size_t = 0;
651651
pos = 0;
652652
while pos < bufferSize {
653-
*reverseSuffix.offset(*suffix.add(pos) as isize) = pos as u32;
653+
*reverseSuffix.offset(suffix[pos] as isize) = pos as u32;
654654
pos = pos.wrapping_add(1);
655655
}
656656
// Note: filePos tracks borders between samples.
@@ -680,7 +680,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
680680
} else {
681681
solution = ZDICT_analyzePos(
682682
doneMarks,
683-
suffix,
683+
&suffix,
684684
*reverseSuffix.offset(cursor as isize),
685685
buffer,
686686
minRatio,
@@ -707,7 +707,6 @@ unsafe fn ZDICT_trainBuffer_legacy(
707707
}
708708
}
709709
}
710-
free(suffix0 as *mut core::ffi::c_void);
711710
free(reverseSuffix as *mut core::ffi::c_void);
712711
free(doneMarks as *mut core::ffi::c_void);
713712
free(filePos as *mut core::ffi::c_void);

0 commit comments

Comments
 (0)