Skip to content

Commit 6801373

Browse files
committed
Tweak LZ min match for DNA data
1 parent 7dcb54f commit 6801373

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

src/transform/LZCodec.cpp

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,8 @@ const int LZXCodec<false>::MAX_DISTANCE2 = (1 << 24) - 2;
7979
template<>
8080
const int LZXCodec<false>::MIN_MATCH4 = 4;
8181
template<>
82+
const int LZXCodec<false>::MIN_MATCH6 = 6;
83+
template<>
8284
const int LZXCodec<false>::MIN_MATCH9 = 9;
8385
template<>
8486
const int LZXCodec<false>::MAX_MATCH = 65535 + 254 + 15 + MIN_MATCH4;
@@ -99,6 +101,8 @@ const int LZXCodec<true>::MAX_DISTANCE2 = (1 << 24) - 2;
99101
template<>
100102
const int LZXCodec<true>::MIN_MATCH4 = 4;
101103
template<>
104+
const int LZXCodec<true>::MIN_MATCH6 = 6;
105+
template<>
102106
const int LZXCodec<true>::MIN_MATCH9 = 9;
103107
template<>
104108
const int LZXCodec<true>::MAX_MATCH = 65535 + 254 + 15 + MIN_MATCH4;
@@ -155,8 +159,8 @@ bool LZXCodec<T>::forward(SliceArray<byte>& input, SliceArray<byte>& output, int
155159

156160
if (dt == Global::DNA) {
157161
// Longer min match for DNA input
158-
mm = MIN_MATCH9;
159-
dst[12] |= byte(2);
162+
mm = MIN_MATCH6;
163+
dst[12] |= byte(4);
160164
}
161165
else if (dt == Global::SMALL_ALPHABET) {
162166
return false;
@@ -283,22 +287,22 @@ bool LZXCodec<T>::forward(SliceArray<byte>& input, SliceArray<byte>& output, int
283287
}
284288

285289
_mBuf[mIdx++] = byte(dist);
286-
const int mLen = bestLen - minMatch - 14;
290+
const int mLen = bestLen - minMatch;
287291

288292
// Emit match length
289-
if (mLen >= 0) {
290-
if (mLen == 0) {
293+
if (mLen >= 14) {
294+
if (mLen == 14) {
291295
// Avoid the penalty of one extra byte to encode match length
292296
token = (dist >= dThreshold) ? 0x1D : 0x0D;
293297
bestLen--;
294298
}
295299
else {
296300
token = (dist >= dThreshold) ? 0x1E : 0x0E;
297-
mLenIdx += emitLength(&_mLenBuf[mLenIdx], mLen);
301+
mLenIdx += emitLength(&_mLenBuf[mLenIdx], mLen - 14);
298302
}
299303
}
300304
else {
301-
token = (dist >= dThreshold) ? mLen + 30 : mLen + 14;
305+
token = (dist >= dThreshold) ? mLen + 16 : mLen;
302306
}
303307
}
304308

@@ -426,7 +430,9 @@ bool LZXCodec<T>::inverse(SliceArray<byte>& input, SliceArray<byte>& output, int
426430
const int srcEnd = tkIdx - 13;
427431
const int mFlag = int(src[12]) & 1;
428432
const int maxDist = (mFlag == 0) ? MAX_DISTANCE1 : MAX_DISTANCE2;
429-
const int minMatch = ((int(src[12]) & 2) == 0) ? MIN_MATCH4 : MIN_MATCH9;
433+
const int mmIdx = (int(src[12]) >> 1) & 0x03;
434+
const int MIN_MATCHES[4] = { MIN_MATCH4, MIN_MATCH9, MIN_MATCH6, MIN_MATCH6 };
435+
const int minMatch = MIN_MATCHES[mmIdx];
430436
bool res = true;
431437
int srcIdx = 13;
432438
int dstIdx = 0;

src/transform/LZCodec.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,7 @@ namespace kanzi {
101101
static const int MAX_DISTANCE1;
102102
static const int MAX_DISTANCE2;
103103
static const int MIN_MATCH4;
104+
static const int MIN_MATCH6;
104105
static const int MIN_MATCH9;
105106
static const int MAX_MATCH;
106107
static const int MIN_BLOCK_LENGTH;

0 commit comments

Comments
 (0)