Skip to content

Commit 5e404e9

Browse files
Small LZSA2 ratio increase for some files
1 parent: 65d6972 · commit: 5e404e9

File tree

3 files changed

+78
-2
lines changed

3 files changed

+78
-2
lines changed

src/shrink_block_v2.c

Lines changed: 67 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1310,6 +1310,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
13101310

13111311
int* first_offset_for_byte = pCompressor->first_offset_for_byte;
13121312
int* next_offset_for_pos = pCompressor->next_offset_for_pos;
1313+
int* offset_cache = pCompressor->offset_cache;
13131314
int nPosition;
13141315

13151316
/* Supplement small matches */
@@ -1352,12 +1353,77 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
13521353
match[m].offset = nMatchOffset;
13531354
m++;
13541355
nInserted++;
1355-
if (nInserted >= 15)
1356+
if (nInserted >= 12)
13561357
break;
13571358
}
13581359
}
13591360
}
13601361

1362+
/* Supplement matches further */
1363+
1364+
memset(offset_cache, 0xff, sizeof(int) * 2048);
1365+
1366+
for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
1367+
lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2);
1368+
1369+
if (match[0].length < 5) {
1370+
int m = 0;
1371+
int nMatchPos;
1372+
1373+
while (m < 46 && match[m].length) {
1374+
offset_cache[match[m].offset & 2047] = nPosition;
1375+
m++;
1376+
}
1377+
1378+
for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 46 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
1379+
int nMatchOffset = nPosition - nMatchPos;
1380+
1381+
if (nMatchOffset <= MAX_OFFSET) {
1382+
int nAlreadyExists = 0;
1383+
1384+
if (offset_cache[nMatchOffset & 2047] == nPosition) {
1385+
int nExistingMatchIdx;
1386+
1387+
for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
1388+
if (match[nExistingMatchIdx].offset == nMatchOffset) {
1389+
nAlreadyExists = 1;
1390+
break;
1391+
}
1392+
}
1393+
}
1394+
1395+
if (!nAlreadyExists) {
1396+
int nForwardPos = nPosition + 2;
1397+
int nGotMatch = 0;
1398+
1399+
while (nForwardPos >= nMatchOffset && (nForwardPos + 2) < nEndOffset && nForwardPos < (nPosition + 2 + 1 + 2)) {
1400+
if (!memcmp(pInWindow + nForwardPos, pInWindow + nForwardPos - nMatchOffset, 2)) {
1401+
nGotMatch = 1;
1402+
break;
1403+
}
1404+
nForwardPos++;
1405+
}
1406+
1407+
if (nGotMatch) {
1408+
int nMatchLen = 2;
1409+
while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
1410+
nMatchLen++;
1411+
match[m].length = nMatchLen | 0x8000;
1412+
match[m].offset = nMatchOffset;
1413+
m++;
1414+
1415+
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8);
1416+
break;
1417+
}
1418+
}
1419+
}
1420+
else {
1421+
break;
1422+
}
1423+
}
1424+
}
1425+
}
1426+
13611427
/* Compress optimally with the extra matches */
13621428
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
13631429
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, nArrivalsPerPosition);

src/shrink_context.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
6666
pCompressor->rep_len_handled_mask = NULL;
6767
pCompressor->first_offset_for_byte = NULL;
6868
pCompressor->next_offset_for_pos = NULL;
69+
pCompressor->offset_cache = NULL;
6970
pCompressor->min_match_size = nMinMatchSize;
7071
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
7172
pCompressor->min_match_size = nMinMatchSizeForFormat;
@@ -116,7 +117,10 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
116117
if (pCompressor->first_offset_for_byte) {
117118
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
118119
if (pCompressor->next_offset_for_pos) {
119-
return 0;
120+
pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int));
121+
if (pCompressor->offset_cache) {
122+
return 0;
123+
}
120124
}
121125
}
122126
}
@@ -146,6 +150,11 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
146150
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
147151
divsufsort_destroy(&pCompressor->divsufsort_context);
148152

153+
if (pCompressor->offset_cache) {
154+
free(pCompressor->offset_cache);
155+
pCompressor->offset_cache = NULL;
156+
}
157+
149158
if (pCompressor->next_offset_for_pos) {
150159
free(pCompressor->next_offset_for_pos);
151160
pCompressor->next_offset_for_pos = NULL;

src/shrink_context.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ typedef struct _lzsa_compressor {
128128
char *rep_len_handled_mask;
129129
int *first_offset_for_byte;
130130
int *next_offset_for_pos;
131+
int *offset_cache;
131132
int min_match_size;
132133
int format_version;
133134
int flags;

0 commit comments

Comments (0)