Skip to content

Commit 32a909c

Browse files
committed
Slightly optimized StreamHigh; as a result, memory usage increased and compression increased slightly.
1 parent 9ed6fb6 commit 32a909c

File tree

3 files changed

+38
-24
lines changed

3 files changed

+38
-24
lines changed

Blazer.Net/Algorithms/StreamEncoder.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ public class StreamEncoder : IEncoder
1818
// carefully selected random number
1919
private const uint MUL = 1527631329;
2020

21-
private const int SIZE_SHIFT = 1000000000;
21+
/// <summary>
22+
/// Amount subtracted from stored positions when the hashtable data is shifted for big data
23+
/// </summary>
24+
protected const int SIZE_SHIFT = 1000000000;
2225

2326
/// <summary>
2427
/// Hash array to store dictionary between iterations

Blazer.Net/Algorithms/StreamEncoderHigh.cs

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class StreamEncoderHigh : StreamEncoder
1818
/// <summary>
1919
/// Count of internal hash arrays
2020
/// </summary>
21-
public const int HASHARR_CNT = 16;
21+
public const int HASHARR_CNT = 32;
2222

2323
private const int MIN_SEQ_LEN = 4;
2424

@@ -63,6 +63,19 @@ public override void Init(int maxInBlockSize)
6363
_hashArrPos = new int[HASH_TABLE_LEN + 1];
6464
}
6565

66+
/// <summary>
67+
/// Shifts hashtable data: subtracts SIZE_SHIFT from every stored position (clamped at zero)
68+
/// </summary>
69+
/// <remarks>Use this method to periodically shift positions in array. It is required for streams longer than 2 GB</remarks>
70+
protected override void ShiftHashtable()
71+
{
72+
for (var i = 0; i < HASHARR_CNT; i++)
73+
for (var k = 0; k < HASH_TABLE_LEN; k++)
74+
_hashArr2[i][k] = Math.Max(0, _hashArr2[i][k] - SIZE_SHIFT); // clamp at 0; Math.Min here would make every entry non-positive and drop all stored positions
75+
76+
for (var k = 0; k < HASH_TABLE_LEN; k++) _hashArrPos[k] = _hashArrPos[k] & 0xffff; // keep only the low 16 bits of each write counter
77+
}
78+
6679
/// <summary>
6780
/// Compresses block of data. See <see cref="StreamEncoder.CompressBlockExternal"/> for details
6881
/// </summary>
@@ -81,15 +94,14 @@ private static int FindMaxSequence(byte[] bufferIn, int iterMax, int a, int b, i
8194
{
8295
if (a + minValToCompare >= iterMax) return -1;
8396
if (bufferIn[a + minValToCompare] != bufferIn[b + minValToCompare]) return -1;
84-
var total = 0;
97+
var origA = a;
8598
while (a < iterMax && bufferIn[a] == bufferIn[b])
8699
{
87100
a++;
88101
b++;
89-
total++;
90102
}
91103

92-
return total;
104+
return a - origA;
93105
}
94106

95107
/// <summary>
@@ -124,6 +136,7 @@ public static int CompressBlockHighExternal(byte[] bufferIn, int bufferInOffset,
124136
}
125137

126138
var iterMax = bufferInLength - 1;
139+
var cntToCheck = -1;
127140

128141
while (idxIn < iterMax)
129142
{
@@ -133,20 +146,22 @@ public static int CompressBlockHighExternal(byte[] bufferIn, int bufferInOffset,
133146
var hashKey = (mulEl * MUL) >> (32 - HASH_TABLE_BITS);
134147
int hashVal = 0;
135148

136-
var min = Math.Max(0, hashArrPos[hashKey] - HASHARR_CNT);
149+
var hashArrPo = cntToCheck >= 0 ? cntToCheck : hashArrPos[hashKey];
150+
var min = Math.Max(0, hashArrPo - HASHARR_CNT);
137151
var cnt = 0;
138-
var cntToCmp = 0;
139-
for (var i = hashArrPos[hashKey] - 1; i >= min; i--)
152+
var checkCnt = 3;
153+
for (var i = hashArrPo - 1; i >= min; i--)
140154
{
141155
var hashValLocal = hashArr[i & (HASHARR_CNT - 1)][hashKey] - globalOfs;
142156
int backRefLocal = idxIn - hashValLocal;
143157
if (backRefLocal < MAX_BACK_REF)
144158
{
145-
var cntLocal = FindMaxSequence(bufferIn, iterMax, idxIn - 3, hashValLocal - 3, cntToCmp) + (backRefLocal < 257 ? 1 : 0);
159+
var checkCntLocal = FindMaxSequence(bufferIn, bufferInLength, idxIn - 3, hashValLocal - 3, checkCnt);
160+
var cntLocal = checkCntLocal + (backRefLocal < 257 ? 1 : 0);
146161
if (cntLocal > cnt)
147162
{
148163
cnt = cntLocal;
149-
cntToCmp = cnt - 1;
164+
checkCnt = checkCntLocal;
150165
hashVal = hashValLocal;
151166
}
152167
}
@@ -167,7 +182,8 @@ public static int CompressBlockHighExternal(byte[] bufferIn, int bufferInOffset,
167182
var cntLocal = FindMaxSequence(bufferIn, iterMax, idxIn + 1 - 3, hashValLocal - 3, cnt - 1) + (backRefLocal < 257 ? 1 : 0);
168183
if (cntLocal > cnt)
169184
{
170-
cnt = 0;
185+
checkCnt = 0;
186+
cntToCheck = hashArrPos[hashKeyNext];
171187
break;
172188
}
173189
}
@@ -178,27 +194,22 @@ public static int CompressBlockHighExternal(byte[] bufferIn, int bufferInOffset,
178194
// var hashVal = hashArr[hashArrPos[hashKey] & (HASHARR_CNT - 1)][hashKey] - globalOfs;
179195
hashArr[(hashArrPos[hashKey]++) & (HASHARR_CNT - 1)][hashKey] = idxIn + globalOfs;
180196
// var isBig = backRef < 257 ? 0 : 1;
181-
if (cnt >= 4)
197+
if (checkCnt >= 4)
182198
{
199+
cntToCheck = -1;
183200
var backRef = idxIn - hashVal;
184201
cntLit = idxIn - lastProcessedIdxIn;
185202

186-
hashVal++;
187-
idxIn++;
188-
189-
while (idxIn < bufferInLength)
203+
checkCnt -= 3;
204+
while (checkCnt-- > 0)
190205
{
206+
hashVal++;
207+
idxIn++;
208+
191209
elemP0 = bufferIn[idxIn];
192210
mulEl = (mulEl << 8) | elemP0;
193211
hashKey = (mulEl * MUL) >> (32 - HASH_TABLE_BITS);
194212
hashArr[(hashArrPos[hashKey]++) & (HASHARR_CNT - 1)][hashKey] = idxIn + globalOfs;
195-
196-
if (bufferIn[hashVal] == elemP0)
197-
{
198-
hashVal++;
199-
idxIn++;
200-
}
201-
else break;
202213
}
203214

204215
int seqLen = idxIn - cntLit - lastProcessedIdxIn - MIN_SEQ_LEN + 3/* - isBig*/;

Blazer.Net/Properties/AssemblyInfo.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@
3232
// by using the '*' as shown below:
3333
// [assembly: AssemblyVersion("1.0.*")]
3434
[assembly: AssemblyVersion("0.9.0.0")]
35-
[assembly: AssemblyFileVersion("0.9.0.8")]
35+
[assembly: AssemblyFileVersion("0.9.1.9")]

0 commit comments

Comments
 (0)