Skip to content

Commit b7dcf60

Browse files
Laurence Bank
authored and committed
speed improvements
1 parent f76f8f4 commit b7dcf60

File tree

3 files changed

+38
-13
lines changed

3 files changed

+38
-13
lines changed

src/inffast.c

Lines changed: 34 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,16 @@
1212
#define ALLOWS_UNALIGNED
1313
#endif
1414

15+
#if INTPTR_MAX == INT64_MAX
16+
#define REGISTER_WIDTH 64
17+
typedef uint64_t BIGUINT;
18+
typedef uint32_t SMALLUINT;
19+
#else
20+
#define REGISTER_WIDTH 32
21+
typedef uint32_t BIGUINT;
22+
typedef uint16_t SMALLUINT;
23+
#endif // native register size
24+
1525
#ifdef ASMINF
1626
# pragma message("Assembler code may have bugs -- use at your own risk")
1727
#else
@@ -68,7 +78,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
6878
unsigned whave; /* valid bytes in the window */
6979
unsigned wnext; /* window write index */
7080
unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
71-
unsigned long hold; /* local strm->hold */
81+
BIGUINT hold, tmpbits; /* local strm->hold */
82+
// unsigned long hold; /* local strm->hold */
7283
unsigned bits; /* local strm->bits */
7384
code const FAR *lcode; /* local strm->lencode */
7485
code const FAR *dcode; /* local strm->distcode */
@@ -105,11 +116,12 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
105116
/* decode literals and length/distances until end-of-block or not enough
106117
input data or output space */
107118
do {
108-
if (bits < 15) {
119+
if (bits < (REGISTER_WIDTH/2)) { // helps on 32 and 64-bit CPUs
109120
#ifdef ALLOWS_UNALIGNED
110-
hold |= (*(uint16_t *)in << bits);
111-
in += 2;
112-
bits += 16;
121+
tmpbits = *(SMALLUINT *)in;
122+
hold |= (BIGUINT)(tmpbits << bits);
123+
in += sizeof(SMALLUINT);
124+
bits += (REGISTER_WIDTH / 2);
113125
#else
114126
hold += (unsigned long)(*in++) << bits;
115127
bits += 8;
@@ -133,20 +145,29 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
133145
len = (unsigned)(here.val);
134146
op &= 15; /* number of extra bits */
135147
if (op) {
148+
#if REGISTER_WIDTH == 32
136149
if (bits < op) {
137-
hold += (unsigned long)(*in++) << bits;
150+
hold += (uint32_t)(*in++) << bits;
138151
bits += 8;
139152
}
153+
#endif
140154
len += (unsigned)hold & ((1U << op) - 1);
141155
hold >>= op;
142156
bits -= op;
143157
}
144158
Tracevv((stderr, "inflate: length %u\n", len));
145-
if (bits < 15) {
159+
if (bits < (REGISTER_WIDTH/2)) { // helps on 32 and 64-bit CPUs
160+
#ifdef UNALIGNED_OK
161+
tmpbits = *(SMALLUINT *)in;
162+
hold |= (BIGUINT)(tmpbits << bits);
163+
in += sizeof(SMALLUINT);
164+
bits += (REGISTER_WIDTH / 2);
165+
#else
146166
hold += (unsigned long)(*in++) << bits;
147167
bits += 8;
148168
hold += (unsigned long)(*in++) << bits;
149169
bits += 8;
170+
#endif
150171
}
151172
here = dcode[hold & dmask];
152173
dodist:
@@ -157,6 +178,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
157178
if (op & 16) { /* distance base */
158179
dist = (unsigned)(here.val);
159180
op &= 15; /* number of extra bits */
181+
#if REGISTER_WIDTH == 32
160182
if (bits < op) {
161183
#ifdef ALLOWS_UNALIGNED
162184
hold |= (*(uint16_t *)in << bits);
@@ -171,6 +193,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
171193
}
172194
#endif // ALLOWS_UNALIGNED
173195
}
196+
#endif // 32-bit CPU
174197
dist += (unsigned)hold & ((1U << op) - 1);
175198
#ifdef INFLATE_STRICT
176199
if (dist > dmax) {
@@ -348,10 +371,10 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
348371
} while (in < last && out < end);
349372

350373
/* return unused bytes (on entry, bits < 8, so in won't go too far back) */
351-
len = bits >> 3;
352-
in -= len;
353-
bits -= len << 3;
354-
hold &= (1U << bits) - 1;
374+
// len = bits >> 3;
375+
// in -= len;
376+
// bits -= len << 3;
377+
// hold &= (1 << bits) - 1;
355378

356379
/* update state and return */
357380
strm->next_in = in;

src/inflate.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,8 @@ int value;
266266
state->bits = 0;
267267
return Z_OK;
268268
}
269-
if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
269+
if (bits > 16 || state->bits + (uInt)bits > 32)
270+
return Z_STREAM_ERROR;
270271
value &= (1L << bits) - 1;
271272
state->hold += (unsigned)value << state->bits;
272273
state->bits += (uInt)bits;

src/inflate.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,8 @@ struct inflate_state {
9898
unsigned wnext; /* window write index */
9999
unsigned char FAR *window; /* allocated sliding window, if needed */
100100
/* bit accumulator */
101-
unsigned long hold; /* input bit accumulator */
101+
uint64_t hold; /* input bit accumulator */
102+
// unsigned long hold; /* input bit accumulator */
102103
unsigned bits; /* number of bits in "in" */
103104
/* for string and stored block copying */
104105
unsigned length; /* literal or length of data to copy */

0 commit comments

Comments
 (0)