// Not pretty, but lzokay needs to be able to read its input from a stream.
// About 99% of this code is taken from the lzokay library in the lib folder; the only change is that std::istream is used as the input.
3+
4+ #include < istream>
5+
6+ #include " lzokay.hpp"
7+ using namespace lzokay ;
8+
9+
/* Input bounds check used inside decompressStream.
 *
 * Expects these names at the expansion site: `inp` (std::istream&),
 * `inp_end` (std::size_t, absolute stream position one past the last input
 * byte), `outp`, `dst` and `dst_size`.
 *
 * Stores the number of bytes produced so far in `dst_size` and returns
 * EResult::InputOverrun from the enclosing function when
 *   - tellg() reports failure (negative position), or
 *   - fewer than `count` bytes remain before `inp_end`.
 *
 * The comparison is done on the remaining byte count instead of
 * `position + count`, so a very large `count` cannot wrap around and slip
 * past the check.
 */
#define NEEDS_IN(count) { \
  const std::streamoff curTell = static_cast<std::streamoff>(inp.tellg()); \
  if (curTell < 0 || \
      static_cast<std::size_t>(curTell) > inp_end || \
      static_cast<std::size_t>(count) > inp_end - static_cast<std::size_t>(curTell)) { \
    dst_size = outp - dst; \
    return EResult::InputOverrun; \
  }}
16+
/* Output bounds check used inside decompressStream.
 *
 * Expects these names at the expansion site: `outp` (current write pointer),
 * `outp_end` (one past the end of the output buffer, with outp <= outp_end),
 * `dst` and `dst_size`.
 *
 * Stores the number of bytes produced so far in `dst_size` and returns
 * EResult::OutputOverrun from the enclosing function when fewer than `count`
 * bytes of output space remain.
 *
 * The check compares `count` with the remaining capacity rather than forming
 * `outp + count`, which would be an out-of-range pointer (and could wrap) for
 * a very large `count`. The braces keep the `if` from capturing a following
 * `else` at the expansion site.
 */
#define NEEDS_OUT(count) { \
  if (static_cast<std::size_t>(count) > static_cast<std::size_t>(outp_end - outp)) { \
    dst_size = outp - dst; \
    return EResult::OutputOverrun; \
  }}
22+
/* Consumes the run of 0x00 length-extension bytes at the current position of
 * `inp` and declares `offset` (std::size_t) in the enclosing scope, holding
 * the number of zero bytes consumed. The first non-zero byte (or EOF) is left
 * unread.
 *
 * Expects `inp`, `outp`, `dst`, `dst_size` and `Max255Count` at the expansion
 * site. If the run is longer than Max255Count, stores the number of bytes
 * produced so far in `dst_size` and returns EResult::Error from the enclosing
 * function.
 *
 * The run is counted directly instead of being derived from two tellg()
 * calls, so the result stays correct when tellg() cannot report a position
 * (for example once peek() has reached the end of the stream).
 */
#define CONSUME_ZERO_BYTE_LENGTH \
  std::size_t offset = 0; \
  { \
    while (inp.peek() == 0) { \
      inp.get(); \
      ++offset; \
    } \
    if (offset > Max255Count) { \
      dst_size = outp - dst; \
      return EResult::Error; \
    } \
  }
34+
/* Marker bits of the four LZO1X match-instruction classes. decompressStream
 * tests the instruction byte against M3Marker and M4Marker; M1Marker and
 * M2Marker are not referenced in the visible part of this file. */
constexpr uint32_t M1Marker{0x00};
constexpr uint32_t M2Marker{0x40};
constexpr uint32_t M3Marker{0x20};
constexpr uint32_t M4Marker{0x10};

/* Upper bound on the number of 0x00 length-extension bytes accepted by
 * CONSUME_ZERO_BYTE_LENGTH. It is small enough that `count * 255` plus the
 * small additive terms used with it cannot wrap around std::size_t. */
constexpr std::size_t Max255Count{~std::size_t{0} / 255 - 2};
41+
/* Reads two bytes from `p` and returns them as a little-endian 16-bit value
 * (first byte = low 8 bits, second byte = high 8 bits).
 *
 * The value is assembled from the individual bytes, so the result does not
 * depend on the byte order of the host. Bytes that could not be read count as
 * zero; in that case read() leaves the stream in a failed state, which the
 * caller can test.
 */
static uint16_t get_le16(std::istream& p) {
  unsigned char bytes[2] = {0, 0};
  p.read(reinterpret_cast<char*>(bytes), sizeof bytes);
  return static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
}
47+
48+
49+ EResult decompressStream (std::istream& src, std::size_t src_size,
50+ uint8_t * dst, std::size_t init_dst_size,
51+ std::size_t & dst_size) {
52+ dst_size = init_dst_size;
53+
54+ if (src_size < 3 ) {
55+ dst_size = 0 ;
56+ return EResult::InputOverrun;
57+ }
58+
59+ std::istream& inp = src;
60+ size_t inp_end = src_size;
61+ uint8_t * outp = dst;
62+ uint8_t * outp_end = dst + dst_size;
63+ uint8_t * lbcur;
64+ std::size_t lblen;
65+ std::size_t state = 0 ;
66+ std::size_t nstate = 0 ;
67+
68+ /* First byte encoding */
69+ if (inp.peek () >= 22 ) {
70+ /* 22..255 : copy literal string
71+ * length = (byte - 17) = 4..238
72+ * state = 4 [ don't copy extra literals ]
73+ * skip byte
74+ */
75+ std::size_t len = inp.get () - uint8_t (17 );
76+ NEEDS_IN (len)
77+ NEEDS_OUT (len)
78+ for (std::size_t i = 0 ; i < len; ++i)
79+ *outp++ = inp.get ();
80+ state = 4 ;
81+ } else if (inp.peek () >= 18 ) {
82+ /* 18..21 : copy 0..3 literals
83+ * state = (byte - 17) = 0..3 [ copy <state> literals ]
84+ * skip byte
85+ */
86+ nstate = inp.get () - uint8_t (17 );
87+ state = nstate;
88+ NEEDS_IN (nstate)
89+ NEEDS_OUT (nstate)
90+ for (std::size_t i = 0 ; i < nstate; ++i)
91+ *outp++ = inp.get ();
92+ }
93+ /* 0..17 : follow regular instruction encoding, see below. It is worth
94+ * noting that codes 16 and 17 will represent a block copy from
95+ * the dictionary which is empty, and that they will always be
96+ * invalid at this place.
97+ */
98+
99+ while (true ) {
100+ NEEDS_IN (1 )
101+ uint8_t inst = inp.get ();
102+ if (inst & 0xC0 ) {
103+ /* [M2]
104+ * 1 L L D D D S S (128..255)
105+ * Copy 5-8 bytes from block within 2kB distance
106+ * state = S (copy S literals after this block)
107+ * length = 5 + L
108+ * Always followed by exactly one byte : H H H H H H H H
109+ * distance = (H << 3) + D + 1
110+ *
111+ * 0 1 L D D D S S (64..127)
112+ * Copy 3-4 bytes from block within 2kB distance
113+ * state = S (copy S literals after this block)
114+ * length = 3 + L
115+ * Always followed by exactly one byte : H H H H H H H H
116+ * distance = (H << 3) + D + 1
117+ */
118+ NEEDS_IN (1 )
119+ lbcur = outp - ((inp.get () << 3 ) + ((inst >> 2 ) & 0x7 ) + 1 );
120+ lblen = std::size_t (inst >> 5 ) + 1 ;
121+ nstate = inst & uint8_t (0x3 );
122+ } else if (inst & M3Marker) {
123+ /* [M3]
124+ * 0 0 1 L L L L L (32..63)
125+ * Copy of small block within 16kB distance (preferably less than 34B)
126+ * length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
127+ * Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
128+ * distance = D + 1
129+ * state = S (copy S literals after this block)
130+ */
131+ lblen = std::size_t (inst & uint8_t (0x1f )) + 2 ;
132+ if (lblen == 2 ) {
133+ CONSUME_ZERO_BYTE_LENGTH
134+ NEEDS_IN (1 )
135+ lblen += offset * 255 + 31 + inp.get ();
136+ }
137+ NEEDS_IN (2 )
138+ nstate = get_le16 (inp);
139+ // inp += 2;
140+ lbcur = outp - ((nstate >> 2 ) + 1 );
141+ nstate &= 0x3 ;
142+ } else if (inst & M4Marker) {
143+ /* [M4]
144+ * 0 0 0 1 H L L L (16..31)
145+ * Copy of a block within 16..48kB distance (preferably less than 10B)
146+ * length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
147+ * Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
148+ * distance = 16384 + (H << 14) + D
149+ * state = S (copy S literals after this block)
150+ * End of stream is reached if distance == 16384
151+ */
152+ lblen = std::size_t (inst & uint8_t (0x7 )) + 2 ;
153+ if (lblen == 2 ) {
154+ CONSUME_ZERO_BYTE_LENGTH
155+ NEEDS_IN (1 )
156+ lblen += offset * 255 + 7 + inp.get ();
157+ }
158+ NEEDS_IN (2 )
159+ nstate = get_le16 (inp);
160+ // inp += 2;
161+ lbcur = outp - (((inst & 0x8 ) << 11 ) + (nstate >> 2 ));
162+ nstate &= 0x3 ;
163+ if (lbcur == outp)
164+ break ; /* Stream finished */
165+ lbcur -= 16384 ;
166+ } else {
167+ /* [M1] Depends on the number of literals copied by the last instruction. */
168+ if (state == 0 ) {
169+ /* If last instruction did not copy any literal (state == 0), this
170+ * encoding will be a copy of 4 or more literal, and must be interpreted
171+ * like this :
172+ *
173+ * 0 0 0 0 L L L L (0..15) : copy long literal string
174+ * length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
175+ * state = 4 (no extra literals are copied)
176+ */
177+ std::size_t len = inst + 3 ;
178+ if (len == 3 ) {
179+ CONSUME_ZERO_BYTE_LENGTH
180+ NEEDS_IN (1 )
181+ len += offset * 255 + 15 + inp.get ();
182+ }
183+ /* copy_literal_run */
184+ NEEDS_IN (len)
185+ NEEDS_OUT (len)
186+ for (std::size_t i = 0 ; i < len; ++i)
187+ *outp++ = inp.get ();
188+ state = 4 ;
189+ continue ;
190+ } else if (state != 4 ) {
191+ /* If last instruction used to copy between 1 to 3 literals (encoded in
192+ * the instruction's opcode or distance), the instruction is a copy of a
193+ * 2-byte block from the dictionary within a 1kB distance. It is worth
194+ * noting that this instruction provides little savings since it uses 2
195+ * bytes to encode a copy of 2 other bytes but it encodes the number of
196+ * following literals for free. It must be interpreted like this :
197+ *
198+ * 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance
199+ * length = 2
200+ * state = S (copy S literals after this block)
201+ * Always followed by exactly one byte : H H H H H H H H
202+ * distance = (H << 2) + D + 1
203+ */
204+ NEEDS_IN (1 )
205+ nstate = inst & uint8_t (0x3 );
206+ lbcur = outp - ((inst >> 2 ) + (inp.get () << 2 ) + 1 );
207+ lblen = 2 ;
208+ } else {
209+ /* If last instruction used to copy 4 or more literals (as detected by
210+ * state == 4), the instruction becomes a copy of a 3-byte block from the
211+ * dictionary from a 2..3kB distance, and must be interpreted like this :
212+ *
213+ * 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance
214+ * length = 3
215+ * state = S (copy S literals after this block)
216+ * Always followed by exactly one byte : H H H H H H H H
217+ * distance = (H << 2) + D + 2049
218+ */
219+ NEEDS_IN (1 )
220+ nstate = inst & uint8_t (0x3 );
221+ lbcur = outp - ((inst >> 2 ) + (inp.get () << 2 ) + 2049 );
222+ lblen = 3 ;
223+ }
224+ }
225+ if (lbcur < dst) {
226+ dst_size = outp - dst;
227+ return EResult::LookbehindOverrun;
228+ }
229+ NEEDS_IN (nstate)
230+ NEEDS_OUT (lblen + nstate)
231+ /* Copy lookbehind */
232+ for (std::size_t i = 0 ; i < lblen; ++i)
233+ *outp++ = *lbcur++;
234+ state = nstate;
235+ /* Copy literal */
236+ for (std::size_t i = 0 ; i < nstate; ++i)
237+ *outp++ = inp.get ();
238+ }
239+
240+ dst_size = outp - dst;
241+ if (lblen != 3 ) /* Ensure terminating M4 was encountered */
242+ return EResult::Error;
243+ if (inp.tellg () == inp_end)
244+ return EResult::Success;
245+ else if (inp.tellg () < inp_end)
246+ return EResult::InputNotConsumed;
247+ else
248+ return EResult::InputOverrun;
249+ }
0 commit comments