Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def build_extensions(self):
with open("README.rst", "r", encoding="utf-8") as readme:
long_description = readme.read()

prefer_system_zopfli = bool(os.environ.get('USE_SYSTEM_ZOPFLI'))
prefer_system_zopfli = False
# bool(os.environ.get('USE_SYSTEM_ZOPFLI'))
if prefer_system_zopfli:
zopfli_ext_kwargs = {
'sources': [
Expand Down
236 changes: 231 additions & 5 deletions src/zopfli/zlib.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,240 @@
from __future__ import absolute_import

import zopfli
import zopfli.zopfli
from struct import pack
from zlib import adler32
from zlib import error
# This is mostly for compatibility reasons
from zlib import crc32
from zlib import decompress, decompressobj
# Report an old zlib version on purpose so that callers do not expect
# features of newer zlib releases from this emulation.
ZLIB_RUNTIME_VERSION = '1.2.8'
ZLIB_VERSION = '1.2.8'

try:
    from zlib import Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH
    from zlib import Z_NO_COMPRESSION, Z_BEST_SPEED, Z_BEST_COMPRESSION
    from zlib import Z_DEFAULT_COMPRESSION
    from zlib import DEFLATED, DEF_MEM_LEVEL, MAX_WBITS, DEF_BUF_SIZE
    from zlib import Z_DEFAULT_STRATEGY
    from zlib import Z_FILTERED, Z_HUFFMAN_ONLY, Z_RLE, Z_FIXED
except ImportError:
    # This module cannot really work without the original zlib, but the
    # constants are spelled out here to document their values and usage.

    # Flush modes
    Z_NO_FLUSH = 0
    # Z_PARTIAL_FLUSH = 1
    Z_SYNC_FLUSH = 2
    Z_FULL_FLUSH = 3
    Z_FINISH = 4
    # Z_BLOCK = 5
    # Z_TREES = 6

    # Compression levels.
    Z_NO_COMPRESSION = 0  # no use for now
    Z_BEST_SPEED = 1
    Z_BEST_COMPRESSION = 9
    Z_DEFAULT_COMPRESSION = -1

    # The deflate compression method (the only one supported in this version).
    DEFLATED = 8
    DEF_MEM_LEVEL = 8
    DEF_BUF_SIZE = 16384
    MAX_WBITS = 15

    # Compression strategies (not used by this module).
    Z_FILTERED = 1
    Z_HUFFMAN_ONLY = 2
    Z_RLE = 3
    Z_FIXED = 4
    Z_DEFAULT_STRATEGY = 0

# Maps a zlib compression level to the zopfli ``numiterations`` option that
# is used when the caller does not pass ``numiterations`` explicitly.
levit = {
    -1: 15,
    0: 1,
    1: 1,
    2: 3,
    3: 5,
    4: 10,
    5: 15,
    6: 20,
    7: 30,
    8: 50,
    9: 100,
}

# compressobj.compress() buffers its input and only runs the compressor once
# more than this many bytes are pending.
MASTER_BLOCK_SIZE = 20000000


def int2bitseq(data, length):
    """Return the ``length`` lowest bits of ``data``, least significant first.

    Example: ``int2bitseq(5, 4)`` gives ``[1, 0, 1, 0]``.
    """
    bits = []
    remaining = data
    for _ in range(length):
        # Peel off the lowest bit, then shift the rest down by one position.
        bits.append(remaining % 2)
        remaining = remaining // 2
    return bits


def bitseq2int(data):
    """Combine a bit sequence (least significant bit first) into an integer.

    Inverse of ``int2bitseq``: ``bitseq2int([1, 0, 1])`` gives ``5``.
    """
    total = 0
    weight = 1
    for bit in data:
        # Each successive position is worth twice the previous one.
        total += bit * weight
        weight *= 2
    return total


class compressobj(object):
    """Streaming compressor that mimics ``zlib.compressobj`` using zopfli.

    Input passed to ``compress()`` is buffered; the actual compression is done
    by ``zopfli.zopfli.deflate`` when the buffer exceeds ``MASTER_BLOCK_SIZE``
    or when ``flush()`` is called.
    """

    def __init__(self, level=Z_DEFAULT_COMPRESSION, method=DEFLATED,
                 wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL,
                 strategy=Z_DEFAULT_STRATEGY, zdict=None, **kwargs):
        """Simulate zlib ``deflateInit2``.

        level    - compression level; translated to zopfli ``numiterations``
                   through ``levit`` unless ``numiterations`` is passed in
                   ``kwargs``
        method   - compression method, only DEFLATED is supported
        wbits    - documented range is 8..15 (the check below accepts
                   magnitudes 5..15); practically ignored.
                   Negative values select raw deflate (no header/checksum).
                   zlib's gzip mode ("add 16 to wbits") is not implemented.
        memLevel - ignored by zopfli
        strategy - ignored by zopfli
        zdict    - a predefined compression dictionary; it has to be supplied
                   again for decompression
        kwargs   - extra options forwarded to ``zopfli.zopfli.deflate``

        Raises ``zlib.error`` for an unsupported method or level and
        ``ValueError`` for an unsupported ``wbits``.
        """
        if method != DEFLATED:
            raise error('Unsupported compression method: %r' % (method,))
        if abs(wbits) > MAX_WBITS or abs(wbits) < 5:
            raise ValueError('Invalid initialization option')
        self.crc = None
        self.prehist = bytearray()
        self.closed = False
        self.lastbyte = b''
        self.bit = 0
        # Defined before the priming flush below so that flush() never has to
        # rely on short-circuit evaluation to avoid a missing attribute.
        self.first = True
        if zdict:
            # Prime the history window: compress the dictionary once in raw
            # mode with the cheapest settings and throw the output away.
            self.buf = bytearray(zdict)
            self.opt = {'numiterations': 1}
            self.raw = True
            self.flush(Z_SYNC_FLUSH)  # and omit result
            # Stored as the DICTID field of the zlib header; masked so it is
            # unsigned even where adler32() returns a signed value.
            self.zdict = adler32(zdict) & 0xffffffff
        else:
            self.zdict = False
        self.buf = bytearray()
        self.raw = wbits < 0
        self.opt = kwargs
        if 'numiterations' not in self.opt:
            if level not in levit:
                raise error('Bad compression level')
            self.opt['numiterations'] = levit[level]

    def _header(self):
        """Return the 2-byte zlib header, plus the DICTID when a zdict is set."""
        cmf = 120
        flevel = 3
        fdict = bool(self.zdict)
        cmfflg = 256 * cmf + fdict * 32 + flevel * 64
        # FCHECK makes the 16-bit header value a multiple of 31.
        fcheck = 31 - cmfflg % 31
        cmfflg += fcheck
        if not fdict:
            return pack('>H', cmfflg)
        return pack('>H', cmfflg) + pack('>L', self.zdict & 0xffffffff)

    def _updatecrc(self):
        """Fold the pending buffer into the running Adler-32 (non-raw only)."""
        if self.buf is None or self.raw:
            return
        pending = bytes(self.buf)
        if self.crc is None:
            checksum = adler32(pending)
        else:
            checksum = adler32(pending, self.crc)
        # Keep the value unsigned so pack('>L', ...) always accepts it.
        self.crc = checksum & 0xffffffff

    def _compress(self, final=None):
        """Compress the pending buffer and return the bytes that are complete.

        The previous history plus the pending data is handed to
        ``zopfli.zopfli.deflate``; ``prehist`` tells it where the new data
        starts. For a non-final call the last 32 output bytes are held back in
        ``self.lastbyte`` and passed in again as ``old_tail`` next time.
        """
        self._updatecrc()
        blockfinal = 1 if final else 0
        indata = self.prehist
        prehist = len(self.prehist)
        indata.extend(self.buf)
        self.buf = bytearray()
        # Keep the tail of everything seen so far as history for the next call.
        self.prehist = indata[-33000:]
        data = zopfli.zopfli.deflate(bytes(indata),
                                     old_tail=bytes(self.lastbyte),
                                     bitpointer=self.bit,
                                     blockfinal=blockfinal,
                                     prehist=prehist, **self.opt)
        res = bytearray(data[0])
        self.bit = data[1]

        if final:
            self.lastbyte = b''
            return res
        self.lastbyte = res[-32:]
        return res[:-32]

    def compress(self, string):
        """Buffer ``string``; return compressed bytes once enough is pending.

        Returns ``b''`` while no more than ``MASTER_BLOCK_SIZE`` bytes are
        buffered. Raises ``zlib.error`` when the stream was already finished
        with ``flush(Z_FINISH)``, as data added then could never be emitted.
        """
        if self.closed:
            raise error('Inconsistent stream state')
        self.buf.extend(bytearray(string))
        if len(self.buf) <= MASTER_BLOCK_SIZE:
            return b''
        out = bytearray()
        if not self.raw and self.first:
            out.extend(self._header())
            self.first = False
        out.extend(self._compress())
        return bytes(out)

    def flush(self, mode=Z_FINISH):
        """Compress everything pending and return the resulting bytes.

        mode - Z_FINISH (default) ends the stream and appends the Adler-32
               trailer unless the stream is raw; Z_SYNC_FLUSH and Z_FULL_FLUSH
               byte-align the output with an empty stored block
               (Z_FULL_FLUSH also drops the history); Z_NO_FLUSH only emits
               the header if it has not been written yet.

        Raises ``zlib.error`` when the stream was already finished.
        """
        def encodedalign(prev):
            """Build an empty stored block following the leftover bits ``prev``."""
            res = bytearray()
            z = bytearray(prev)
            # Not final, type 00
            z.extend([0, 0, 0])
            # if old tail + header cross byte border
            tgtlen = 8 if len(z) <= 8 else 16
            # Fit to bytes
            addlen = tgtlen - len(z)
            z.extend((0,) * addlen)
            # Add tail and header to result
            res.append(bitseq2int(z[:8]))
            if tgtlen == 16:
                res.append(bitseq2int(z[8:]))
            # zero length as we only want to align, no data
            res.extend(pack('>H', 0))  # LEN
            res.extend(pack('>H', 65535))  # NLEN
            return res

        if self.closed:
            raise error('Inconsistent stream state')
        out = bytearray()
        self.closed = mode == Z_FINISH
        if not self.raw and self.first:
            out.extend(self._header())
            self.first = False
        if mode == Z_NO_FLUSH:
            return bytes(out)
        out.extend(self._compress(mode == Z_FINISH))
        if mode != Z_FINISH:
            self.bit = self.bit % 8
            # Add an empty stored block to align the data to a byte boundary.
            if self.bit:
                # The last held-back byte is only partly filled: take its
                # used bits and continue writing right after them.
                work = int2bitseq(self.lastbyte.pop(), 8)[:self.bit]
            else:
                work = []
            self.lastbyte.extend(encodedalign(work))
            out.extend(self.lastbyte)
            self.lastbyte = b''
            self.bit = 0
            if mode == Z_FULL_FLUSH:
                self.prehist = bytearray()

        if not self.raw and mode == Z_FINISH:
            out.extend(pack('>L', self.crc & 0xffffffff))
        return bytes(out)


def compress(data, level=-1, **kwargs):
    """Compress ``data`` in one call and return a zlib container.

    data   - the bytes-like object to compress; it is converted with
             ``bytes()`` before being handed to zopfli
    level  - zlib-style compression level (-1..9); translated to zopfli
             ``numiterations`` through ``levit`` unless ``numiterations``
             is given explicitly in ``kwargs``
    kwargs - forwarded to ``zopfli.zopfli.compress``; ``gzip_mode`` is always
             forced to 0 so that a zlib (not gzip) container is produced.
             See ``zopfli.__COMPRESSOR_DOCSTRING__`` for the shared notes on
             the compressor options.

    Returns:
      String containing a zlib container

    Raises ``zlib.error`` when ``level`` is not a known compression level.
    """
    if 'numiterations' not in kwargs:
        if level not in levit:
            raise error('Bad compression level')
        kwargs['numiterations'] = levit[level]

    kwargs['gzip_mode'] = 0
    return zopfli.zopfli.compress(bytes(data), **kwargs)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed this to make some unit tests pass.
Switching to the 'y' format in the C code might be a better solution, but that depends on whether compatibility with Python 2.7 is required.

62 changes: 62 additions & 0 deletions src/zopflimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define ZOPFLI_H "../zopfli/src/zopfli/zopfli.h"
#endif
#include ZOPFLI_H
#include "../zopfli/src/zopfli/deflate.h"

#if PY_MAJOR_VERSION >= 3
#define PyInt_Check PyLong_Check
Expand Down Expand Up @@ -63,6 +64,56 @@ zopfli_compress(PyObject *self, PyObject *args, PyObject *keywrds)
return returnValue;
}

/*
 * zopfli.zopfli.deflate(data, ..., old_tail=b'', prehist=0)
 *
 * Runs ZopfliDeflatePart over data[prehist:len(data)] and appends the result
 * to a copy of old_tail, continuing at bit position `bitpointer` of its last
 * byte. Returns a tuple (output bytes, new bitpointer).
 *
 * Raises ValueError for an inconsistent prehist/bitpointer and MemoryError
 * when the working copies cannot be allocated.
 */
static PyObject *
zopfli_deflate(PyObject *self, PyObject *args, PyObject *keywrds)
{
  const unsigned char *in = NULL;
  /* Stays NULL (with outsize == 0) when old_tail is not supplied. */
  const unsigned char *out = NULL;
  unsigned char *in2 = NULL;
  unsigned char *out2 = NULL;
  /* NOTE(review): the "#" length arguments are kept as size_t like in the
     rest of this file; this assumes PY_SSIZE_T_CLEAN is defined before
     Python.h is included - confirm at the top of the file. */
  size_t insize = 0;
  size_t outsize = 0;
  size_t outcap = 0;
  /* Parsed with "i", so it has to be an int (it was a size_t, which only
     happened to work on little-endian machines). */
  int prehist = 0;
  int blocktype = 2;
  int blockfinal = 1;
  unsigned char bitpointer = 0;
  PyObject *returnValue;
  ZopfliOptions options;

  static char *kwlist[] = {"data", "verbose", "numiterations", "blocksplitting", "blocksplittinglast", "blocksplittingmax", "blocktype","blockfinal","bitpointer","old_tail","prehist", NULL};

  ZopfliInitOptions(&options);
  options.verbose = 0;
  options.numiterations = 15;
  options.blocksplitting = 1;
  options.blocksplittinglast = 0;
  options.blocksplittingmax = 15;

  if (!PyArg_ParseTupleAndKeywords(args, keywrds, "y#|iiiiiiiBy#i", kwlist, &in, &insize,
                                   &options.verbose,
                                   &options.numiterations,
                                   &options.blocksplitting,
                                   &options.blocksplittinglast,
                                   &options.blocksplittingmax,
                                   &blocktype,
                                   &blockfinal,
                                   &bitpointer,
                                   &out, &outsize,
                                   &prehist))
    return NULL;

  /* prehist is the start offset inside data; it must not point past the end. */
  if (prehist < 0 || (size_t)prehist > insize) {
    PyErr_SetString(PyExc_ValueError,
                    "prehist must be between 0 and len(data)");
    return NULL;
  }
  /* A non-zero bit pointer means "keep filling the last byte of old_tail",
     so there has to be such a byte, and a byte only has bits 0..7. */
  if (bitpointer > 7 || (bitpointer != 0 && outsize == 0)) {
    PyErr_SetString(PyExc_ValueError,
                    "bitpointer must be in 0..7 and needs a non-empty old_tail when non-zero");
    return NULL;
  }

  /* Work on private copies: the GIL is released while zopfli runs and the
     output buffer is reallocated by zopfli. Allocate while the GIL is still
     held so that failures can be reported as Python exceptions. */
  in2 = malloc(insize ? insize : 1);
  if (in2 == NULL)
    return PyErr_NoMemory();
  if (insize)
    memcpy(in2, in, insize);

  if (outsize > 0) {
    /* zopfli's append macro only reallocates when the current size is a
       power of two, i.e. it assumes the capacity is the next power of two
       at or above the size. Allocating exactly outsize bytes would let it
       write past the end of the buffer. */
    outcap = 1;
    while (outcap < outsize)
      outcap <<= 1;
    out2 = malloc(outcap);
    if (out2 == NULL) {
      free(in2);
      return PyErr_NoMemory();
    }
    memcpy(out2, out, outsize);
  }
  /* With outsize == 0 the buffer stays NULL: zopfli allocates it itself on
     the first append. */

  Py_BEGIN_ALLOW_THREADS
  ZopfliDeflatePart(&options, blocktype, blockfinal, in2, (size_t)prehist, insize, &bitpointer, &out2, &outsize);
  Py_END_ALLOW_THREADS

  free(in2);
  returnValue = Py_BuildValue("y#B",
                              out2 != NULL ? (const char *)out2 : "",
                              (Py_ssize_t)outsize, bitpointer);
  free(out2);
  return returnValue;
}

PyDoc_STRVAR(compress__doc__,
"zopfli.zopfli.compress applies zopfli zip or gzip compression to an obj."
"" \
Expand All @@ -73,10 +124,21 @@ PyDoc_STRVAR(compress__doc__,
"If gzip_mode is set to a non-zero value, a Gzip compatbile container will "
"be generated, otherwise a zlib compatible container will be generated. ");

/* Docstring of zopfli.zopfli.deflate; the keyword names match kwlist in
   zopfli_deflate. */
PyDoc_STRVAR(deflate__doc__,
"zopfli.zopfli.deflate applies zopfli deflate compression to an obj.\n"
"\n"
"zopfli.zopfli.deflate(\n"
"  data, verbose=0, numiterations=15, blocksplitting=1,\n"
"  blocksplittinglast=0, blocksplittingmax=15,\n"
"  blocktype=2, blockfinal=1, bitpointer=0, old_tail=b'', prehist=0)\n"
"\n"
"Partial compression is also possible: data[prehist:] is compressed and\n"
"appended to old_tail, continuing at bit position bitpointer.\n"
"Returns a tuple (compressed bytes, new bitpointer).");

static PyObject *ZopfliError;

/* Method table of the extension module: both entries take positional and
   keyword arguments; the all-NULL entry terminates the table. */
static PyMethodDef ZopfliMethods[] = {
  { "compress", (PyCFunction)zopfli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
  { "deflate", (PyCFunction)zopfli_deflate, METH_VARARGS | METH_KEYWORDS, deflate__doc__},
  { NULL, NULL, 0, NULL}
};

Expand Down
Loading