diff --git a/setup.py b/setup.py index 328f7e1..f7b81a9 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,8 @@ Extension('lz4', [ 'src/lz4.c', 'src/lz4hc.c', + 'src/lz4io.c', + 'src/xxhash.c', 'src/python-lz4.c' ], extra_compile_args=[ "-std=c99", diff --git a/src/lz4hc.h b/src/lz4hc.h index 372f7ba..deb2394 100644 --- a/src/lz4hc.h +++ b/src/lz4hc.h @@ -1,60 +1,173 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Header File - Copyright (C) 2011-2012, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ -#pragma once - - -#if defined (__cplusplus) -extern "C" { -#endif - - -int LZ4_compressHC (const char* source, char* dest, int isize); - -/* -LZ4_compressHC : - return : the number of bytes in compressed buffer dest - note : destination buffer must be already allocated. - To avoid any problem, size it to handle worst cases situations (input data not compressible) - Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h") -*/ - - -/* Note : -Decompression functions are provided within regular LZ4 source code (see "lz4.h") (BSD license) -*/ - - -#if defined (__cplusplus) -} -#endif +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ +*/ +#pragma once + + +#if defined (__cplusplus) +extern "C" { +#endif + + +int LZ4_compressHC (const char* source, char* dest, int inputSize); +/* +LZ4_compressHC : + return : the number of bytes in compressed buffer dest + or 0 if compression fails. + note : destination buffer must be already allocated. + To avoid any problem, size it to handle worst cases situations (input data not compressible) + Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h") +*/ + +int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); +/* +LZ4_compressHC_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. + This function never writes outside of provided output buffer. + + inputSize : Max supported value is 1 GB + maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated) + return : the number of output bytes written in buffer 'dest' + or 0 if compression fails. 
+*/ + + +int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); +int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +/* + Same functions as above, but with programmable 'compressionLevel'. + Recommended values are between 4 and 9, although any value between 0 and 16 will work. + 'compressionLevel'==0 means use default 'compressionLevel' value. + Values above 16 behave the same as 16. + Equivalent variants exist for all other compression functions below. +*/ + +/* Note : +Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license) +*/ + + +/************************************** + Using an external allocation +**************************************/ +int LZ4_sizeofStateHC(void); +int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); + +int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); +int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); + +/* +These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods. +To know how much memory must be allocated for the compression tables, use : +int LZ4_sizeofStateHC(); + +Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0). + +The allocated memory can be provided to the compression functions using 'void* state' parameter. +LZ4_compressHC_withStateHC() and LZ4_compressHC_limitedOutput_withStateHC() are equivalent to previously described functions. +They just use the externally allocated memory area instead of allocating their own (on stack, or on heap). 
+*/ + + +/************************************** + Streaming Functions +**************************************/ +/* Note : these streaming functions still follow the older model */ +void* LZ4_createHC (const char* inputBuffer); +int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize); +int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBufferHC (void* LZ4HC_Data); +int LZ4_freeHC (void* LZ4HC_Data); + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function : + +void* LZ4_createHC (const char* inputBuffer); +The result of the function is the (void*) pointer on the LZ4HC Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lie next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(). 
+Their behavior is identical to LZ4_compressHC() or LZ4_compressHC_limitedOutput(), +but they require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one. +If the next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left in the input buffer), a call to : +char* LZ4_slideInputBufferHC(void* LZ4HC_Data); +must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure. +*/ + +int LZ4_sizeofStreamStateHC(void); +int LZ4_resetStreamStateHC(void* state, const char* inputBuffer); + +/* +These functions achieve the same result as : +void* LZ4_createHC (const char* inputBuffer); + +They are provided here to allow the user program to allocate memory using its own routines. + +To know how much space must be allocated, use LZ4_sizeofStreamStateHC(); +Note also that space must be aligned for pointers (32 or 64 bits). + +Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer); +void* state is a pointer to the space allocated. +It must be aligned for pointers (32 or 64 bits), and be large enough. +The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. 
+ +The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamStateHC(). +The return value of LZ4_resetStreamStateHC() is 0 if OK. +Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)). +*/ + + +#if defined (__cplusplus) +} +#endif diff --git a/src/lz4io.c b/src/lz4io.c new file mode 100644 index 0000000..557d3b0 --- /dev/null +++ b/src/lz4io.c @@ -0,0 +1,969 @@ +/* + LZ4io.c - LZ4 File/Stream Interface + Copyright (C) Yann Collet 2011-2014 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* + Note : this is a stand-alone program. + It is not part of LZ4 compression library, it is a user code of the LZ4 library. + - The license of LZ4 library is BSD. + - The license of xxHash library is BSD. + - The license of this source file is GPLv2. 
+*/ + +//************************************** +// Compiler Options +//************************************** +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# define _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_DEPRECATE // VS2005 +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#define _FILE_OFFSET_BITS 64 // Large file support on 32-bits unix +#define _POSIX_SOURCE 1 // for fileno() within <stdio.h> on unix + + +//**************************** +// Includes +//**************************** +#include <stdio.h> // fprintf, fopen, fread, _fileno, stdin, stdout +#include <stdlib.h> // malloc +#include <string.h> // strcmp, strlen +#include <time.h> // clock +#include "lz4io.h" +#include "lz4.h" +#include "lz4hc.h" +#include "xxhash.h" + + +//**************************** +// OS-specific Includes +//**************************** +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) +# include <fcntl.h> // _O_BINARY +# include <io.h> // _setmode, _isatty +# ifdef __MINGW32__ + int _fileno(FILE *stream); // MINGW somehow forgets to include this windows declaration into <stdio.h> +# endif +# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#else +# include <unistd.h> // isatty +# define SET_BINARY_MODE(file) +# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) +#endif + + +//************************************** +// Compiler-specific functions +//************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if defined(_MSC_VER) // Visual Studio +# define swap32 _byteswap_ulong +#elif GCC_VERSION >= 403 +# define swap32 __builtin_bswap32 +#else + static inline unsigned int swap32(unsigned int x) + { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 
8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); + } +#endif + + +//**************************** +// Constants +//**************************** +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define _1BIT 0x01 +#define _2BITS 0x03 +#define _3BITS 0x07 +#define _4BITS 0x0F +#define _8BITS 0xFF + +#define MAGICNUMBER_SIZE 4 +#define LZ4S_MAGICNUMBER 0x184D2204 +#define LZ4S_SKIPPABLE0 0x184D2A50 +#define LZ4S_SKIPPABLEMASK 0xFFFFFFF0 +#define LEGACY_MAGICNUMBER 0x184C2102 + +#define CACHELINE 64 +#define LEGACY_BLOCKSIZE (8 MB) +#define MIN_STREAM_BUFSIZE (192 KB) +#define LZ4S_BLOCKSIZEID_DEFAULT 7 +#define LZ4S_CHECKSUM_SEED 0 +#define LZ4S_EOS 0 +#define LZ4S_MAXHEADERSIZE (MAGICNUMBER_SIZE+2+8+4+1) + + +/* ************************************************** */ +/* Special input/output values */ +/* ************************************************** */ +#define NULL_OUTPUT "null" +static char stdinmark[] = "stdin"; +static char stdoutmark[] = "stdout"; +#ifdef _WIN32 +static char nulmark[] = "nul"; +#else +static char nulmark[] = "/dev/null"; +#endif + + +//************************************** +// Architecture Macros +//************************************** +static const int one = 1; +#define CPU_LITTLE_ENDIAN (*(char*)(&one)) +#define CPU_BIG_ENDIAN (!CPU_LITTLE_ENDIAN) +#define LITTLE_ENDIAN_32(i) (CPU_LITTLE_ENDIAN?(i):swap32(i)) + + +//************************************** +// Macros +//************************************** +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + + +//************************************** +// Local Parameters +//************************************** +static int displayLevel = 0; // 0 : no display // 1: errors // 2 : + result + interaction + warnings ; // 3 : + progression; // 4 : + information +static int overwrite = 1; +static int blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; +static int blockChecksum = 0; +static int streamChecksum = 1; +static int blockIndependence = 1; + +static const int minBlockSizeID = 4; +static const int maxBlockSizeID = 7; + +//************************************** +// Exceptions +//************************************** +#define DEBUG 0 +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, "\n"); \ + exit(error); \ +} + + +//************************************** +// Version modifiers +//************************************** +#define EXTENDED_ARGUMENTS +#define EXTENDED_HELP +#define EXTENDED_FORMAT +#define DEFAULT_COMPRESSOR compress_file +#define DEFAULT_DECOMPRESSOR decodeLZ4S + + +/* ************************************************** */ +/* ****************** Parameters ******************** */ +/* ************************************************** */ + +/* Default setting : overwrite = 1; return : overwrite mode (0/1) */ +int LZ4IO_setOverwrite(int yes) +{ + overwrite = (yes!=0); + return overwrite; +} + +/* blockSizeID : valid values : 4-5-6-7 */ +int LZ4IO_setBlockSizeID(int bsid) +{ + static const int blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB }; + if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return -1; + blockSizeId = bsid; + return blockSizeTable[blockSizeId-minBlockSizeID]; +} + + +int LZ4IO_setBlockMode(blockMode_t blockMode) +{ + blockIndependence = (blockMode == independentBlocks); + return 
blockIndependence; +} + + +/* Default setting : no checksum */ +int LZ4IO_setBlockChecksumMode(int xxhash) +{ + blockChecksum = (xxhash != 0); + return blockChecksum; +} + + +/* Default setting : checksum enabled */ +int LZ4IO_setStreamChecksumMode(int xxhash) +{ + streamChecksum = (xxhash != 0); + return streamChecksum; +} + + +/* Default setting : 0 (no notification) */ +int LZ4IO_setNotificationLevel(int level) +{ + displayLevel = level; + return displayLevel; +} + + + +/* ************************************************************************ */ +/* ********************** LZ4 File / Stream compression ******************* */ +/* ************************************************************************ */ + +static int LZ4S_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } +static unsigned int LZ4S_GetCheckBits_FromXXH (unsigned int xxh) { return (xxh >> 8) & _8BITS; } +static int LZ4S_isSkippableMagicNumber(unsigned int magic) { return (magic & LZ4S_SKIPPABLEMASK) == LZ4S_SKIPPABLE0; } + + +static int get_fileHandle(char* input_filename, char* output_filename, FILE** pfinput, FILE** pfoutput) +{ + + if (!strcmp (input_filename, stdinmark)) + { + DISPLAYLEVEL(4,"Using stdin for input\n"); + *pfinput = stdin; + SET_BINARY_MODE(stdin); + } + else + { + *pfinput = fopen(input_filename, "rb"); + } + + if (!strcmp (output_filename, stdoutmark)) + { + DISPLAYLEVEL(4,"Using stdout for output\n"); + *pfoutput = stdout; + SET_BINARY_MODE(stdout); + } + else + { + // Check if destination file already exists + *pfoutput=0; + if (output_filename != nulmark) *pfoutput = fopen( output_filename, "rb" ); + if (*pfoutput!=0) + { + fclose(*pfoutput); + if (!overwrite) + { + char ch; + DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); + DISPLAYLEVEL(2, "Overwrite ? 
(Y/N) : "); + if (displayLevel <= 1) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); // No interaction possible + ch = (char)getchar(); + if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); + } + } + *pfoutput = fopen( output_filename, "wb" ); + } + + if ( *pfinput==0 ) EXM_THROW(12, "Pb opening %s", input_filename); + if ( *pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename); + + return 0; +} + + +// LZ4IO_compressFilename_Legacy : This function is "hidden" (not published in .h) +// Its purpose is to generate compressed streams using the old 'legacy' format +int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, int compressionlevel) +{ + int (*compressionFunction)(const char*, char*, int); + unsigned long long filesize = 0; + unsigned long long compressedfilesize = MAGICNUMBER_SIZE; + char* in_buff; + char* out_buff; + FILE* finput; + FILE* foutput; + int displayLevel = (compressionlevel>0); + clock_t start, end; + size_t sizeCheck; + + + // Init + if (compressionlevel < 3) compressionFunction = LZ4_compress; else compressionFunction = LZ4_compressHC; + start = clock(); + get_fileHandle(input_filename, output_filename, &finput, &foutput); + if ((displayLevel==2) && (compressionlevel==1)) displayLevel=3; + + // Allocate Memory + in_buff = (char*)malloc(LEGACY_BLOCKSIZE); + out_buff = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE)); + if (!in_buff || !out_buff) EXM_THROW(21, "Allocation error : not enough memory"); + + // Write Archive Header + *(unsigned int*)out_buff = LITTLE_ENDIAN_32(LEGACY_MAGICNUMBER); + sizeCheck = fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput); + if (sizeCheck!=MAGICNUMBER_SIZE) EXM_THROW(22, "Write error : cannot write header"); + + // Main Loop + while (1) + { + unsigned int outSize; + // Read Block + int inSize = (int) fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput); + if( inSize<=0 ) break; + filesize += inSize; + 
DISPLAYLEVEL(3, "\rRead : %i MB ", (int)(filesize>>20)); + + // Compress Block + outSize = compressionFunction(in_buff, out_buff+4, inSize); + compressedfilesize += outSize+4; + DISPLAYLEVEL(3, "\rRead : %i MB ==> %.2f%% ", (int)(filesize>>20), (double)compressedfilesize/filesize*100); + + // Write Block + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize); + sizeCheck = fwrite(out_buff, 1, outSize+4, foutput); + if (sizeCheck!=(size_t)(outSize+4)) EXM_THROW(23, "Write error : cannot write compressed block"); + } + + // Status + end = clock(); + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + { + double seconds = (double)(end - start)/CLOCKS_PER_SEC; + DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + } + + // Close & Free + free(in_buff); + free(out_buff); + fclose(finput); + fclose(foutput); + + return 0; +} + + +static void* LZ4IO_LZ4_createStream (const char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} + +static int LZ4IO_LZ4_compress_limitedOutput_continue (void* ctx, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) +{ + (void)compressionLevel; + return LZ4_compress_limitedOutput_continue(ctx, source, dest, inputSize, maxOutputSize); +} + +static int LZ4IO_LZ4_slideInputBufferHC (void* ctx, char* buffer, int size) +{ + (void)size; (void)buffer; + LZ4_slideInputBufferHC (ctx); + return 1; +} + + +static int compress_file_blockDependency(char* input_filename, char* output_filename, int compressionlevel) +{ + void* (*initFunction) (const char*); + int (*compressionFunction)(void*, const char*, char*, int, int, int); + int (*nextBlockFunction) (void*, char*, int); + int (*freeFunction) (void*); + void* ctx; + unsigned long long filesize = 0; + unsigned long long 
compressedfilesize = 0; + unsigned int checkbits; + char* in_buff, *in_blockStart; + char* out_buff; + FILE* finput; + FILE* foutput; + clock_t start, end; + unsigned int blockSize, inputBufferSize; + size_t sizeCheck, header_size; + void* streamChecksumState=NULL; + + // Init + start = clock(); + if ((displayLevel==2) && (compressionlevel>=3)) displayLevel=3; + + if (compressionlevel<3) + { + initFunction = LZ4IO_LZ4_createStream; + compressionFunction = LZ4IO_LZ4_compress_limitedOutput_continue; + nextBlockFunction = LZ4_saveDict; + freeFunction = LZ4_free; + } + else + { + initFunction = LZ4_createHC; + compressionFunction = LZ4_compressHC2_limitedOutput_continue; + nextBlockFunction = LZ4IO_LZ4_slideInputBufferHC; + freeFunction = LZ4_free; + } + + get_fileHandle(input_filename, output_filename, &finput, &foutput); + blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); + + // Allocate Memory + inputBufferSize = 64 KB + blockSize; + in_buff = (char*)malloc(inputBufferSize); + out_buff = (char*)malloc(blockSize+CACHELINE); + if (!in_buff || !out_buff) EXM_THROW(31, "Allocation error : not enough memory"); + in_blockStart = in_buff + 64 KB; + if (compressionlevel>=3) in_blockStart = in_buff; + if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + ctx = initFunction(in_buff); + + // Write Archive Header + *(unsigned int*)out_buff = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention + *(out_buff+4) = (1 & _2BITS) << 6 ; // Version('01') + *(out_buff+4) |= (blockIndependence & _1BIT) << 5; + *(out_buff+4) |= (blockChecksum & _1BIT) << 4; + *(out_buff+4) |= (streamChecksum & _1BIT) << 2; + *(out_buff+5) = (char)((blockSizeId & _3BITS) << 4); + checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED); + checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); + *(out_buff+6) = (unsigned char) checkbits; + header_size = 7; + sizeCheck = fwrite(out_buff, 1, header_size, foutput); + if (sizeCheck!=header_size) EXM_THROW(32, 
"Write error : cannot write header"); + compressedfilesize += header_size; + + // Main Loop + while (1) + { + unsigned int outSize; + unsigned int inSize; + + // Read Block + inSize = (unsigned int) fread(in_blockStart, (size_t)1, (size_t)blockSize, finput); + if( inSize==0 ) break; // No more input : end of compression + filesize += inSize; + DISPLAYLEVEL(3, "\rRead : %i MB ", (int)(filesize>>20)); + if (streamChecksum) XXH32_update(streamChecksumState, in_blockStart, inSize); + + // Compress Block + outSize = compressionFunction(ctx, in_blockStart, out_buff+4, inSize, inSize-1, compressionlevel); + if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += inSize+4; + if (blockChecksum) compressedfilesize+=4; + DISPLAYLEVEL(3, "==> %.2f%% ", (double)compressedfilesize/filesize*100); + + // Write Block + if (outSize > 0) + { + int sizeToWrite; + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize); + if (blockChecksum) + { + unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); + * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum); + } + sizeToWrite = 4 + outSize + (4*blockChecksum); + sizeCheck = fwrite(out_buff, 1, sizeToWrite, foutput); + if (sizeCheck!=(size_t)(sizeToWrite)) EXM_THROW(33, "Write error : cannot write compressed block"); + } + else // Copy Original + { + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(inSize|0x80000000); // Add Uncompressed flag + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header"); + sizeCheck = fwrite(in_blockStart, 1, inSize, foutput); + if (sizeCheck!=(size_t)(inSize)) EXM_THROW(35, "Write error : cannot write block"); + if (blockChecksum) + { + unsigned int checksum = XXH32(in_blockStart, inSize, LZ4S_CHECKSUM_SEED); + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write 
block checksum"); + } + } + { + size_t sizeToMove = 64 KB; + if (inSize < 64 KB) sizeToMove = inSize; + nextBlockFunction(ctx, in_blockStart - sizeToMove, (int)sizeToMove); + if (compressionlevel>=3) in_blockStart = in_buff + 64 KB; + } + } + + // End of Stream mark + * (unsigned int*) out_buff = LZ4S_EOS; + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream"); + compressedfilesize += 4; + if (streamChecksum) + { + unsigned int checksum = XXH32_digest(streamChecksumState); + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum"); + compressedfilesize += 4; + } + + // Status + end = clock(); + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + { + double seconds = (double)(end - start)/CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + } + + // Close & Free + freeFunction(ctx); + free(in_buff); + free(out_buff); + fclose(finput); + fclose(foutput); + + return 0; +} + + +FORCE_INLINE int LZ4_compress_limitedOutput_local(const char* src, char* dst, int size, int maxOut, int clevel) +{ (void)clevel; return LZ4_compress_limitedOutput(src, dst, size, maxOut); } + +int LZ4IO_compressFilename(char* input_filename, char* output_filename, int compressionLevel) +{ + int (*compressionFunction)(const char*, char*, int, int, int); + unsigned long long filesize = 0; + unsigned long long compressedfilesize = 0; + unsigned int checkbits; + char* in_buff; + char* out_buff; + char* headerBuffer; + FILE* finput; + FILE* foutput; + clock_t start, end; + int blockSize; + size_t sizeCheck, header_size, readSize; + void* 
streamChecksumState=NULL; + + // Branch out + if (blockIndependence==0) return compress_file_blockDependency(input_filename, output_filename, compressionLevel); + + // Init + start = clock(); + if ((displayLevel==2) && (compressionLevel>=3)) displayLevel=3; + if (compressionLevel <= 3) compressionFunction = LZ4_compress_limitedOutput_local; + else { compressionFunction = LZ4_compressHC2_limitedOutput; } + get_fileHandle(input_filename, output_filename, &finput, &foutput); + blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); + + // Allocate Memory + in_buff = (char*)malloc(blockSize); + out_buff = (char*)malloc(blockSize+CACHELINE); + headerBuffer = (char*)malloc(LZ4S_MAXHEADERSIZE); + if (!in_buff || !out_buff || !(headerBuffer)) EXM_THROW(31, "Allocation error : not enough memory"); + if (streamChecksum) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + + // Write Archive Header + *(unsigned int*)headerBuffer = LITTLE_ENDIAN_32(LZ4S_MAGICNUMBER); // Magic Number, in Little Endian convention + *(headerBuffer+4) = (1 & _2BITS) << 6 ; // Version('01') + *(headerBuffer+4) |= (blockIndependence & _1BIT) << 5; + *(headerBuffer+4) |= (blockChecksum & _1BIT) << 4; + *(headerBuffer+4) |= (streamChecksum & _1BIT) << 2; + *(headerBuffer+5) = (char)((blockSizeId & _3BITS) << 4); + checkbits = XXH32((headerBuffer+4), 2, LZ4S_CHECKSUM_SEED); + checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); + *(headerBuffer+6) = (unsigned char) checkbits; + header_size = 7; + + // Write header + sizeCheck = fwrite(headerBuffer, 1, header_size, foutput); + if (sizeCheck!=header_size) EXM_THROW(32, "Write error : cannot write header"); + compressedfilesize += header_size; + + // read first block + readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput); + + // Main Loop + while (readSize>0) + { + unsigned int outSize; + + filesize += readSize; + DISPLAYLEVEL(3, "\rRead : %i MB ", (int)(filesize>>20)); + if (streamChecksum) XXH32_update(streamChecksumState, in_buff, 
(int)readSize); + + // Compress Block + outSize = compressionFunction(in_buff, out_buff+4, (int)readSize, (int)readSize-1, compressionLevel); + if (outSize > 0) compressedfilesize += outSize+4; else compressedfilesize += readSize+4; + if (blockChecksum) compressedfilesize+=4; + DISPLAYLEVEL(3, "==> %.2f%% ", (double)compressedfilesize/filesize*100); + + // Write Block + if (outSize > 0) + { + int sizeToWrite; + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(outSize); + if (blockChecksum) + { + unsigned int checksum = XXH32(out_buff+4, outSize, LZ4S_CHECKSUM_SEED); + * (unsigned int*) (out_buff+4+outSize) = LITTLE_ENDIAN_32(checksum); + } + sizeToWrite = 4 + outSize + (4*blockChecksum); + sizeCheck = fwrite(out_buff, 1, sizeToWrite, foutput); + if (sizeCheck!=(size_t)(sizeToWrite)) EXM_THROW(33, "Write error : cannot write compressed block"); + } + else // Copy Original Uncompressed + { + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(((unsigned long)readSize)|0x80000000); // Add Uncompressed flag + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(34, "Write error : cannot write block header"); + sizeCheck = fwrite(in_buff, 1, readSize, foutput); + if (sizeCheck!=readSize) EXM_THROW(35, "Write error : cannot write block"); + if (blockChecksum) + { + unsigned int checksum = XXH32(in_buff, (int)readSize, LZ4S_CHECKSUM_SEED); + * (unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(36, "Write error : cannot write block checksum"); + } + } + + // Read next block + readSize = fread(in_buff, (size_t)1, (size_t)blockSize, finput); + } + + // End of Stream mark + * (unsigned int*) out_buff = LZ4S_EOS; + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write end of stream"); + compressedfilesize += 4; + if (streamChecksum) + { + unsigned int checksum = XXH32_digest(streamChecksumState); + * 
(unsigned int*) out_buff = LITTLE_ENDIAN_32(checksum); + sizeCheck = fwrite(out_buff, 1, 4, foutput); + if (sizeCheck!=(size_t)(4)) EXM_THROW(37, "Write error : cannot write stream checksum"); + compressedfilesize += 4; + } + + // Close & Free + free(in_buff); + free(out_buff); + free(headerBuffer); + fclose(finput); + fclose(foutput); + + // Final Status + end = clock(); + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + { + double seconds = (double)(end - start)/CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + } + + return 0; +} + + +/* ********************************************************************* */ +/* ********************** LZ4 File / Stream decoding ******************* */ +/* ********************************************************************* */ + +static unsigned long long decodeLegacyStream(FILE* finput, FILE* foutput) +{ + unsigned long long filesize = 0; + char* in_buff; + char* out_buff; + unsigned int blockSize; + + + // Allocate Memory + in_buff = (char*)malloc(LZ4_compressBound(LEGACY_BLOCKSIZE)); + out_buff = (char*)malloc(LEGACY_BLOCKSIZE); + if (!in_buff || !out_buff) EXM_THROW(51, "Allocation error : not enough memory"); + + // Main Loop + while (1) + { + int decodeSize; + size_t sizeCheck; + + // Block Size + sizeCheck = fread(&blockSize, 1, 4, finput); + if (sizeCheck==0) break; // Nothing to read : file read is completed + blockSize = LITTLE_ENDIAN_32(blockSize); // Convert to Little Endian + if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) + { // Cannot read next block : maybe new stream ? 
+ fseek(finput, -4, SEEK_CUR); + break; + } + + // Read Block + sizeCheck = fread(in_buff, 1, blockSize, finput); + + // Decode Block + decodeSize = LZ4_decompress_safe(in_buff, out_buff, blockSize, LEGACY_BLOCKSIZE); + if (decodeSize < 0) EXM_THROW(52, "Decoding Failed ! Corrupted input detected !"); + filesize += decodeSize; + + // Write Block + sizeCheck = fwrite(out_buff, 1, decodeSize, foutput); + if (sizeCheck != (size_t)decodeSize) EXM_THROW(53, "Write error : cannot write decoded block into output\n"); + } + + // Free + free(in_buff); + free(out_buff); + + return filesize; +} + + +static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) +{ + unsigned long long filesize = 0; + char* in_buff; + char* out_buff, *out_start, *out_end; + unsigned char descriptor[LZ4S_MAXHEADERSIZE]; + size_t nbReadBytes; + int decodedBytes=0; + unsigned int maxBlockSize; + size_t sizeCheck; + int blockChecksumFlag, streamChecksumFlag, blockIndependenceFlag; + void* streamChecksumState=NULL; + int (*decompressionFunction)(void* ctx, const char* src, char* dst, int cSize, int maxOSize) = LZ4_decompress_safe_continue; + LZ4_streamDecode_t ctx; + + // init + memset(&ctx, 0, sizeof(ctx)); + + // Decode stream descriptor + nbReadBytes = fread(descriptor, 1, 3, finput); + if (nbReadBytes != 3) EXM_THROW(61, "Unreadable header"); + { + int version = (descriptor[0] >> 6) & _2BITS; + int streamSize = (descriptor[0] >> 3) & _1BIT; + int reserved1 = (descriptor[0] >> 1) & _1BIT; + int dictionary = (descriptor[0] >> 0) & _1BIT; + + int reserved2 = (descriptor[1] >> 7) & _1BIT; + int blockSizeId = (descriptor[1] >> 4) & _3BITS; + int reserved3 = (descriptor[1] >> 0) & _4BITS; + int checkBits = (descriptor[2] >> 0) & _8BITS; + int checkBits_xxh32; + + blockIndependenceFlag=(descriptor[0] >> 5) & _1BIT; + blockChecksumFlag = (descriptor[0] >> 4) & _1BIT; + streamChecksumFlag= (descriptor[0] >> 2) & _1BIT; + + if (version != 1) EXM_THROW(62, "Wrong version number"); + if (streamSize == 
1) EXM_THROW(64, "Does not support stream size"); + if (reserved1 != 0) EXM_THROW(65, "Wrong value for reserved bits"); + if (dictionary == 1) EXM_THROW(66, "Does not support dictionary"); + if (reserved2 != 0) EXM_THROW(67, "Wrong value for reserved bits"); + if (blockSizeId < 4) EXM_THROW(68, "Unsupported block size"); + if (reserved3 != 0) EXM_THROW(67, "Wrong value for reserved bits"); + maxBlockSize = LZ4S_GetBlockSize_FromBlockId(blockSizeId); + // Checkbits verification + descriptor[1] &= 0xF0; + checkBits_xxh32 = XXH32(descriptor, 2, LZ4S_CHECKSUM_SEED); + checkBits_xxh32 = LZ4S_GetCheckBits_FromXXH(checkBits_xxh32); + if (checkBits != checkBits_xxh32) EXM_THROW(69, "Stream descriptor error detected"); + } + + // Allocate Memory + { + size_t outBuffSize = maxBlockSize + 64 KB; + if (outBuffSize < MIN_STREAM_BUFSIZE) outBuffSize = MIN_STREAM_BUFSIZE; + in_buff = (char*)malloc(maxBlockSize); + out_buff = (char*)malloc(outBuffSize); + out_start = out_buff; + out_end = out_start + outBuffSize; + if (!in_buff || !out_buff) EXM_THROW(70, "Allocation error : not enough memory"); + if (streamChecksumFlag) streamChecksumState = XXH32_init(LZ4S_CHECKSUM_SEED); + } + + // Main Loop + while (1) + { + unsigned int blockSize, uncompressedFlag; + + // Block Size + nbReadBytes = fread(&blockSize, 1, 4, finput); + if( nbReadBytes != 4 ) EXM_THROW(71, "Read error : cannot read next block size"); + if (blockSize == LZ4S_EOS) break; // End of Stream Mark : stream is completed + blockSize = LITTLE_ENDIAN_32(blockSize); // Convert to little endian + uncompressedFlag = blockSize >> 31; + blockSize &= 0x7FFFFFFF; + if (blockSize > maxBlockSize) EXM_THROW(72, "Error : invalid block size"); + + // Read Block + nbReadBytes = fread(in_buff, 1, blockSize, finput); + if( nbReadBytes != blockSize ) EXM_THROW(73, "Read error : cannot read data block" ); + + // Check Block + if (blockChecksumFlag) + { + unsigned int checksum = XXH32(in_buff, blockSize, LZ4S_CHECKSUM_SEED); + unsigned int 
readChecksum; + sizeCheck = fread(&readChecksum, 1, 4, finput); + if( sizeCheck != 4 ) EXM_THROW(74, "Read error : cannot read next block size"); + readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian + if (checksum != readChecksum) EXM_THROW(75, "Error : invalid block checksum detected"); + } + + if (uncompressedFlag) + { + // Write uncompressed Block + sizeCheck = fwrite(in_buff, 1, blockSize, foutput); + if (sizeCheck != (size_t)blockSize) EXM_THROW(76, "Write error : cannot write data block"); + filesize += blockSize; + if (streamChecksumFlag) XXH32_update(streamChecksumState, in_buff, blockSize); + if (!blockIndependenceFlag) + { + // handle dictionary for streaming + memcpy(in_buff + blockSize - 64 KB, out_buff, 64 KB); + LZ4_setDictDecode(&ctx, out_buff, 64 KB); + out_start = out_buff + 64 KB; + } + } + else + { + // Decode Block + if (out_start + maxBlockSize > out_end) out_start = out_buff; + decodedBytes = decompressionFunction(&ctx, in_buff, out_start, blockSize, maxBlockSize); + if (decodedBytes < 0) EXM_THROW(77, "Decoding Failed ! 
Corrupted input detected !"); + filesize += decodedBytes; + if (streamChecksumFlag) XXH32_update(streamChecksumState, out_start, decodedBytes); + + // Write Block + sizeCheck = fwrite(out_start, 1, decodedBytes, foutput); + if (sizeCheck != (size_t)decodedBytes) EXM_THROW(78, "Write error : cannot write decoded block\n"); + out_start += decodedBytes; + } + + } + + // Stream Checksum + if (streamChecksumFlag) + { + unsigned int checksum = XXH32_digest(streamChecksumState); + unsigned int readChecksum; + sizeCheck = fread(&readChecksum, 1, 4, finput); + if (sizeCheck != 4) EXM_THROW(74, "Read error : cannot read stream checksum"); + readChecksum = LITTLE_ENDIAN_32(readChecksum); // Convert to little endian + if (checksum != readChecksum) EXM_THROW(79, "Error : invalid stream checksum detected"); + } + + // Free + free(in_buff); + free(out_buff); + + return filesize; +} + + +static unsigned long long selectDecoder( FILE* finput, FILE* foutput) +{ + unsigned int magicNumber, size; + int errorNb; + size_t nbReadBytes; + + // Check Archive Header + nbReadBytes = fread(&magicNumber, 1, MAGICNUMBER_SIZE, finput); + if (nbReadBytes==0) return 0; // EOF + if (nbReadBytes != MAGICNUMBER_SIZE) EXM_THROW(41, "Unrecognized header : Magic Number unreadable"); + magicNumber = LITTLE_ENDIAN_32(magicNumber); // Convert to Little Endian format + if (LZ4S_isSkippableMagicNumber(magicNumber)) magicNumber = LZ4S_SKIPPABLE0; // fold skippable magic numbers + + switch(magicNumber) + { + case LZ4S_MAGICNUMBER: + return DEFAULT_DECOMPRESSOR(finput, foutput); + case LEGACY_MAGICNUMBER: + DISPLAYLEVEL(4, "Detected : Legacy format \n"); + return decodeLegacyStream(finput, foutput); + case LZ4S_SKIPPABLE0: + DISPLAYLEVEL(4, "Skipping detected skippable area \n"); + nbReadBytes = fread(&size, 1, 4, finput); + if (nbReadBytes != 4) EXM_THROW(42, "Stream error : skippable size unreadable"); + size = LITTLE_ENDIAN_32(size); // Convert to Little Endian format + errorNb = fseek(finput, size, 
SEEK_CUR); + if (errorNb != 0) EXM_THROW(43, "Stream error : cannot skip skippable area"); + return selectDecoder(finput, foutput); + EXTENDED_FORMAT; + default: + if (ftell(finput) == MAGICNUMBER_SIZE) EXM_THROW(44,"Unrecognized header : file cannot be decoded"); // Wrong magic number at the beginning of 1st stream + DISPLAYLEVEL(2, "Stream followed by unrecognized data\n"); + return 0; + } +} + + +int LZ4IO_decompressFilename(char* input_filename, char* output_filename) +{ + unsigned long long filesize = 0, decodedSize=0; + FILE* finput; + FILE* foutput; + clock_t start, end; + + + // Init + start = clock(); + get_fileHandle(input_filename, output_filename, &finput, &foutput); + + // Loop over multiple streams + do + { + decodedSize = selectDecoder(finput, foutput); + filesize += decodedSize; + } while (decodedSize); + + // Final Status + end = clock(); + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "Successfully decoded %llu bytes \n", filesize); + { + double seconds = (double)(end - start)/CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024); + } + + // Close + fclose(finput); + fclose(foutput); + + // Error status = OK + return 0; +} + diff --git a/src/lz4io.h b/src/lz4io.h new file mode 100644 index 0000000..5cc532e --- /dev/null +++ b/src/lz4io.h @@ -0,0 +1,64 @@ +/* + LZ4io.h - LZ4 File/Stream Interface + Copyright (C) Yann Collet 2011-2014 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* + Note : this is stand-alone program. + It is not part of LZ4 compression library, it is a user code of the LZ4 library. + - The license of LZ4 library is BSD. + - The license of xxHash library is BSD. + - The license of this source file is GPLv2. +*/ + + +/* ************************************************** */ +/* ****************** Functions ********************* */ +/* ************************************************** */ + +int LZ4IO_compressFilename (char* input_filename, char* output_filename, int compressionlevel); +int LZ4IO_decompressFilename(char* input_filename, char* output_filename); + + +/* ************************************************** */ +/* ****************** Parameters ******************** */ +/* ************************************************** */ + +/* Default setting : overwrite = 1; + return : overwrite mode (0/1) */ +int LZ4IO_setOverwrite(int yes); + +/* blockSizeID : valid values : 4-5-6-7 + return : -1 if error, blockSize if OK */ +int LZ4IO_setBlockSizeID(int blockSizeID); + +/* Default setting : independent blocks */ +typedef enum { chainedBlocks, independentBlocks } blockMode_t; +int LZ4IO_setBlockMode(blockMode_t blockMode); + +/* Default setting : no checksum */ +int LZ4IO_setBlockChecksumMode(int xxhash); + +/* Default setting : checksum enabled */ +int LZ4IO_setStreamChecksumMode(int xxhash); + +/* Default setting : 0 (no notification) */ +int LZ4IO_setNotificationLevel(int level); diff --git a/src/python-lz4.c b/src/python-lz4.c index 45fe995..b91a31a 100644 --- a/src/python-lz4.c +++ b/src/python-lz4.c @@ -32,12 +32,15 @@ #include #include #include 
+#include
 #include "lz4.h"
 #include "lz4hc.h"
+#include "lz4io.h"
 #include "python-lz4.h"
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define throwWarn(msg) PyErr_WarnEx(PyExc_UserWarning, msg, 1)
 
 typedef int (*compressor)(const char *source, char *dest, int isize);
 
@@ -53,6 +56,22 @@ static inline uint32_t load_le32(const char *c) {
     return d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
 }
 
+/* Return a newly malloc()ed copy of `input` with ".lz4" appended, or NULL if
+ * the allocation fails.  The caller owns the buffer and must free() it. */
+static inline char* add_extension(char *input) {
+    static const char ext[] = ".lz4";
+    size_t inLen = strlen(input);
+    char* output;
+
+    /* sizeof(ext) includes the terminating NUL: name + ".lz4" + '\0' */
+    output = (char*)malloc(inLen + sizeof(ext));
+    if (!output) return NULL;
+    memcpy(output, input, inLen);
+    memcpy(output + inLen, ext, sizeof(ext));
+
+    return output;
+}
+
 static const int hdr_size = sizeof(uint32_t);
 
 static PyObject *compress_with(compressor compress, PyObject *self, PyObject *args) {
@@ -62,6 +81,7 @@ static PyObject *compress_with(compressor compress, PyObject *self, PyObject *ar
     char *dest;
     int dest_size;
 
+    (void)self;
     if (!PyArg_ParseTuple(args, "s#", &source, &source_size))
         return NULL;
 
@@ -99,6 +119,7 @@ static PyObject *py_lz4_uncompress(PyObject *self, PyObject *args) {
     int source_size;
     uint32_t dest_size;
 
+    (void)self;
     if (!PyArg_ParseTuple(args, "s#", &source, &source_size)) {
         return NULL;
     }
@@ -125,6 +146,152 @@ static PyObject *py_lz4_uncompress(PyObject *self, PyObject *args) {
     return result;
 }
 
+/* compressFileDefault(input[, compLevel]): compress file `input` into
+ * `input` + ".lz4" using the current LZ4IO settings.  Returns None. */
+static PyObject *py_lz4_compressFileDefault(PyObject *self, PyObject *args) {
+    char* input;
+    char* output;
+    int compLevel = 0;
+
+    (void)self;
+    if (!PyArg_ParseTuple(args, "s|i", &input, &compLevel)) {
+        return NULL;
+    }
+
+    output = add_extension(input);
+    if (!output) {
+        return PyErr_NoMemory();
+    }
+
+    LZ4IO_compressFilename(input, output, compLevel);
+    free(output);   /* the generated name is only needed for the call above */
+    Py_RETURN_NONE; /* hands the caller its own reference to None */
+}
+
+/* compressFileAdv(input, compLevel[, output, overwrite, blockSizeID,
+ * blockMode, blockCheck, streamCheck, verbosity]): compress `input` after
+ * applying the given LZ4IO settings.  `output` defaults to `input` + ".lz4".
+ * An out-of-range setting is skipped (its LZ4IO setter is not called) and
+ * reported with a UserWarning.  Returns None, or NULL with an exception set
+ * if parsing or allocation fails or a warning is raised as an error. */
+static PyObject *py_lz4_compressFileAdv(PyObject *self, PyObject *args, \
+                                        PyObject *keywds) {
+    char* input;
+    char* output = NULL;
+    char* ownedOutput = NULL;   /* default output name allocated here, if any */
+    int compLevel = 0;
+    int overwrite = 1;
+    int blockSizeID = 7;
+    int blockMode = 1;
+    int blockCheck = 0;
+    int streamCheck = 1;
+    int verbosity = 0;
+
+    char* oMsg = "Invalid input for overwrite. Using default value.";
+    char* bmMsg = "Invalid input for blockMode. Using default value.";
+    char* bsMsg = "Invalid input for blockSizeID. Using default value.";
+    char* bcMsg = "Invalid input for blockCheck. Using default value.";
+    char* scMsg = "Invalid input for streamCheck. Using default value.";
+    char* vMsg = "Invalid input for verbosity. Using default value.";
+
+    static char *kwlist[] = {"input", "compLevel", "output", "overwrite",
+                             "blockSizeID", "blockMode", "blockCheck",
+                             "streamCheck", "verbosity", NULL};
+
+    (void)self;
+    if (!PyArg_ParseTupleAndKeywords(args, keywds, "si|siiiiii", kwlist,
+                                     &input, &compLevel, &output, &overwrite,
+                                     &blockSizeID, &blockMode, &blockCheck,
+                                     &streamCheck, &verbosity)) {
+        return NULL;
+    }
+
+    if (!output) {
+        ownedOutput = add_extension(input);
+        if (!ownedOutput) {
+            return PyErr_NoMemory();
+        }
+        output = ownedOutput;
+    }
+
+    /* PyErr_WarnEx returns -1 when the warning is turned into an exception;
+     * that exception must be propagated instead of returning None. */
+    if (overwrite == 0 || overwrite == 1) {
+        (void)LZ4IO_setOverwrite(overwrite);
+    } else if (throwWarn(oMsg) < 0) {
+        goto fail;
+    }
+    if (3 < blockSizeID && blockSizeID < 8) {
+        (void)LZ4IO_setBlockSizeID(blockSizeID);
+    } else if (throwWarn(bsMsg) < 0) {
+        goto fail;
+    }
+    if (blockCheck == 0 || blockCheck == 1) {
+        (void)LZ4IO_setBlockChecksumMode(blockCheck);
+    } else if (throwWarn(bcMsg) < 0) {
+        goto fail;
+    }
+    if (streamCheck == 0 || streamCheck == 1) {
+        (void)LZ4IO_setStreamChecksumMode(streamCheck);
+    } else if (throwWarn(scMsg) < 0) {
+        goto fail;
+    }
+    if (-1 < verbosity && verbosity < 5) {
+        (void)LZ4IO_setNotificationLevel(verbosity);
+    } else if (throwWarn(vMsg) < 0) {
+        goto fail;
+    }
+    if (blockMode == 0) {
+        (void)LZ4IO_setBlockMode(chainedBlocks);
+    } else if (blockMode == 1) {
+        (void)LZ4IO_setBlockMode(independentBlocks);
+    } else if (throwWarn(bmMsg) < 0) {
+        goto fail;
+    }
+
+    LZ4IO_compressFilename(input, output, compLevel);
+    free(ownedOutput);
+    Py_RETURN_NONE;
+
+fail:
+    free(ownedOutput);
+    return NULL;
+}
+
+/* decompressFileDefault(input): decompress file `input` into a file named
+ * like `input` minus its last four characters (the ".lz4" extension is
+ * assumed, not checked).  Returns None; ValueError if the name is too short. */
+static PyObject *py_lz4_decompressFileDefault(PyObject *self, PyObject *args) {
+    char* input;
+    char* output;
+    size_t inLen, outLen;
+
+    (void)self;
+    if (!PyArg_ParseTuple(args, "s", &input)) {
+        return NULL;
+    }
+
+    inLen = strlen(input);
+    if (inLen <= 4) {
+        PyErr_SetString(PyExc_ValueError,
+                        "input filename is too short to strip the .lz4 extension");
+        return NULL;
+    }
+
+    outLen = inLen - 4;
+    output = (char*)malloc(outLen + 1);   /* +1 for the terminating NUL */
+    if (!output) {
+        return PyErr_NoMemory();
+    }
+    memcpy(output, input, outLen);
+    output[outLen] = '\0';
+
+    LZ4IO_decompressFilename(input, output);
+    free(output);
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef Lz4Methods[] = {
     {"LZ4_compress", py_lz4_compress, METH_VARARGS, COMPRESS_DOCSTRING},
     {"LZ4_uncompress", py_lz4_uncompress, METH_VARARGS, UNCOMPRESS_DOCSTRING},
@@ -134,6 +301,9 @@ static PyMethodDef Lz4Methods[] = {
     {"decompress", py_lz4_uncompress, METH_VARARGS, UNCOMPRESS_DOCSTRING},
     {"dumps", py_lz4_compress, METH_VARARGS, COMPRESS_DOCSTRING},
     {"loads", py_lz4_uncompress, METH_VARARGS, UNCOMPRESS_DOCSTRING},
+    {"compressFileAdv", (PyCFunction)py_lz4_compressFileAdv, METH_VARARGS | METH_KEYWORDS, COMPF_ADV_DOCSTRING},
+    {"compressFileDefault", py_lz4_compressFileDefault, METH_VARARGS, COMPF_DEFAULT_DOCSTRING},
+    {"decompressFileDefault", py_lz4_decompressFileDefault, METH_VARARGS, DECOMP_FILE_DOCSTRING},
     {NULL, NULL, 0, NULL}
 };
 
diff --git a/src/python-lz4.h b/src/python-lz4.h
index 70e81ac..9896810 100644
--- a/src/python-lz4.h
+++ b/src/python-lz4.h
@@ -33,12 +33,22 @@
 
 static PyObject *py_lz4_compress(PyObject *self, PyObject *args);
 static PyObject *py_lz4_uncompress(PyObject *self, PyObject *args);
+static PyObject *py_lz4_compressFileAdv(PyObject *self, PyObject *args, PyObject *keywds);
+static PyObject *py_lz4_compressFileDefault(PyObject *self, PyObject *args);
+static PyObject *py_lz4_decompressFileDefault(PyObject *self, PyObject *args);
 
 PyMODINIT_FUNC initlz4(void);
 
 #define COMPRESS_DOCSTRING "Compress string, returning the compressed data.\nRaises an exception if any
error occurs." #define COMPRESSHC_DOCSTRING COMPRESS_DOCSTRING "\n\nCompared to compress, this gives a better compression ratio, but is much slower." #define UNCOMPRESS_DOCSTRING "Decompress string, returning the uncompressed data.\nRaises an exception if any error occurs." +#define COMPRESS_FILE_DOCSTRING "Compress file, by default adds .lz4 extension to original filename." +#define COMPF_DEFAULT_DOCSTRING COMPRESS_FILE_DOCSTRING "\nAccepts two positional arguments, inputFile and compression level." +#define COMPF_ADV_DOCSTRING COMPRESS_FILE_DOCSTRING "\nRequires the first two keyword arugments and accepts any number of the"\ + "\nfollowing: input, compLevel, output, overwrite, blockSizeID, blockCheck, streamCheck"\ + "\nValid values are as follows(def=default): input='string', compLevel=0(low, def)-9(High), output='string',"\ + "\noverwrite=0/1(def), blockSizeID=4-7(def), blockCheck=0(def)/1, streamCheck=0/1(def), verbosity=0(def)-4" +#define DECOMP_FILE_DOCSTRING "Decompresses file, removes the extension by default, preserves original." #if defined(_WIN32) && defined(_MSC_VER) # define inline __inline diff --git a/src/xxhash.c b/src/xxhash.c new file mode 100644 index 0000000..8304ec2 --- /dev/null +++ b/src/xxhash.c @@ -0,0 +1,475 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ +*/ + + +//************************************** +// Tuning parameters +//************************************** +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). +#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. 
To enable it, uncomment below define : +//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independance be of no importance for your application, you may set the #define below to 1. +// It will improve speed for Big-endian CPU. +// This option has no impact on Little_Endian CPU. +#define XXH_FORCE_NATIVE_FORMAT 0 + + +//************************************** +// Compiler Specific Options +//************************************** +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +//************************************** +// Includes & Memory related functions +//************************************** +#include "xxhash.h" +// Modify the local functions below should you wish to use some other memory related routines +// for malloc(), free() +#include +FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } +FORCE_INLINE void XXH_free (void* p) { free(p); } +// for memcpy() +#include +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef 
uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U32_S { U32 v; } _PACKED U32_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A32(x) (((U32_S *)(x))->v) + + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif + +#if defined(_MSC_VER) // Visual Studio +# define XXH_swap32 _byteswap_ulong +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static inline U32 XXH_swap32 (U32 x) { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} +#endif + + +//************************************** +// Constants +//************************************** +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + + +//************************************** +// Architecture Macros +//************************************** +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch + static const int one = 1; +# define 
XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif + + +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + + +//**************************** +// Memory reads +//**************************** +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } + + +//**************************** +// Simple Hash Functions +//**************************** +FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U32 h32; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + 
h32 = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p<=bEnd-4)   // fold the remaining whole 32-bit words
+    {
+        h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3;
+        h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)   // fold the last 0-3 bytes one at a time
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;   // final avalanche
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+U32 XXH32(const void* input, int len, U32 seed)   // one-shot 32-bit hash of the len bytes at input, starting from seed
+{
+#if 0
+    // Simple version, good for code maintenance, but unfortunately slow for small inputs
+    void* state = XXH32_init(seed);
+    XXH32_update(state, input, len);
+    return XXH32_digest(state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3) == 0)   // Input is 4-byte aligned (low two address bits clear) : direct U32 loads are safe, let's leverage the speed advantage
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    // Unaligned (or unknown) input : go through the packed-struct read
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+//****************************
+// Advanced Hash Functions
+//****************************
+
+struct XXH_state32_t
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    int memsize;
+    char memory[16];
+};
+
+
+int XXH32_sizeofState()
+{
+    XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t));   // A compilation error here means XXH32_SIZEOFSTATE is not large enough
+    return sizeof(struct XXH_state32_t);
+}
+
+
+XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
+{
+    struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed -
PRIME32_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + + +void* XXH32_init (U32 seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state32_t)); + XXH32_resetState(state, seed); + return state; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 16) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += 
XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +U32 XXH32_digest (void* state_in) +{ + U32 h32 = XXH32_intermediateDigest(state_in); + + XXH_free(state_in); + + return h32; +} diff --git a/src/xxhash.h b/src/xxhash.h new file mode 100644 
index 0000000..8491099 --- /dev/null +++ b/src/xxhash.h @@ -0,0 +1,164 @@ +/* + xxHash - Fast Hash algorithm + Header File + Copyright (C) 2012-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : http://code.google.com/p/xxhash/ +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. 
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MurmurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. +*/ + +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + + +//**************************** +// Type +//**************************** +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + + +//**************************** +// Simple Hash Functions +//**************************** + +unsigned int XXH32 (const void* input, int len, unsigned int seed); + +/* +XXH32() : + Calculate the 32-bit hash of the sequence of length "len" stored at memory address "input". + The memory between input & input+len must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + This function successfully passes all SMHasher tests. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + Note that "len" is type "int", which means it is limited to 2^31-1. + If your data is larger, use the advanced functions below. +*/ + + + +//**************************** +// Advanced Hash Functions +//**************************** + +void* XXH32_init (unsigned int seed); +XXH_errorcode XXH32_update (void* state, const void* input, int len); +unsigned int XXH32_digest (void* state); + +/* +These functions calculate the xxhash of an input provided in several small packets, +as opposed to an input provided as a single block.
+ +It must be started with : +void* XXH32_init() +The function returns a pointer which holds the state of calculation. + +This pointer must be provided as "void* state" parameter for XXH32_update(). +XXH32_update() can be called as many times as necessary. +The user must provide a valid (allocated) input. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. +Note that "len" is type "int", which means it is limited to 2^31-1. +If your data is larger, it is recommended to chunk your data into blocks +of size for example 2^30 (1GB) to avoid any "int" overflow issue. + +Finally, you can end the calculation anytime, by using XXH32_digest(). +This function returns the final 32-bit hash. +You must provide the same "void* state" parameter created by XXH32_init(). +Memory will be freed by XXH32_digest(). +*/ + + +int XXH32_sizeofState(void); +XXH_errorcode XXH32_resetState(void* state, unsigned int seed); + +#define XXH32_SIZEOFSTATE 48 +typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; +/* +These functions allow the user application to make its own allocation for the state. + +XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bit state. +Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. +This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. + +For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), +use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. +*/ + + +unsigned int XXH32_intermediateDigest (void* state); +/* +This function does the same as XXH32_digest(), generating a 32-bit hash, +but preserves the memory context.
+This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). +To free memory context, use XXH32_digest(), or free(). +*/ + + + +//**************************** +// Deprecated function names +//**************************** +// The following translations are provided to ease code transition +// You are encouraged to stop using these function names +#define XXH32_feed XXH32_update +#define XXH32_result XXH32_digest +#define XXH32_getIntermediateResult XXH32_intermediateDigest + + + +#if defined (__cplusplus) +} +#endif diff --git a/tests/test.py b/tests/test.py index 9469a03..59eefdd 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,16 +1,43 @@ +import hashlib import lz4 +import os +import shutil import sys - - import unittest -import os class TestLZ4(unittest.TestCase): def test_random(self): DATA = os.urandom(128 * 1024) # Read 128kb self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA))) - + + def test_file(self): # Round-trip src/lz4.c through compressFileAdv/decompressFileDefault with five option sets and compare MD5 digests + fileName = 'src/lz4.c' + os.mkdir('testHold') # NOTE(review): raises if testHold already exists, and the rmtree at the end is skipped when an assertion fails + testNames = [] + origDigest = hashlib.md5() + + with open('src/lz4.c', 'rb') as lz4Orig: + origDigest.update(lz4Orig.read()) + + for num in range(1, 6): + testNames.append('testHold/test.%d.lz4' % num) + + lz4.compressFileAdv(fileName, 9, output=testNames[0]) + lz4.compressFileAdv(fileName, 9, output=testNames[1], blockSizeID=4) + lz4.compressFileAdv(fileName, 9, output=testNames[2], blockSizeID=7) + lz4.compressFileAdv(fileName, 9, output=testNames[3], blockCheck=1) + lz4.compressFileAdv(fileName, 9, output=testNames[4], streamCheck=0) + + for test in testNames: + lz4.decompressFileDefault(test) # presumably writes the output next to the input with '.lz4' stripped (that is the path opened below) - confirm + testDigest = hashlib.md5() + with open(test.replace('.lz4', ''), 'rb') as testFile: + testDigest.update(testFile.read()) + self.assertEqual(origDigest.hexdigest(), testDigest.hexdigest()) + del testDigest + shutil.rmtree('testHold') + if __name__ == '__main__': unittest.main()