-
Notifications
You must be signed in to change notification settings - Fork 16
Support for lz4 compression #163 #168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 9 commits
34efbb2
00de271
7ab5809
bcd85b4
0d1a114
64e55de
8e474a8
bf32deb
9376298
d3992d9
0a3bacd
ea4d533
9bf878d
2ee242d
e5194f4
f59fa84
15d59db
2aea1ca
4c18c24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,29 +5,39 @@ build-backend = "setuptools.build_meta" | |
[project] | ||
name = "xopen" | ||
authors = [ | ||
{name = "Marcel Martin", email = "[email protected]"}, | ||
gnzsnz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{name = "Ruben Vorderman", email = "[email protected]"} | ||
{ name = "Marcel Martin", email = "[email protected]" }, | ||
{ name = "Ruben Vorderman", email = "[email protected]" }, | ||
] | ||
description = "Open compressed files transparently" | ||
readme = "README.rst" | ||
license = {text = "MIT"} | ||
license = { text = "MIT" } | ||
classifiers = [ | ||
"Development Status :: 5 - Production/Stable", | ||
"License :: OSI Approved :: MIT License", | ||
"Programming Language :: Python :: 3" | ||
"Programming Language :: Python :: 3", | ||
] | ||
requires-python = ">=3.8" | ||
dynamic = ["version"] | ||
dependencies = [ | ||
'isal>=1.6.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', | ||
'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"' | ||
'zlib-ng>=0.4.1; platform.machine == "x86_64" or platform.machine == "AMD64" or platform.machine == "aarch64"', | ||
'lz4>=4.3.3', | ||
] | ||
|
||
[project.urls] | ||
homepage = "https://github.com/pycompression/xopen/" | ||
|
||
[project.optional-dependencies] | ||
dev = ["pytest"] | ||
dev = [ | ||
"pytest", | ||
"pytest-timeout", | ||
"tox", | ||
"black", | ||
"flake8", | ||
"mypy", | ||
"twine", | ||
"setuptools_scm[toml]", | ||
] | ||
gnzsnz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
zstd = ["zstandard<1"] | ||
|
||
[tool.setuptools_scm] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,7 @@ | |
XOPEN_DEFAULT_BZ2_COMPRESSION = 9 | ||
XOPEN_DEFAULT_XZ_COMPRESSION = 6 | ||
XOPEN_DEFAULT_ZST_COMPRESSION = 3 | ||
XOPEN_DEFAULT_LZ4_COMPRESSION = 0 | ||
|
||
igzip: Optional[ModuleType] | ||
isal_zlib: Optional[ModuleType] | ||
|
@@ -70,6 +71,11 @@ | |
except ImportError: | ||
zstandard = None # type: ignore | ||
|
||
try: | ||
import lz4.frame # type: ignore | ||
except ImportError: | ||
lz4 = None | ||
|
||
try: | ||
import fcntl | ||
|
||
|
@@ -120,6 +126,7 @@ class _ProgramSettings: | |
"zstd": _ProgramSettings(("zstd",), tuple(range(1, 20)), "-T"), | ||
"pigz": _ProgramSettings(("pigz", "--no-name"), tuple(range(0, 10)) + (11,), "-p"), | ||
"gzip": _ProgramSettings(("gzip", "--no-name"), tuple(range(1, 10))), | ||
"lz4": _ProgramSettings(("lz4",), tuple(range(0, 17)), "-T"), | ||
gnzsnz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
|
||
|
@@ -551,6 +558,36 @@ def _open_zst( | |
return io.BufferedWriter(f) # mode "ab" and "wb" | ||
|
||
|
||
def _open_lz4(
    filename: FileOrPath,
    mode: str,
    compresslevel: Optional[int],
    threads: Optional[int],
):
    """
    Open an lz4-compressed file in binary mode ("rb", "ab" or "wb").

    If compresslevel is None, XOPEN_DEFAULT_LZ4_COMPRESSION is used.

    Unless threads is 0, a _PipedCompressionProgram configured with the "lz4"
    program settings is tried first. If creating it raises OSError, the lz4
    Python module is used instead when it was importable; otherwise the
    OSError is re-raised. With threads=0 the Python module is used directly,
    and ImportError is raised if it is not available.
    """
    assert mode in ("rb", "ab", "wb")
    level = XOPEN_DEFAULT_LZ4_COMPRESSION if compresslevel is None else compresslevel
    have_lz4_module = lz4 is not None

    use_external_program = threads != 0
    if use_external_program:
        try:
            return _PipedCompressionProgram(
                filename,
                mode,
                level,
                threads,
                program_settings=_PROGRAM_SETTINGS["lz4"],
            )
        except OSError:
            # Without the Python module there is nothing to fall back to,
            # so let the original error propagate.
            if not have_lz4_module:
                raise

    if not have_lz4_module:
        raise ImportError("lz4 module not available")

    return lz4.frame.LZ4FrameFile(filename, mode, compression_level=level)
|
||
|
||
def _open_gz( | ||
filename: FileOrPath, | ||
mode: str, | ||
|
@@ -683,6 +720,10 @@ def _detect_format_from_content(filename: FileOrPath) -> Optional[str]: | |
elif bs[:4] == b"\x28\xb5\x2f\xfd": | ||
# https://datatracker.ietf.org/doc/html/rfc8478#section-3.1.1 | ||
return "zst" | ||
elif bs[:4] == b"\x04\x22\x4d\x18": | ||
# https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd rather have a link to a specification or other technical documentation here. There is no guarantee that Wikipedia will keep listing the magic bytes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed in 4c18c24 |
||
return "lz4" | ||
|
||
return None | ||
finally: | ||
if closefd: | ||
|
@@ -694,7 +735,7 @@ def _detect_format_from_extension(filename: Union[str, bytes]) -> Optional[str]: | |
Attempt to detect file format from the filename extension. | ||
Return None if no format could be detected. | ||
""" | ||
for ext in ("bz2", "xz", "gz", "zst"): | ||
for ext in ("bz2", "xz", "gz", "zst", "lz4"): | ||
if isinstance(filename, bytes): | ||
if filename.endswith(b"." + ext.encode()): | ||
return ext | ||
|
@@ -717,7 +758,7 @@ def _file_or_path_to_binary_stream( | |
# object is not binary, this will crash at a later point. | ||
return file_or_path, False # type: ignore | ||
raise TypeError( | ||
f"Unsupported type for {file_or_path}, " f"{file_or_path.__class__.__name__}." | ||
f"Unsupported type for {file_or_path}, {file_or_path.__class__.__name__}." | ||
) | ||
|
||
|
||
|
@@ -797,6 +838,7 @@ def xopen( # noqa: C901 | |
- .bz2 uses bzip2 compression | ||
- .xz uses xz/lzma compression | ||
- .zst uses zstandard compression | ||
- .lz4 uses lz4 compression | ||
- otherwise, no compression is used | ||
|
||
When reading, if a file name extension is available, the format is detected | ||
|
@@ -808,7 +850,7 @@ def xopen( # noqa: C901 | |
compresslevel is the compression level for writing to gzip, xz, zst and lz4 files. | ||
This parameter is ignored for the other compression formats. | ||
If set to None, a default depending on the format is used: | ||
gzip: 6, xz: 6, zstd: 3. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note to self: didn't we change the gzip level to 1? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes we did. |
||
gzip: 6, xz: 6, zstd: 3, lz4: 0. | ||
gnzsnz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
When threads is None (the default), compressed file formats are read or written | ||
using a pipe to a subprocess running an external tool such as, | ||
|
@@ -828,7 +870,7 @@ def xopen( # noqa: C901 | |
|
||
format overrides the autodetection of input and output formats. This can be | ||
useful when compressed output needs to be written to a file without an | ||
extension. Possible values are "gz", "xz", "bz2", "zst". | ||
extension. Possible values are "gz", "xz", "bz2", "zst", "lz4". | ||
""" | ||
if mode in ("r", "w", "a"): | ||
mode += "t" # type: ignore | ||
|
@@ -844,10 +886,10 @@ def xopen( # noqa: C901 | |
elif _file_is_a_socket_or_pipe(filename): | ||
filename = open(filename, binary_mode) # type: ignore | ||
|
||
if format not in (None, "gz", "xz", "bz2", "zst"): | ||
if format not in (None, "gz", "xz", "bz2", "zst", "lz4"): | ||
raise ValueError( | ||
f"Format not supported: {format}. " | ||
f"Choose one of: 'gz', 'xz', 'bz2', 'zst'" | ||
f"Choose one of: 'gz', 'xz', 'bz2', 'zst', 'lz4'." | ||
) | ||
detected_format = format or _detect_format_from_extension(filepath) | ||
if detected_format is None and "r" in mode: | ||
|
@@ -861,6 +903,8 @@ def xopen( # noqa: C901 | |
opened_file = _open_bz2(filename, binary_mode, compresslevel, threads) | ||
elif detected_format == "zst": | ||
opened_file = _open_zst(filename, binary_mode, compresslevel, threads) | ||
elif detected_format == "lz4": | ||
opened_file = _open_lz4(filename, binary_mode, compresslevel, threads) | ||
else: | ||
opened_file, _ = _file_or_path_to_binary_stream(filename, binary_mode) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.