Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
1cd0951
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
dff2eae
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
8f1c024
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
f8fb5ee
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
3b604a1
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
c91040d
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
db15239
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 8, 2026
201ebef
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
c351a1c
style: pre-commit fixes
pre-commit-ci[bot] Feb 9, 2026
9d4ee75
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
b047e42
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
6ba8c53
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
cea53de
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
0cdf367
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
ecc9be5
Use numcodecs as an optional backend for LZMA and ZSTD
Rachit931 Feb 9, 2026
82be5d8
Adding numcodecs to test dependencies so CI covers numcodecs backend,…
Rachit931 Feb 10, 2026
958a3de
style: pre-commit fixes
pre-commit-ci[bot] Feb 10, 2026
296671e
Simplify ZSTD handling and remove byte checks
Rachit931 Feb 10, 2026
efbb484
style: pre-commit fixes
pre-commit-ci[bot] Feb 10, 2026
b5ba5f0
Adding numcodecs as a backend to ZLIB as well
Rachit931 Feb 14, 2026
0c89e60
Adding numcodecs as a backend to ZLIB as well
Rachit931 Feb 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 112 additions & 7 deletions src/uproot/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,18 +189,47 @@ class _DecompressLZMA:
_method = b"\x00"

def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
    """
    Decompresses an LZMA payload, trying ``numcodecs`` before cramjam.

    Args:
        data (bytes): The compressed payload.
        uncompressed_bytes (None or int): Expected length of the output.
            When given, a ``numcodecs`` result is accepted only if it has
            exactly this length; it is also passed to cramjam as
            ``output_len``.

    Backends are tried in this order: ``numcodecs``; then cramjam's ``xz``
    (or ``experimental.lzma`` when ``xz`` is absent); then the standard
    library ``lzma`` module when the installed cramjam exposes neither.

    Raises:
        ValueError: if the cramjam backend is reached and
            ``uncompressed_bytes`` is None.
    """
    try:
        backend = uproot.extras.numcodecs()
        raw = backend.LZMA().decode(data)
        result = b"".join(raw) if isinstance(raw, list) else bytes(raw)

        size_known = uncompressed_bytes is not None
        if size_known and len(result) != uncompressed_bytes:
            # Raised only to divert control to the fallback backends below;
            # it is caught by the handler of this same try block.
            raise ValueError("numcodecs LZMA produced incorrect output size")

        return result

    except (ModuleNotFoundError, ValueError):
        # Either numcodecs is not installed or its output was rejected:
        # continue with cramjam / the standard library.
        pass

    cramjam = uproot.extras.cramjam()
    xz = getattr(cramjam, "xz", None)
    codec = xz or getattr(getattr(cramjam, "experimental", None), "lzma", None)

    if codec is None:
        # The installed cramjam has no LZMA support at all.
        import lzma as stdlib_lzma

        return stdlib_lzma.decompress(data)

    # cramjam is called with an explicit output length, so it must be known.
    if uncompressed_bytes is None:
        raise ValueError(
            "lzma decompression requires the number of uncompressed bytes"
        )

    return codec.decompress(data, output_len=uncompressed_bytes)


Expand Down Expand Up @@ -236,13 +265,32 @@ def level(self, value):
self._level = int(value)

def compress(self, data: bytes) -> bytes:
    """
    Compresses ``data`` with LZMA at this object's level.

    Args:
        data (bytes): The payload to compress.

    Backends are tried in this order: ``numcodecs``; then cramjam's ``xz``
    (or ``experimental.lzma`` when ``xz`` is absent); then the standard
    library ``lzma`` module when the installed cramjam exposes neither.
    Every backend receives ``self._level`` as its preset, and the result is
    always returned as ``bytes``.
    """
    try:
        numcodecs = uproot.extras.numcodecs()

        # Forward the configured level: LZMA() without a preset would
        # silently ignore it whenever numcodecs is installed.
        codec = numcodecs.LZMA(preset=self._level)
        out = codec.encode(data)
        return b"".join(out) if isinstance(out, list) else bytes(out)

    except ModuleNotFoundError:
        # numcodecs is not installed: fall back to cramjam / stdlib.
        pass

    cramjam = uproot.extras.cramjam()
    lzma_backend = getattr(cramjam, "xz", None) or getattr(
        getattr(cramjam, "experimental", None), "lzma", None
    )

    if lzma_backend is None:
        # The installed cramjam has no LZMA support: use the standard library.
        import lzma as lzma_backend

    out = lzma_backend.compress(data, preset=self._level)
    # cramjam returns a buffer object; normalise every backend to bytes.
    return bytes(memoryview(out))


class _DecompressLZ4:
Expand Down Expand Up @@ -301,11 +349,41 @@ class _DecompressZSTD:
_method = b"\x01"

def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
    """
    Decompresses a ZSTD block, trying ``numcodecs`` before cramjam.

    Args:
        data (bytes): The compressed block.
        uncompressed_bytes (int): Required length of the output. A
            ``numcodecs`` result of any other length is discarded, and the
            value is passed to cramjam as ``output_len``.

    Raises:
        ValueError: if ``uncompressed_bytes`` is None.
        RuntimeError: if the cramjam fallback is reached but the installed
            cramjam has no ``zstd`` attribute.
    """
    # Checked before touching any backend, so this error does not depend on
    # which optional packages are installed.
    if uncompressed_bytes is None:
        raise ValueError(
            "zstd block decompression requires the number of uncompressed bytes"
        )

    try:
        numcodecs = uproot.extras.numcodecs()

        codec = numcodecs.Zstd()
        decoded = codec.decode(data)
        decoded = b"".join(decoded) if isinstance(decoded, list) else bytes(decoded)

        # The numcodecs output size is not trusted; a mismatch diverts to
        # the cramjam backend, which is given the expected length.
        if len(decoded) != uncompressed_bytes:
            raise ValueError("numcodecs ZSTD produced incorrect output size")

        return decoded

    except ModuleNotFoundError:
        # numcodecs is not installed: fall back to cramjam.
        pass
    except ValueError:
        # Size mismatch (or a ValueError from decoding): fall back to cramjam.
        pass

    # cramjam is only imported once numcodecs has been ruled out, so it is
    # not required when numcodecs can do the job.
    cramjam = uproot.extras.cramjam()
    zstd = getattr(cramjam, "zstd", None)

    if zstd is None:
        raise RuntimeError("ZSTD decompression requires cramjam or numcodecs")

    return zstd.decompress(data, output_len=uncompressed_bytes)


Expand Down Expand Up @@ -342,8 +420,28 @@ def level(self, value):
self._level = int(value)

def compress(self, data: bytes) -> bytes:
    """
    Compresses ``data`` with ZSTD at this object's level.

    Args:
        data (bytes): The payload to compress.

    ``numcodecs`` is tried first and cramjam is the fallback; both receive
    ``self._level``. The result is always returned as ``bytes``.

    Raises:
        RuntimeError: if the cramjam fallback is reached but the installed
            cramjam has no ``zstd`` attribute.
    """
    try:
        numcodecs = uproot.extras.numcodecs()

        codec = numcodecs.Zstd(level=self._level)
        out = codec.encode(data)
        return b"".join(out) if isinstance(out, list) else bytes(out)

    except ModuleNotFoundError:
        # numcodecs is not installed: fall back to cramjam.
        pass

    cramjam = uproot.extras.cramjam()
    zstd = getattr(cramjam, "zstd", None)

    if zstd is None:
        raise RuntimeError("ZSTD compression requires cramjam or numcodecs")

    out = zstd.compress(data, level=self._level)
    # cramjam returns a buffer object; normalise to bytes like the
    # numcodecs path above.
    return bytes(memoryview(out))


algorithm_codes = {
Expand Down Expand Up @@ -539,8 +637,14 @@ def compress(data: bytes, compression: Compression) -> bytes:
output would be larger than the input, the input is returned instead, in whatever
format (bytes, memoryview, or NumPy array) it was provided.
"""

def _normalize_bytes(x):
if isinstance(x, list):
return b"".join(x)
return x

if compression is None or compression.level == 0:
return data
return _normalize_bytes(data)

out = []
next = data
Expand Down Expand Up @@ -573,6 +677,7 @@ def compress(data: bytes, compression: Compression) -> bytes:
out.append(compressed)

out = b"".join(out)
data = _normalize_bytes(data)

if len(out) < len(data):
return out
Expand Down
18 changes: 18 additions & 0 deletions src/uproot/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,24 @@ def cramjam():
return cramjam


def numcodecs():
    """
    Imports and returns ``numcodecs``.

    Raises:
        ModuleNotFoundError: with installation instructions, chained to the
            original import failure, if the package cannot be imported.
    """
    try:
        import numcodecs as module
    except ModuleNotFoundError as err:
        instructions = """install the `numcodecs` package with:

pip install numcodecs

or

conda install numcodecs"""
        raise ModuleNotFoundError(instructions) from err
    return module


def xxhash():
"""
Imports and returns ``xxhash``.
Expand Down
Loading