Skip to content

Commit 78c6458

Browse files
committed
Add xz compression test
1 parent 9888c11 commit 78c6458

File tree

8 files changed

+131
-35
lines changed

8 files changed

+131
-35
lines changed

integration-tests/.pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ addopts =
99
env =
1010
D:CLP_BUILD_DIR=../build
1111
D:CLP_CORE_BINS_DIR=../build/core
12+
D:CLP_DEPS_CORE_DIR=../build/deps/core
1213
D:CLP_PACKAGE_DIR=../build/clp-package
1314
log_cli = True
1415
log_cli_date_format = %Y-%m-%d %H:%M:%S,%f

integration-tests/tests/fixtures/integration_test_config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from tests.utils.config import (
88
CoreConfig,
9+
DepsConfig,
910
IntegrationTestConfig,
1011
PackageConfig,
1112
)
@@ -18,12 +19,16 @@ def integration_test_config() -> IntegrationTestConfig:
1819
core_config = CoreConfig(
1920
clp_core_bins_dir=Path(get_env_var("CLP_CORE_BINS_DIR")).expanduser().resolve()
2021
)
22+
deps_config = DepsConfig(
23+
clp_deps_core_dir=Path(get_env_var("CLP_DEPS_CORE_DIR")).expanduser().resolve()
24+
)
2125
package_config = PackageConfig(
2226
clp_package_dir=Path(get_env_var("CLP_PACKAGE_DIR")).expanduser().resolve()
2327
)
2428
test_root_dir = Path(get_env_var("CLP_BUILD_DIR")).expanduser().resolve() / "integration-tests"
2529
return IntegrationTestConfig(
2630
core_config=core_config,
31+
deps_config=deps_config,
2732
package_config=package_config,
2833
test_root_dir=test_root_dir,
2934
)

integration-tests/tests/fixtures/integration_test_logs.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def _download_and_extract_dataset(
6969
curl_bin,
7070
"--fail",
7171
"--location",
72-
"--output", str(integration_test_logs.tarball_path),
72+
"--output", str(integration_test_logs.tar_gz_path),
7373
"--show-error",
7474
tarball_url,
7575
]
@@ -78,18 +78,33 @@ def _download_and_extract_dataset(
7878

7979
unlink(integration_test_logs.extraction_dir)
8080
shutil.unpack_archive(
81-
integration_test_logs.tarball_path, integration_test_logs.extraction_dir
81+
integration_test_logs.tar_gz_path, integration_test_logs.extraction_dir
8282
)
8383
except Exception as e:
8484
err_msg = f"Failed to download and extract dataset `{name}`."
8585
raise RuntimeError(err_msg) from e
8686

87+
extraction_path = str(integration_test_logs.extraction_dir)
88+
8789
# Allow the extracted content to be deletable or overwritable
8890
chmod_bin = shutil.which("chmod")
8991
if chmod_bin is None:
9092
err_msg = "chmod executable not found"
9193
raise RuntimeError(err_msg)
92-
subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True)
94+
subprocess.run([chmod_bin, "-R", "gu+w", extraction_path], check=True)
95+
96+
# Create tar of the extracted content for different compression formats
97+
tar_bin = shutil.which("tar")
98+
if tar_bin is None:
99+
err_msg = "tar executable not found"
100+
raise RuntimeError(err_msg)
101+
subprocess.run([tar_bin, "--create", f"--file={integration_test_logs.base_tar_path}", f"--directory={integration_test_logs.extraction_dir}", extraction_path], check=True)
102+
103+
# Create LibLZMA xz tar
104+
xz_bin = str(integration_test_config.deps_config.xz_binary_path)
105+
xz_cmds = [xz_bin, "--keep", "--compress", "--stdout", extraction_path]
106+
with open(integration_test_logs.tar_xz_path, "wb") as fout:
107+
subprocess.run(xz_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL)
93108

94109
logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
95110
request.config.cache.set(name, True)

integration-tests/tests/test_identity_transformation.py

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
compression and decompression.
44
"""
55

6+
from pathlib import Path
7+
68
import pytest
79

810
from tests.utils.asserting_utils import run_and_assert
@@ -49,39 +51,63 @@ def test_clp_identity_transform(
4951
:param test_logs_fixture:
5052
"""
5153
integration_test_logs: IntegrationTestLogs = request.getfixturevalue(test_logs_fixture)
54+
logs_source_dir: Path = integration_test_logs.extraction_dir
55+
56+
#test_paths = CompressionTestConfig(
57+
# test_name=f"clp-{integration_test_logs.name}",
58+
# compression_input=integration_test_logs.extraction_dir,
59+
# integration_test_config=integration_test_config,
60+
#)
61+
#_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config)
62+
63+
#test_paths = CompressionTestConfig(
64+
# test_name=f"clp-{integration_test_logs.name}-tar-gz",
65+
# compression_input=integration_test_logs.tar_gz_path,
66+
# integration_test_config=integration_test_config,
67+
#)
68+
#_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config)
69+
5270
test_paths = CompressionTestConfig(
53-
test_name=f"clp-{integration_test_logs.name}",
54-
logs_source_dir=integration_test_logs.extraction_dir,
71+
test_name=f"clp-{integration_test_logs.name}-tar-xz",
72+
compression_input=integration_test_logs.tar_xz_path,
5573
integration_test_config=integration_test_config,
5674
)
75+
_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config)
76+
77+
78+
def _run_clp_identity_transform(
79+
logs_source_dir: Path,
80+
test_paths: CompressionTestConfig,
81+
integration_test_config: IntegrationTestConfig,
82+
) -> None:
5783
test_paths.clear_test_outputs()
5884

5985
bin_path = str(integration_test_config.core_config.clp_binary_path)
60-
src_path = str(test_paths.logs_source_dir)
86+
input_path = str(test_paths.compression_input)
6187
compression_path = str(test_paths.compression_dir)
6288
decompression_path = str(test_paths.decompression_dir)
89+
path_prefix_to_remove = input_path if test_paths.compression_input.is_dir() else str(test_paths.compression_input.parent)
90+
6391
# fmt: off
6492
compression_cmd = [
6593
bin_path,
6694
"c",
6795
"--progress",
68-
"--remove-path-prefix", src_path,
96+
"--remove-path-prefix", path_prefix_to_remove,
6997
compression_path,
70-
src_path,
98+
input_path,
7199
]
72100
# fmt: on
73101
run_and_assert(compression_cmd)
74102

75103
decompression_cmd = [bin_path, "x", compression_path, decompression_path]
76104
run_and_assert(decompression_cmd)
77105

78-
input_path = test_paths.logs_source_dir
79-
output_path = test_paths.decompression_dir
106+
decompressed_logs_path = test_paths.decompression_dir
80107
assert is_dir_tree_content_equal(
81-
input_path,
82-
output_path,
83-
), f"Mismatch between clp input {input_path} and output {output_path}."
84-
108+
logs_source_dir,
109+
decompressed_logs_path,
110+
), f"Mismatch between source {logs_source_dir} and `clp` final output {decompressed_logs_path}."
85111
test_paths.clear_test_outputs()
86112

87113

@@ -105,7 +131,7 @@ def test_clp_s_identity_transform(
105131

106132
test_paths = CompressionTestConfig(
107133
test_name=f"clp-s-{test_logs_name}",
108-
logs_source_dir=integration_test_logs.extraction_dir,
134+
compression_input=integration_test_logs.extraction_dir,
109135
integration_test_config=integration_test_config,
110136
)
111137
_clp_s_compress_and_decompress(integration_test_config, test_paths)
@@ -118,13 +144,13 @@ def test_clp_s_identity_transform(
118144
# See also: https://docs.yscope.com/clp/main/user-guide/core-clp-s.html#current-limitations
119145
consolidated_json_test_paths = CompressionTestConfig(
120146
test_name=f"clp-s-{test_logs_name}-consolidated-json",
121-
logs_source_dir=test_paths.decompression_dir,
147+
compression_input=test_paths.decompression_dir,
122148
integration_test_config=integration_test_config,
123149
)
124150
_clp_s_compress_and_decompress(integration_test_config, consolidated_json_test_paths)
125151

126152
_consolidated_json_file_name = "original"
127-
input_path = consolidated_json_test_paths.logs_source_dir / _consolidated_json_file_name
153+
input_path = consolidated_json_test_paths.compression_input / _consolidated_json_file_name
128154
output_path = consolidated_json_test_paths.decompression_dir / _consolidated_json_file_name
129155
assert is_json_file_structurally_equal(input_path, output_path), (
130156
f"Mismatch between clp-s input {input_path} and output {output_path}."
@@ -139,7 +165,7 @@ def _clp_s_compress_and_decompress(
139165
) -> None:
140166
test_paths.clear_test_outputs()
141167
bin_path = str(integration_test_config.core_config.clp_s_binary_path)
142-
src_path = str(test_paths.logs_source_dir)
168+
src_path = str(test_paths.compression_input)
143169
compression_path = str(test_paths.compression_dir)
144170
decompression_path = str(test_paths.decompression_dir)
145171
run_and_assert([bin_path, "c", compression_path, src_path])

integration-tests/tests/utils/config.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from tests.utils.utils import (
99
unlink,
10-
validate_dir_exists,
10+
validate_dir_exists_and_is_absolute,
1111
)
1212

1313

@@ -24,7 +24,7 @@ def __post_init__(self) -> None:
2424
executables.
2525
"""
2626
clp_core_bins_dir = self.clp_core_bins_dir
27-
validate_dir_exists(clp_core_bins_dir)
27+
validate_dir_exists_and_is_absolute(clp_core_bins_dir)
2828

2929
# Check for required CLP core binaries
3030
required_binaries = ["clg", "clo", "clp", "clp-s", "indexer", "reducer-server"]
@@ -47,6 +47,34 @@ def clp_s_binary_path(self) -> Path:
4747
return self.clp_core_bins_dir / "clp-s"
4848

4949

50+
@dataclass(frozen=True)
51+
class DepsConfig:
52+
"""The configuration for dependencies used by clp."""
53+
54+
#:
55+
clp_deps_core_dir: Path
56+
57+
def __post_init__(self) -> None:
58+
"""Validates that the core dependency directory exists and is an absolute path."""
59+
validate_dir_exists_and_is_absolute(self.clp_deps_core_dir)
60+
61+
@property
62+
def lz4_binary_path(self) -> Path:
63+
""":return: The absolute path to the lz4 compression tool."""
64+
return self.clp_deps_core_dir / "lz4-install" / "bin" / "lz4"
65+
66+
@property
67+
def zstd_binary_path(self) -> Path:
68+
""":return: The absolute path to the zstd compression tool."""
69+
return self.clp_deps_core_dir / "zstd-install" / "bin" / "zstd"
70+
71+
@property
72+
def xz_binary_path(self) -> Path:
73+
""":return: The absolute path to the LibLZMA xz compression tool."""
74+
return self.clp_deps_core_dir / "LibLZMA-static-install" / "bin" / "xz"
75+
76+
77+
5078
@dataclass(frozen=True)
5179
class PackageConfig:
5280
"""The configuration for the clp package subject to testing."""
@@ -57,7 +85,7 @@ class PackageConfig:
5785
def __post_init__(self) -> None:
5886
"""Validates that the CLP package directory exists and contains all required directories."""
5987
clp_package_dir = self.clp_package_dir
60-
validate_dir_exists(clp_package_dir)
88+
validate_dir_exists_and_is_absolute(clp_package_dir)
6189

6290
# Check for required package script directories
6391
required_dirs = ["bin", "etc", "lib", "sbin"]
@@ -77,6 +105,8 @@ class IntegrationTestConfig:
77105
#:
78106
core_config: CoreConfig
79107
#:
108+
deps_config: DepsConfig
109+
#:
80110
package_config: PackageConfig
81111
#: Root directory for integration tests output.
82112
test_root_dir: Path
@@ -105,8 +135,6 @@ class IntegrationTestLogs:
105135
tarball_url: str
106136
integration_test_config: InitVar[IntegrationTestConfig]
107137
#:
108-
tarball_path: Path = field(init=False, repr=True)
109-
#:
110138
extraction_dir: Path = field(init=False, repr=True)
111139

112140
def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None:
@@ -116,25 +144,45 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None:
116144
err_msg = "`name` cannot be empty."
117145
raise ValueError(err_msg)
118146
logs_download_dir = integration_test_config.logs_download_dir
119-
validate_dir_exists(logs_download_dir)
147+
validate_dir_exists_and_is_absolute(logs_download_dir)
120148

121149
object.__setattr__(self, "name", name)
122-
object.__setattr__(self, "tarball_path", logs_download_dir / f"{name}.tar.gz")
123150
object.__setattr__(self, "extraction_dir", logs_download_dir / name)
124151

152+
@property
153+
def base_tar_path(self) -> None:
154+
return self.extraction_dir.with_suffix(".tar")
155+
156+
@property
157+
def tar_gz_path(self) -> None:
158+
return self.extraction_dir.with_suffix(".tar.gz")
159+
160+
@property
161+
def tar_lz4_path(self) -> None:
162+
return self.extraction_dir.with_suffix(".tar.lz4")
163+
164+
@property
165+
def tar_xz_path(self) -> None:
166+
return self.extraction_dir.with_suffix(".tar.xz")
167+
168+
@property
169+
def tar_zstd_path(self) -> None:
170+
return self.extraction_dir.with_suffix(".tar.zstd")
171+
172+
125173

126174
@dataclass(frozen=True)
127175
class CompressionTestConfig:
128176
"""Compression test configuration providing per-test metadata for artifacts and directories."""
129177

130178
#:
131179
test_name: str
132-
#: Directory containing the original (uncompressed) log files used by this test.
133-
logs_source_dir: Path
180+
#: Path to the CLP compression input archive or directory.
181+
compression_input: Path
134182
integration_test_config: InitVar[IntegrationTestConfig]
135-
#: Path to store compressed archives generated by the test.
183+
#: Directory to store generated compressed CLP archives.
136184
compression_dir: Path = field(init=False, repr=True)
137-
#: Path to store decompressed logs generated by the test.
185+
#: Directory to store logs decompressed from CLP archives.
138186
decompression_dir: Path = field(init=False, repr=True)
139187

140188
def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None:
@@ -144,7 +192,7 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None:
144192
err_msg = "`test_name` cannot be empty."
145193
raise ValueError(err_msg)
146194
test_root_dir = integration_test_config.test_root_dir
147-
validate_dir_exists(test_root_dir)
195+
validate_dir_exists_and_is_absolute(test_root_dir)
148196

149197
object.__setattr__(self, "test_name", test_name)
150198
object.__setattr__(self, "compression_dir", test_root_dir / f"{test_name}-archives")

integration-tests/tests/utils/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,17 @@ def unlink(rm_path: Path, force: bool = True) -> None:
7575
raise OSError(err_msg) from e
7676

7777

78-
def validate_dir_exists(dir_path: Path) -> None:
78+
def validate_dir_exists_and_is_absolute(dir_path: Path) -> None:
7979
"""
8080
:param dir_path:
81-
:raise: ValueError if the path does not exist or is not a directory.
81+
:raise: ValueError if the path does not exist, is not a directory, or is not absolute.
8282
"""
8383
if not dir_path.is_dir():
8484
err_msg = f"Path does not exist or is not a directory: {dir_path}"
8585
raise ValueError(err_msg)
86+
if not dir_path.is_absolute():
87+
err_msg = f"Path is not absolute: {dir_path}"
88+
raise ValueError(err_msg)
8689

8790

8891
def _sort_json_keys_and_rows(json_fp: Path) -> IO[str]:

taskfiles/deps/main.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,6 @@ tasks:
265265
- "-DXZ_TOOL_LZMAINFO=OFF"
266266
- "-DXZ_TOOL_SCRIPTS=OFF"
267267
- "-DXZ_TOOL_SYMLINKS_LZMA=OFF"
268-
- "-DXZ_TOOL_XZ=OFF"
269268
- "-DXZ_TOOL_XZDEC=OFF"
270269
TARBALL_SHA256: "507825b599356c10dca1cd720c9d0d0c9d5400b9de300af00e4d1ea150795543"
271270
TARBALL_URL: "https://github.com/tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1.tar.gz"
@@ -331,7 +330,6 @@ tasks:
331330
- "-DBUILD_STATIC_LIBS=ON"
332331
- "-DCMAKE_BUILD_TYPE=Release"
333332
- "-DCMAKE_INSTALL_MESSAGE=LAZY"
334-
- "-DLZ4_BUILD_CLI=OFF"
335333
CMAKE_SOURCE_DIR: "build/cmake"
336334
LIB_NAME: "lz4"
337335
TARBALL_SHA256: "537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b"
@@ -555,7 +553,6 @@ tasks:
555553
- "-DCMAKE_BUILD_TYPE=Release"
556554
- "-DCMAKE_INSTALL_MESSAGE=LAZY"
557555
- "-DZSTD_BUILD_CONTRIB=OFF"
558-
- "-DZSTD_BUILD_PROGRAMS=OFF"
559556
- "-DZSTD_BUILD_SHARED=ON"
560557
- "-DZSTD_BUILD_STATIC=ON"
561558
- "-DZSTD_BUILD_TESTS=OFF"

taskfiles/tests/integration.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@ tasks:
2323
env:
2424
CLP_BUILD_DIR: "{{.G_BUILD_DIR}}"
2525
CLP_CORE_BINS_DIR: "{{.G_CORE_COMPONENT_BUILD_DIR}}"
26+
CLP_DEPS_CORE_DIR: "{{.G_DEPS_CORE_DIR}}"
2627
CLP_PACKAGE_DIR: "{{.G_PACKAGE_BUILD_DIR}}"
2728
cmd: "uv run python -m pytest -m core"

0 commit comments

Comments
 (0)