From 78c64587dac3ae788e0d1e9678ad882697a6a107 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 17 Sep 2025 12:45:42 -0400 Subject: [PATCH 1/4] Add xz compression test --- integration-tests/.pytest.ini | 1 + .../tests/fixtures/integration_test_config.py | 5 ++ .../tests/fixtures/integration_test_logs.py | 21 +++++- .../tests/test_identity_transformation.py | 56 +++++++++++---- integration-tests/tests/utils/config.py | 72 +++++++++++++++---- integration-tests/tests/utils/utils.py | 7 +- taskfiles/deps/main.yaml | 3 - taskfiles/tests/integration.yaml | 1 + 8 files changed, 131 insertions(+), 35 deletions(-) diff --git a/integration-tests/.pytest.ini b/integration-tests/.pytest.ini index 6235100f27..eed36c9bf0 100644 --- a/integration-tests/.pytest.ini +++ b/integration-tests/.pytest.ini @@ -9,6 +9,7 @@ addopts = env = D:CLP_BUILD_DIR=../build D:CLP_CORE_BINS_DIR=../build/core + D:CLP_DEPS_CORE_DIR=../build/deps/core D:CLP_PACKAGE_DIR=../build/clp-package log_cli = True log_cli_date_format = %Y-%m-%d %H:%M:%S,%f diff --git a/integration-tests/tests/fixtures/integration_test_config.py b/integration-tests/tests/fixtures/integration_test_config.py index f097021b3d..eedf7e6f75 100644 --- a/integration-tests/tests/fixtures/integration_test_config.py +++ b/integration-tests/tests/fixtures/integration_test_config.py @@ -6,6 +6,7 @@ from tests.utils.config import ( CoreConfig, + DepsConfig, IntegrationTestConfig, PackageConfig, ) @@ -18,12 +19,16 @@ def integration_test_config() -> IntegrationTestConfig: core_config = CoreConfig( clp_core_bins_dir=Path(get_env_var("CLP_CORE_BINS_DIR")).expanduser().resolve() ) + deps_config = DepsConfig( + clp_deps_core_dir=Path(get_env_var("CLP_DEPS_CORE_DIR")).expanduser().resolve() + ) package_config = PackageConfig( clp_package_dir=Path(get_env_var("CLP_PACKAGE_DIR")).expanduser().resolve() ) test_root_dir = Path(get_env_var("CLP_BUILD_DIR")).expanduser().resolve() / "integration-tests" return IntegrationTestConfig( 
core_config=core_config, + deps_config=deps_config, package_config=package_config, test_root_dir=test_root_dir, ) diff --git a/integration-tests/tests/fixtures/integration_test_logs.py b/integration-tests/tests/fixtures/integration_test_logs.py index 21c288e4c5..6e60fd3fbb 100644 --- a/integration-tests/tests/fixtures/integration_test_logs.py +++ b/integration-tests/tests/fixtures/integration_test_logs.py @@ -69,7 +69,7 @@ def _download_and_extract_dataset( curl_bin, "--fail", "--location", - "--output", str(integration_test_logs.tarball_path), + "--output", str(integration_test_logs.tar_gz_path), "--show-error", tarball_url, ] @@ -78,18 +78,33 @@ def _download_and_extract_dataset( unlink(integration_test_logs.extraction_dir) shutil.unpack_archive( - integration_test_logs.tarball_path, integration_test_logs.extraction_dir + integration_test_logs.tar_gz_path, integration_test_logs.extraction_dir ) except Exception as e: err_msg = f"Failed to download and extract dataset `{name}`." raise RuntimeError(err_msg) from e + extraction_path = str(integration_test_logs.extraction_dir) + # Allow the extracted content to be deletable or overwritable chmod_bin = shutil.which("chmod") if chmod_bin is None: err_msg = "chmod executable not found" raise RuntimeError(err_msg) - subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True) + subprocess.run([chmod_bin, "-R", "gu+w", extraction_path], check=True) + + # Create tar of the extracted content for different compression formats + tar_bin = shutil.which("tar") + if tar_bin is None: + err_msg = "tar executable not found" + raise RuntimeError(err_msg) + subprocess.run([tar_bin, "--create", f"--file={integration_test_logs.base_tar_path}", f"--directory={integration_test_logs.extraction_dir}", extraction_path], check=True) + + # Create LibLZMA xz tar + xz_bin = str(integration_test_config.deps_config.xz_binary_path) + xz_cmds = [xz_bin, "--keep", "--compress", "--stdout", extraction_path] + with 
open(integration_test_logs.tar_xz_path, "wb") as fout: + subprocess.run(xz_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name) request.config.cache.set(name, True) diff --git a/integration-tests/tests/test_identity_transformation.py b/integration-tests/tests/test_identity_transformation.py index 953ffd913f..71d1ae14e5 100644 --- a/integration-tests/tests/test_identity_transformation.py +++ b/integration-tests/tests/test_identity_transformation.py @@ -3,6 +3,8 @@ compression and decompression. """ +from pathlib import Path + import pytest from tests.utils.asserting_utils import run_and_assert @@ -49,25 +51,51 @@ def test_clp_identity_transform( :param test_logs_fixture: """ integration_test_logs: IntegrationTestLogs = request.getfixturevalue(test_logs_fixture) + logs_source_dir: Path = integration_test_logs.extraction_dir + + #test_paths = CompressionTestConfig( + # test_name=f"clp-{integration_test_logs.name}", + # compression_input=integration_test_logs.extraction_dir, + # integration_test_config=integration_test_config, + #) + #_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) + + #test_paths = CompressionTestConfig( + # test_name=f"clp-{integration_test_logs.name}-tar-gz", + # compression_input=integration_test_logs.tar_gz_path, + # integration_test_config=integration_test_config, + #) + #_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) + test_paths = CompressionTestConfig( - test_name=f"clp-{integration_test_logs.name}", - logs_source_dir=integration_test_logs.extraction_dir, + test_name=f"clp-{integration_test_logs.name}-tar-xz", + compression_input=integration_test_logs.tar_xz_path, integration_test_config=integration_test_config, ) + _run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) + + +def _run_clp_identity_transform( + logs_source_dir: Path, + test_paths: 
CompressionTestConfig, + integration_test_config: IntegrationTestConfig, +) -> None: test_paths.clear_test_outputs() bin_path = str(integration_test_config.core_config.clp_binary_path) - src_path = str(test_paths.logs_source_dir) + input_path = str(test_paths.compression_input) compression_path = str(test_paths.compression_dir) decompression_path = str(test_paths.decompression_dir) + path_prefix_to_remove = input_path if test_paths.compression_input.is_dir() else str(test_paths.compression_input.parent) + # fmt: off compression_cmd = [ bin_path, "c", "--progress", - "--remove-path-prefix", src_path, + "--remove-path-prefix", path_prefix_to_remove, compression_path, - src_path, + input_path, ] # fmt: on run_and_assert(compression_cmd) @@ -75,13 +103,11 @@ def test_clp_identity_transform( decompression_cmd = [bin_path, "x", compression_path, decompression_path] run_and_assert(decompression_cmd) - input_path = test_paths.logs_source_dir - output_path = test_paths.decompression_dir + decompressed_logs_path = test_paths.decompression_dir assert is_dir_tree_content_equal( - input_path, - output_path, - ), f"Mismatch between clp input {input_path} and output {output_path}." - + logs_source_dir, + decompressed_logs_path, + ), f"Mismatch between source {logs_source_dir} and `clp` final output {decompressed_logs_path}." 
test_paths.clear_test_outputs() @@ -105,7 +131,7 @@ def test_clp_s_identity_transform( test_paths = CompressionTestConfig( test_name=f"clp-s-{test_logs_name}", - logs_source_dir=integration_test_logs.extraction_dir, + compression_input=integration_test_logs.extraction_dir, integration_test_config=integration_test_config, ) _clp_s_compress_and_decompress(integration_test_config, test_paths) @@ -118,13 +144,13 @@ def test_clp_s_identity_transform( # See also: https://docs.yscope.com/clp/main/user-guide/core-clp-s.html#current-limitations consolidated_json_test_paths = CompressionTestConfig( test_name=f"clp-s-{test_logs_name}-consolidated-json", - logs_source_dir=test_paths.decompression_dir, + compression_input=test_paths.decompression_dir, integration_test_config=integration_test_config, ) _clp_s_compress_and_decompress(integration_test_config, consolidated_json_test_paths) _consolidated_json_file_name = "original" - input_path = consolidated_json_test_paths.logs_source_dir / _consolidated_json_file_name + input_path = consolidated_json_test_paths.compression_input / _consolidated_json_file_name output_path = consolidated_json_test_paths.decompression_dir / _consolidated_json_file_name assert is_json_file_structurally_equal(input_path, output_path), ( f"Mismatch between clp-s input {input_path} and output {output_path}." 
@@ -139,7 +165,7 @@ def _clp_s_compress_and_decompress( ) -> None: test_paths.clear_test_outputs() bin_path = str(integration_test_config.core_config.clp_s_binary_path) - src_path = str(test_paths.logs_source_dir) + src_path = str(test_paths.compression_input) compression_path = str(test_paths.compression_dir) decompression_path = str(test_paths.decompression_dir) run_and_assert([bin_path, "c", compression_path, src_path]) diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py index b087a106f2..77a1acc066 100644 --- a/integration-tests/tests/utils/config.py +++ b/integration-tests/tests/utils/config.py @@ -7,7 +7,7 @@ from tests.utils.utils import ( unlink, - validate_dir_exists, + validate_dir_exists_and_is_absolute, ) @@ -24,7 +24,7 @@ def __post_init__(self) -> None: executables. """ clp_core_bins_dir = self.clp_core_bins_dir - validate_dir_exists(clp_core_bins_dir) + validate_dir_exists_and_is_absolute(clp_core_bins_dir) # Check for required CLP core binaries required_binaries = ["clg", "clo", "clp", "clp-s", "indexer", "reducer-server"] @@ -47,6 +47,34 @@ def clp_s_binary_path(self) -> Path: return self.clp_core_bins_dir / "clp-s" +@dataclass(frozen=True) +class DepsConfig: + """The configuration for dependencies used by clp.""" + + #: + clp_deps_core_dir: Path + + def __post_init__(self) -> None: + """Validates that the core dependency directory exists.""" + validate_dir_exists_and_is_absolute(self.clp_deps_core_dir) + + @property + def lz4_binary_path(self) -> Path: + """:return: The absolute path to the lz4 compression tool.""" + return self.clp_deps_core_dir / "lz4-install" / "bin" / "lz4" + + @property + def zstd_binary_path(self) -> Path: + """:return: The absolute path to the zstd compression tool.""" + return self.clp_deps_core_dir / "zstd-install" / "bin" / "zstd" + + @property + def xz_binary_path(self) -> Path: + """:return: The absolute path to the LibLZMA xz compression tool.""" + return 
self.clp_deps_core_dir / "LibLZMA-static-install" / "bin" / "xz" + + + @dataclass(frozen=True) class PackageConfig: """The configuration for the clp package subject to testing.""" @@ -57,7 +85,7 @@ class PackageConfig: def __post_init__(self) -> None: """Validates that the CLP package directory exists and contains all required directories.""" clp_package_dir = self.clp_package_dir - validate_dir_exists(clp_package_dir) + validate_dir_exists_and_is_absolute(clp_package_dir) # Check for required package script directories required_dirs = ["bin", "etc", "lib", "sbin"] @@ -77,6 +105,8 @@ class IntegrationTestConfig: #: core_config: CoreConfig #: + deps_config: DepsConfig + #: package_config: PackageConfig #: Root directory for integration tests output. test_root_dir: Path @@ -105,8 +135,6 @@ class IntegrationTestLogs: tarball_url: str integration_test_config: InitVar[IntegrationTestConfig] #: - tarball_path: Path = field(init=False, repr=True) - #: extraction_dir: Path = field(init=False, repr=True) def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: @@ -116,12 +144,32 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: err_msg = "`name` cannot be empty." 
raise ValueError(err_msg) logs_download_dir = integration_test_config.logs_download_dir - validate_dir_exists(logs_download_dir) + validate_dir_exists_and_is_absolute(logs_download_dir) object.__setattr__(self, "name", name) - object.__setattr__(self, "tarball_path", logs_download_dir / f"{name}.tar.gz") object.__setattr__(self, "extraction_dir", logs_download_dir / name) + @property + def base_tar_path(self) -> None: + return self.extraction_dir.with_suffix(".tar") + + @property + def tar_gz_path(self) -> None: + return self.extraction_dir.with_suffix(".tar.gz") + + @property + def tar_lz4_path(self) -> None: + return self.extraction_dir.with_suffix(".tar.lz4") + + @property + def tar_xz_path(self) -> None: + return self.extraction_dir.with_suffix(".tar.xz") + + @property + def tar_zstd_path(self) -> None: + return self.extraction_dir.with_suffix(".tar.zstd") + + @dataclass(frozen=True) class CompressionTestConfig: @@ -129,12 +177,12 @@ class CompressionTestConfig: #: test_name: str - #: Directory containing the original (uncompressed) log files used by this test. - logs_source_dir: Path + #: Path to the CLP compression input archive or directory. + compression_input: Path integration_test_config: InitVar[IntegrationTestConfig] - #: Path to store compressed archives generated by the test. + #: Directory to store generated compressed CLP archives. compression_dir: Path = field(init=False, repr=True) - #: Path to store decompressed logs generated by the test. + #: Directory to store logs decompressed from CLP archives. decompression_dir: Path = field(init=False, repr=True) def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: @@ -144,7 +192,7 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: err_msg = "`test_name` cannot be empty."
raise ValueError(err_msg) test_root_dir = integration_test_config.test_root_dir - validate_dir_exists(test_root_dir) + validate_dir_exists_and_is_absolute(test_root_dir) object.__setattr__(self, "test_name", test_name) object.__setattr__(self, "compression_dir", test_root_dir / f"{test_name}-archives") diff --git a/integration-tests/tests/utils/utils.py b/integration-tests/tests/utils/utils.py index 1dca8ba162..da016d8c5a 100644 --- a/integration-tests/tests/utils/utils.py +++ b/integration-tests/tests/utils/utils.py @@ -75,14 +75,17 @@ def unlink(rm_path: Path, force: bool = True) -> None: raise OSError(err_msg) from e -def validate_dir_exists(dir_path: Path) -> None: +def validate_dir_exists_and_is_absolute(dir_path: Path) -> None: """ :param dir_path: - :raise: ValueError if the path does not exist or is not a directory. + :raise: ValueError if the path does not exist, is not a directory, or is not absolute. """ if not dir_path.is_dir(): err_msg = f"Path does not exist or is not a directory: {dir_path}" raise ValueError(err_msg) + if not dir_path.is_absolute(): + err_msg = f"Path is not absolute: {dir_path}" + raise ValueError(err_msg) def _sort_json_keys_and_rows(json_fp: Path) -> IO[str]: diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml index 308e3c119b..5c713f06e9 100644 --- a/taskfiles/deps/main.yaml +++ b/taskfiles/deps/main.yaml @@ -265,7 +265,6 @@ tasks: - "-DXZ_TOOL_LZMAINFO=OFF" - "-DXZ_TOOL_SCRIPTS=OFF" - "-DXZ_TOOL_SYMLINKS_LZMA=OFF" - - "-DXZ_TOOL_XZ=OFF" - "-DXZ_TOOL_XZDEC=OFF" TARBALL_SHA256: "507825b599356c10dca1cd720c9d0d0c9d5400b9de300af00e4d1ea150795543" TARBALL_URL: "https://github.com/tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1.tar.gz" @@ -331,7 +330,6 @@ tasks: - "-DBUILD_STATIC_LIBS=ON" - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_MESSAGE=LAZY" - - "-DLZ4_BUILD_CLI=OFF" CMAKE_SOURCE_DIR: "build/cmake" LIB_NAME: "lz4" TARBALL_SHA256: "537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b" @@ -555,7 
+553,6 @@ tasks: - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_MESSAGE=LAZY" - "-DZSTD_BUILD_CONTRIB=OFF" - - "-DZSTD_BUILD_PROGRAMS=OFF" - "-DZSTD_BUILD_SHARED=ON" - "-DZSTD_BUILD_STATIC=ON" - "-DZSTD_BUILD_TESTS=OFF" diff --git a/taskfiles/tests/integration.yaml b/taskfiles/tests/integration.yaml index 823c6853b0..9a789c90d8 100644 --- a/taskfiles/tests/integration.yaml +++ b/taskfiles/tests/integration.yaml @@ -23,5 +23,6 @@ tasks: env: CLP_BUILD_DIR: "{{.G_BUILD_DIR}}" CLP_CORE_BINS_DIR: "{{.G_CORE_COMPONENT_BUILD_DIR}}" + CLP_DEPS_CORE_DIR: "{{.G_DEPS_CORE_DIR}}" CLP_PACKAGE_DIR: "{{.G_PACKAGE_BUILD_DIR}}" cmd: "uv run python -m pytest -m core" From 4741ef9519b0a626ee439063e92d6f6fcf7b3b02 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 17 Sep 2025 15:19:16 -0400 Subject: [PATCH 2/4] complete reset of the compression formats --- .../tests/fixtures/integration_test_logs.py | 37 +++++++++++----- .../tests/test_identity_transformation.py | 44 +++++++++---------- integration-tests/tests/utils/config.py | 18 +++++--- 3 files changed, 58 insertions(+), 41 deletions(-) diff --git a/integration-tests/tests/fixtures/integration_test_logs.py b/integration-tests/tests/fixtures/integration_test_logs.py index 6e60fd3fbb..a5d2b84d85 100644 --- a/integration-tests/tests/fixtures/integration_test_logs.py +++ b/integration-tests/tests/fixtures/integration_test_logs.py @@ -84,28 +84,43 @@ def _download_and_extract_dataset( err_msg = f"Failed to download and extract dataset `{name}`." 
raise RuntimeError(err_msg) from e - extraction_path = str(integration_test_logs.extraction_dir) - # Allow the extracted content to be deletable or overwritable chmod_bin = shutil.which("chmod") if chmod_bin is None: err_msg = "chmod executable not found" raise RuntimeError(err_msg) - subprocess.run([chmod_bin, "-R", "gu+w", extraction_path], check=True) + subprocess.run([chmod_bin, "-R", "gu+w", str(integration_test_logs.extraction_dir)], check=True) # Create tar of the extracted content for different compression formats - tar_bin = shutil.which("tar") - if tar_bin is None: - err_msg = "tar executable not found" + gzip_bin = shutil.which("gzip") + if gzip_bin is None: + err_msg = "gzip executable not found" raise RuntimeError(err_msg) - subprocess.run([tar_bin, "--create", f"--file={integration_test_logs.base_tar_path}", f"--directory={integration_test_logs.extraction_dir}", extraction_path], check=True) - - # Create LibLZMA xz tar + gzip_cmds = [gzip_bin, "--decompress", "--stdout", str(integration_test_logs.tar_gz_path)] + with integration_test_logs.base_tar_path.open(mode="wb") as fout: + subprocess.run(gzip_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) + + # Create lz4 tar + lz4_bin = str(integration_test_config.deps_config.lz4_binary_path) + lz4_cmds = [ + lz4_bin, + str(integration_test_logs.base_tar_path), + str(integration_test_logs.tar_lz4_path), + ] + subprocess.run(lz4_cmds, check=True) + + # Create xz tar xz_bin = str(integration_test_config.deps_config.xz_binary_path) - xz_cmds = [xz_bin, "--keep", "--compress", "--stdout", extraction_path] - with open(integration_test_logs.tar_xz_path, "wb") as fout: + xz_cmds = [xz_bin, "--compress", "--stdout", str(integration_test_logs.base_tar_path)] + with integration_test_logs.tar_xz_path.open(mode="wb") as fout: subprocess.run(xz_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) + # Create zstd tar + zstd_bin = str(integration_test_config.deps_config.zstd_binary_path) + zstd_cmds = [zstd_bin, 
"--stdout", str(integration_test_logs.base_tar_path)] + with integration_test_logs.tar_zstd_path.open(mode="wb") as fout: + subprocess.run(zstd_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) + logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name) request.config.cache.set(name, True) return integration_test_logs diff --git a/integration-tests/tests/test_identity_transformation.py b/integration-tests/tests/test_identity_transformation.py index 71d1ae14e5..1183a3b92c 100644 --- a/integration-tests/tests/test_identity_transformation.py +++ b/integration-tests/tests/test_identity_transformation.py @@ -53,40 +53,38 @@ def test_clp_identity_transform( integration_test_logs: IntegrationTestLogs = request.getfixturevalue(test_logs_fixture) logs_source_dir: Path = integration_test_logs.extraction_dir - #test_paths = CompressionTestConfig( - # test_name=f"clp-{integration_test_logs.name}", - # compression_input=integration_test_logs.extraction_dir, - # integration_test_config=integration_test_config, - #) - #_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) - - #test_paths = CompressionTestConfig( - # test_name=f"clp-{integration_test_logs.name}-tar-gz", - # compression_input=integration_test_logs.tar_gz_path, - # integration_test_config=integration_test_config, - #) - #_run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) - - test_paths = CompressionTestConfig( - test_name=f"clp-{integration_test_logs.name}-tar-xz", - compression_input=integration_test_logs.tar_xz_path, - integration_test_config=integration_test_config, - ) - _run_clp_identity_transform(logs_source_dir, test_paths, integration_test_config) + archives_to_test = [ + integration_test_logs.extraction_dir, + integration_test_logs.tar_gz_path, + integration_test_logs.tar_lz4_path, + integration_test_logs.tar_xz_path, + integration_test_logs.tar_zstd_path, + ] + for archive_path in archives_to_test: + 
_run_clp_identity_transform(archive_path, integration_test_config, logs_source_dir) def _run_clp_identity_transform( - logs_source_dir: Path, - test_paths: CompressionTestConfig, + compression_input: Path, integration_test_config: IntegrationTestConfig, + logs_source_dir: Path, ) -> None: + test_paths = CompressionTestConfig( + test_name=f"clp-{compression_input.name}", + compression_input=compression_input, + integration_test_config=integration_test_config, + ) test_paths.clear_test_outputs() bin_path = str(integration_test_config.core_config.clp_binary_path) input_path = str(test_paths.compression_input) compression_path = str(test_paths.compression_dir) decompression_path = str(test_paths.decompression_dir) - path_prefix_to_remove = input_path if test_paths.compression_input.is_dir() else str(test_paths.compression_input.parent) + path_prefix_to_remove = ( + input_path + if test_paths.compression_input.is_dir() + else str(test_paths.compression_input.parent) + ) # fmt: off compression_cmd = [ diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py index 77a1acc066..0ac61aa092 100644 --- a/integration-tests/tests/utils/config.py +++ b/integration-tests/tests/utils/config.py @@ -74,7 +74,6 @@ def xz_binary_path(self) -> Path: return self.clp_deps_core_dir / "LibLZMA-static-install" / "bin" / "xz" - @dataclass(frozen=True) class PackageConfig: """The configuration for the clp package subject to testing.""" @@ -143,6 +142,7 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: if 0 == len(name): err_msg = "`name` cannot be empty." 
raise ValueError(err_msg) + logs_download_dir = integration_test_config.logs_download_dir validate_dir_exists_and_is_absolute(logs_download_dir) @@ -150,27 +150,31 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: object.__setattr__(self, "extraction_dir", logs_download_dir / name) @property - def base_tar_path(self) -> None: + def base_tar_path(self) -> Path: + """:return: The absolute path to the tar archive.""" return self.extraction_dir.with_suffix(".tar") @property - def tar_gz_path(self) -> None: + def tar_gz_path(self) -> Path: + """:return: The absolute path to the tar gzip archive.""" return self.extraction_dir.with_suffix(".tar.gz") @property - def tar_lz4_path(self) -> None: + def tar_lz4_path(self) -> Path: + """:return: The absolute path to the tar lz4 archive.""" return self.extraction_dir.with_suffix(".tar.lz4") @property - def tar_xz_path(self) -> None: + def tar_xz_path(self) -> Path: + """:return: The absolute path to the tar xz archive.""" return self.extraction_dir.with_suffix(".tar.xz") @property - def tar_zstd_path(self) -> None: + def tar_zstd_path(self) -> Path: + """:return: The absolute path to the tar zstd archive.""" return self.extraction_dir.with_suffix(".tar.zstd") - @dataclass(frozen=True) class CompressionTestConfig: """Compression test configuration providing per-test metadata for artifacts and directories.""" From 70a7529dedb34f6eae42c13e173cf655402ddbce Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Wed, 17 Sep 2025 16:35:34 -0400 Subject: [PATCH 3/4] Pass in various compression tool path prefixes via taskflow variables --- integration-tests/.pytest.ini | 3 ++ .../tests/fixtures/integration_test_config.py | 5 +++- integration-tests/tests/utils/config.py | 29 ++++++++++++------- taskfiles/deps/main.yaml | 17 +++++++++-- taskfiles/tests/integration.yaml | 3 ++ 5 files changed, 43 insertions(+), 14 deletions(-) diff --git a/integration-tests/.pytest.ini b/integration-tests/.pytest.ini index 
eed36c9bf0..10b120bf87 100644 --- a/integration-tests/.pytest.ini +++ b/integration-tests/.pytest.ini @@ -10,7 +10,10 @@ env = D:CLP_BUILD_DIR=../build D:CLP_CORE_BINS_DIR=../build/core D:CLP_DEPS_CORE_DIR=../build/deps/core + D:CLP_LIBLZMA_ROOT=../build/deps/core/LibLZMA-install + D:CLP_LZ4_ROOT=../build/deps/core/lz4-install D:CLP_PACKAGE_DIR=../build/clp-package + D:CLP_ZSTD_ROOT=../build/deps/core/zstd-install log_cli = True log_cli_date_format = %Y-%m-%d %H:%M:%S,%f log_cli_format = %(name)s %(asctime)s [%(levelname)s] %(message)s diff --git a/integration-tests/tests/fixtures/integration_test_config.py b/integration-tests/tests/fixtures/integration_test_config.py index eedf7e6f75..412a101444 100644 --- a/integration-tests/tests/fixtures/integration_test_config.py +++ b/integration-tests/tests/fixtures/integration_test_config.py @@ -20,7 +20,10 @@ def integration_test_config() -> IntegrationTestConfig: clp_core_bins_dir=Path(get_env_var("CLP_CORE_BINS_DIR")).expanduser().resolve() ) deps_config = DepsConfig( - clp_deps_core_dir=Path(get_env_var("CLP_DEPS_CORE_DIR")).expanduser().resolve() + clp_deps_core_dir=Path(get_env_var("CLP_DEPS_CORE_DIR")).expanduser().resolve(), + clp_liblzma_root=Path(get_env_var("CLP_LIBLZMA_ROOT")).expanduser().resolve(), + clp_lz4_root=Path(get_env_var("CLP_LZ4_ROOT")).expanduser().resolve(), + clp_zstd_root=Path(get_env_var("CLP_ZSTD_ROOT")).expanduser().resolve(), ) package_config = PackageConfig( clp_package_dir=Path(get_env_var("CLP_PACKAGE_DIR")).expanduser().resolve() diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py index 0ac61aa092..7f1d107506 100644 --- a/integration-tests/tests/utils/config.py +++ b/integration-tests/tests/utils/config.py @@ -49,29 +49,38 @@ def clp_s_binary_path(self) -> Path: @dataclass(frozen=True) class DepsConfig: - """The configuration for dependencies used by clp.""" + """The configuration for dependencies used by CLP package and binaries.""" - #: + #: 
Install directory for all core CLP dependencies. clp_deps_core_dir: Path + #: Install prefix of LibLZMA used by CLP. + clp_liblzma_root: Path + #: Install prefix of lz4 used by CLP. + clp_lz4_root: Path + #: Install prefix of zstd used by CLP. + clp_zstd_root: Path def __post_init__(self) -> None: - """Validates that the core dependency directory exists.""" + """Validates that dependency directories exist.""" validate_dir_exists_and_is_absolute(self.clp_deps_core_dir) + validate_dir_exists_and_is_absolute(self.clp_liblzma_root) + validate_dir_exists_and_is_absolute(self.clp_lz4_root) + validate_dir_exists_and_is_absolute(self.clp_zstd_root) @property def lz4_binary_path(self) -> Path: """:return: The absolute path to the lz4 compression tool.""" - return self.clp_deps_core_dir / "lz4-install" / "bin" / "lz4" - - @property - def zstd_binary_path(self) -> Path: - """:return: The absolute path to the zstd compression tool.""" - return self.clp_deps_core_dir / "zstd-install" / "bin" / "zstd" + return self.clp_lz4_root / "bin" / "lz4" @property def xz_binary_path(self) -> Path: """:return: The absolute path to the LibLZMA xz compression tool.""" - return self.clp_deps_core_dir / "LibLZMA-static-install" / "bin" / "xz" + return self.clp_liblzma_root / "bin" / "xz" + + @property + def zstd_binary_path(self) -> Path: + """:return: The absolute path to the zstd compression tool.""" + return self.clp_zstd_root / "bin" / "zstd" @dataclass(frozen=True) diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml index 5c713f06e9..780ecaff50 100644 --- a/taskfiles/deps/main.yaml +++ b/taskfiles/deps/main.yaml @@ -21,6 +21,14 @@ vars: G_BOOST_LIB_NAME: "Boost" G_FMT_LIB_NAME: "fmt" G_GSL_LIB_NAME: "Microsoft.GSL" + G_LIBLZMA_LIB_NAME: "LibLZMA" + G_LZ4_LIB_NAME: "lz4" + G_ZSTD_LIB_NAME: "zstd" + + # Library install prefixes + G_LZ4_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LZ4_LIB_NAME}}-install" + G_ZSTD_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_ZSTD_LIB_NAME}}-install" + 
G_LIBLZMA_STATIC_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-static-install" # Antlr G_ANTLR_VERSION: "4.13.2" @@ -296,7 +304,8 @@ tasks: - "-DBUILD_SHARED_LIBS={{ if .BUILD_SHARED_LIBS }}ON{{ else }}OFF{{ end }}" - >- {{ join " " .COMMON_CMAKE_GEN_ARGS }} - LIB_NAME: "LibLZMA-{{ if .BUILD_SHARED_LIBS }}shared{{ else }}static{{ end }}" + LIB_NAME: >- + {{.G_LIBLZMA_LIB_NAME}}-{{ if .BUILD_SHARED_LIBS }}shared{{ else }}static{{ end }} TARBALL_SHA256: "{{.TARBALL_SHA256}}" TARBALL_URL: "{{.TARBALL_URL}}" @@ -331,7 +340,8 @@ tasks: - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_MESSAGE=LAZY" CMAKE_SOURCE_DIR: "build/cmake" - LIB_NAME: "lz4" + INSTALL_PREFIX: "{{.G_LZ4_INSTALL_PREFIX}}" + LIB_NAME: "{{.G_LZ4_LIB_NAME}}" TARBALL_SHA256: "537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b" TARBALL_URL: "https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz" @@ -557,6 +567,7 @@ tasks: - "-DZSTD_BUILD_STATIC=ON" - "-DZSTD_BUILD_TESTS=OFF" CMAKE_SOURCE_DIR: "build/cmake" - LIB_NAME: "zstd" + INSTALL_PREFIX: "{{.G_ZSTD_INSTALL_PREFIX}}" + LIB_NAME: "{{.G_ZSTD_LIB_NAME}}" TARBALL_SHA256: "eb33e51f49a15e023950cd7825ca74a4a2b43db8354825ac24fc1b7ee09e6fa3" TARBALL_URL: "https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz" diff --git a/taskfiles/tests/integration.yaml b/taskfiles/tests/integration.yaml index 9a789c90d8..a178ef57ee 100644 --- a/taskfiles/tests/integration.yaml +++ b/taskfiles/tests/integration.yaml @@ -24,5 +24,8 @@ tasks: CLP_BUILD_DIR: "{{.G_BUILD_DIR}}" CLP_CORE_BINS_DIR: "{{.G_CORE_COMPONENT_BUILD_DIR}}" CLP_DEPS_CORE_DIR: "{{.G_DEPS_CORE_DIR}}" + CLP_LIBLZMA_ROOT: "{{.G_LIBLZMA_STATIC_INSTALL_PREFIX}}" + CLP_LZ4_ROOT: "{{.G_LZ4_INSTALL_PREFIX}}" CLP_PACKAGE_DIR: "{{.G_PACKAGE_BUILD_DIR}}" + CLP_ZSTD_ROOT: "{{.G_ZSTD_INSTALL_PREFIX}}" cmd: "uv run python -m pytest -m core" From 45be4a92525de332147ea27c7711411219c74196 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 18 Sep 2025 
10:36:04 +0000 Subject: [PATCH 4/4] Improve python comments and work tasks --- .../tests/fixtures/integration_test_logs.py | 2 +- .../tests/test_identity_transformation.py | 16 +++++++++++++--- taskfiles/deps/main.yaml | 10 ++++++++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/integration-tests/tests/fixtures/integration_test_logs.py b/integration-tests/tests/fixtures/integration_test_logs.py index a5d2b84d85..5b2496a269 100644 --- a/integration-tests/tests/fixtures/integration_test_logs.py +++ b/integration-tests/tests/fixtures/integration_test_logs.py @@ -91,7 +91,7 @@ def _download_and_extract_dataset( raise RuntimeError(err_msg) subprocess.run([chmod_bin, "-R", "gu+w", str(integration_test_logs.extraction_dir)], check=True) - # Create tar of the extracted content for different compression formats + # Create base tar stream object to be compressed into different formats gzip_bin = shutil.which("gzip") if gzip_bin is None: err_msg = "gzip executable not found" diff --git a/integration-tests/tests/test_identity_transformation.py b/integration-tests/tests/test_identity_transformation.py index 1183a3b92c..18bd66e010 100644 --- a/integration-tests/tests/test_identity_transformation.py +++ b/integration-tests/tests/test_identity_transformation.py @@ -44,7 +44,7 @@ def test_clp_identity_transform( ) -> None: """ Validate that compression and decompression by the core binary `clp` run successfully and are - lossless. + lossless across various input archive formats. 
:param request: :param integration_test_config: @@ -61,14 +61,24 @@ def test_clp_identity_transform( integration_test_logs.tar_zstd_path, ] for archive_path in archives_to_test: - _run_clp_identity_transform(archive_path, integration_test_config, logs_source_dir) + _test_clp_identity_transform_single_archive( + archive_path, integration_test_config, logs_source_dir + ) -def _run_clp_identity_transform( +def _test_clp_identity_transform_single_archive( compression_input: Path, integration_test_config: IntegrationTestConfig, logs_source_dir: Path, ) -> None: + """ + Validate that compression and decompression by the core binary `clp` run successfully and are + lossless for a single archive input format. + + :param compression_input: Path to the archive for compression. + :param integration_test_config: General config for the integration tests. + :param logs_source_dir: Path to the uncompressed logs for comparison. + """ test_paths = CompressionTestConfig( test_name=f"clp-{compression_input.name}", compression_input=compression_input, diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml index 780ecaff50..a7b582800f 100644 --- a/taskfiles/deps/main.yaml +++ b/taskfiles/deps/main.yaml @@ -26,9 +26,10 @@ vars: G_ZSTD_LIB_NAME: "zstd" # Library install prefixes + G_LIBLZMA_SHARED_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-shared-install" + G_LIBLZMA_STATIC_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-static-install" G_LZ4_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LZ4_LIB_NAME}}-install" G_ZSTD_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_ZSTD_LIB_NAME}}-install" - G_LIBLZMA_STATIC_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-static-install" # Antlr G_ANTLR_VERSION: "4.13.2" @@ -262,7 +263,6 @@ tasks: TARBALL_URL: "https://github.com/fmtlib/fmt/archive/refs/tags/10.2.1.tar.gz" liblzma: - internal: true vars: COMMON_CMAKE_GEN_ARGS: - "-DBUILD_TESTING=OFF" @@ -304,6 +304,12 @@ tasks: - "-DBUILD_SHARED_LIBS={{ if 
.BUILD_SHARED_LIBS }}ON{{ else }}OFF{{ end }}" - >- {{ join " " .COMMON_CMAKE_GEN_ARGS }} + INSTALL_PREFIX: >- + {{- if .BUILD_SHARED_LIBS -}} + {{ .G_LIBLZMA_SHARED_INSTALL_PREFIX }} + {{- else -}} + {{ .G_LIBLZMA_STATIC_INSTALL_PREFIX }} + {{- end -}} LIB_NAME: >- {{.G_LIBLZMA_LIB_NAME}}-{{ if .BUILD_SHARED_LIBS }}shared{{ else }}static{{ end }} TARBALL_SHA256: "{{.TARBALL_SHA256}}"