diff --git a/integration-tests/.pytest.ini b/integration-tests/.pytest.ini index 6235100f27..10b120bf87 100644 --- a/integration-tests/.pytest.ini +++ b/integration-tests/.pytest.ini @@ -9,7 +9,11 @@ addopts = env = D:CLP_BUILD_DIR=../build D:CLP_CORE_BINS_DIR=../build/core + D:CLP_DEPS_CORE_DIR=../build/deps/core + D:CLP_LIBLZMA_ROOT=../build/deps/core/LibLZMA-install + D:CLP_LZ4_ROOT=../build/deps/core/lz4-install D:CLP_PACKAGE_DIR=../build/clp-package + D:CLP_ZSTD_ROOT=../build/deps/core/zstd-install log_cli = True log_cli_date_format = %Y-%m-%d %H:%M:%S,%f log_cli_format = %(name)s %(asctime)s [%(levelname)s] %(message)s diff --git a/integration-tests/tests/fixtures/integration_test_config.py b/integration-tests/tests/fixtures/integration_test_config.py index f097021b3d..412a101444 100644 --- a/integration-tests/tests/fixtures/integration_test_config.py +++ b/integration-tests/tests/fixtures/integration_test_config.py @@ -6,6 +6,7 @@ from tests.utils.config import ( CoreConfig, + DepsConfig, IntegrationTestConfig, PackageConfig, ) @@ -18,12 +19,19 @@ def integration_test_config() -> IntegrationTestConfig: core_config = CoreConfig( clp_core_bins_dir=Path(get_env_var("CLP_CORE_BINS_DIR")).expanduser().resolve() ) + deps_config = DepsConfig( + clp_deps_core_dir=Path(get_env_var("CLP_DEPS_CORE_DIR")).expanduser().resolve(), + clp_liblzma_root=Path(get_env_var("CLP_LIBLZMA_ROOT")).expanduser().resolve(), + clp_lz4_root=Path(get_env_var("CLP_LZ4_ROOT")).expanduser().resolve(), + clp_zstd_root=Path(get_env_var("CLP_ZSTD_ROOT")).expanduser().resolve(), + ) package_config = PackageConfig( clp_package_dir=Path(get_env_var("CLP_PACKAGE_DIR")).expanduser().resolve() ) test_root_dir = Path(get_env_var("CLP_BUILD_DIR")).expanduser().resolve() / "integration-tests" return IntegrationTestConfig( core_config=core_config, + deps_config=deps_config, package_config=package_config, test_root_dir=test_root_dir, ) diff --git a/integration-tests/tests/fixtures/integration_test_logs.py 
b/integration-tests/tests/fixtures/integration_test_logs.py index 21c288e4c5..5b2496a269 100644 --- a/integration-tests/tests/fixtures/integration_test_logs.py +++ b/integration-tests/tests/fixtures/integration_test_logs.py @@ -69,7 +69,7 @@ def _download_and_extract_dataset( curl_bin, "--fail", "--location", - "--output", str(integration_test_logs.tarball_path), + "--output", str(integration_test_logs.tar_gz_path), "--show-error", tarball_url, ] @@ -78,7 +78,7 @@ def _download_and_extract_dataset( unlink(integration_test_logs.extraction_dir) shutil.unpack_archive( - integration_test_logs.tarball_path, integration_test_logs.extraction_dir + integration_test_logs.tar_gz_path, integration_test_logs.extraction_dir ) except Exception as e: err_msg = f"Failed to download and extract dataset `{name}`." @@ -89,7 +89,37 @@ def _download_and_extract_dataset( if chmod_bin is None: err_msg = "chmod executable not found" raise RuntimeError(err_msg) - subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True) + subprocess.run([chmod_bin, "-R", "gu+w", str(integration_test_logs.extraction_dir)], check=True) + + # Create base tar stream object to be compressed into different formats + gzip_bin = shutil.which("gzip") + if gzip_bin is None: + err_msg = "gzip executable not found" + raise RuntimeError(err_msg) + gzip_cmds = [gzip_bin, "--decompress", "--stdout", str(integration_test_logs.tar_gz_path)] + with integration_test_logs.base_tar_path.open(mode="wb") as fout: + subprocess.run(gzip_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) + + # Create lz4 tar + lz4_bin = str(integration_test_config.deps_config.lz4_binary_path) + lz4_cmds = [ + lz4_bin, + str(integration_test_logs.base_tar_path), + str(integration_test_logs.tar_lz4_path), + ] + subprocess.run(lz4_cmds, check=True) + + # Create xz tar + xz_bin = str(integration_test_config.deps_config.xz_binary_path) + xz_cmds = [xz_bin, "--compress", "--stdout", 
str(integration_test_logs.base_tar_path)] + with integration_test_logs.tar_xz_path.open(mode="wb") as fout: + subprocess.run(xz_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) + + # Create zstd tar + zstd_bin = str(integration_test_config.deps_config.zstd_binary_path) + zstd_cmds = [zstd_bin, "--stdout", str(integration_test_logs.base_tar_path)] + with integration_test_logs.tar_zstd_path.open(mode="wb") as fout: + subprocess.run(zstd_cmds, check=True, stdout=fout, stdin=subprocess.DEVNULL) logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name) request.config.cache.set(name, True) diff --git a/integration-tests/tests/test_identity_transformation.py b/integration-tests/tests/test_identity_transformation.py index 953ffd913f..18bd66e010 100644 --- a/integration-tests/tests/test_identity_transformation.py +++ b/integration-tests/tests/test_identity_transformation.py @@ -3,6 +3,8 @@ compression and decompression. """ +from pathlib import Path + import pytest from tests.utils.asserting_utils import run_and_assert @@ -42,32 +44,66 @@ def test_clp_identity_transform( ) -> None: """ Validate that compression and decompression by the core binary `clp` run successfully and are - lossless. + lossless across various input archive formats. 
:param request: :param integration_test_config: :param test_logs_fixture: """ integration_test_logs: IntegrationTestLogs = request.getfixturevalue(test_logs_fixture) + logs_source_dir: Path = integration_test_logs.extraction_dir + + archives_to_test = [ + integration_test_logs.extraction_dir, + integration_test_logs.tar_gz_path, + integration_test_logs.tar_lz4_path, + integration_test_logs.tar_xz_path, + integration_test_logs.tar_zstd_path, + ] + for archive_path in archives_to_test: + _test_clp_identity_transform_single_archive( + archive_path, integration_test_config, logs_source_dir + ) + + +def _test_clp_identity_transform_single_archive( + compression_input: Path, + integration_test_config: IntegrationTestConfig, + logs_source_dir: Path, +) -> None: + """ + Validate that compression and decompression by the core binary `clp` run successfully and are + lossless for a single archive input format. + + :param compression_input: Path to the archive for compression. + :param integration_test_config: General config for the integration tests. + :param logs_source_dir: Path to the uncompressed logs for comparison. 
+ """ test_paths = CompressionTestConfig( - test_name=f"clp-{integration_test_logs.name}", - logs_source_dir=integration_test_logs.extraction_dir, + test_name=f"clp-{compression_input.name}", + compression_input=compression_input, integration_test_config=integration_test_config, ) test_paths.clear_test_outputs() bin_path = str(integration_test_config.core_config.clp_binary_path) - src_path = str(test_paths.logs_source_dir) + input_path = str(test_paths.compression_input) compression_path = str(test_paths.compression_dir) decompression_path = str(test_paths.decompression_dir) + path_prefix_to_remove = ( + input_path + if test_paths.compression_input.is_dir() + else str(test_paths.compression_input.parent) + ) + # fmt: off compression_cmd = [ bin_path, "c", "--progress", - "--remove-path-prefix", src_path, + "--remove-path-prefix", path_prefix_to_remove, compression_path, - src_path, + input_path, ] # fmt: on run_and_assert(compression_cmd) @@ -75,13 +111,11 @@ def test_clp_identity_transform( decompression_cmd = [bin_path, "x", compression_path, decompression_path] run_and_assert(decompression_cmd) - input_path = test_paths.logs_source_dir - output_path = test_paths.decompression_dir + decompressed_logs_path = test_paths.decompression_dir assert is_dir_tree_content_equal( - input_path, - output_path, - ), f"Mismatch between clp input {input_path} and output {output_path}." - + logs_source_dir, + decompressed_logs_path, + ), f"Mismatch between source {logs_source_dir} and `clp` final output {decompressed_logs_path}." 
test_paths.clear_test_outputs() @@ -105,7 +139,7 @@ def test_clp_s_identity_transform( test_paths = CompressionTestConfig( test_name=f"clp-s-{test_logs_name}", - logs_source_dir=integration_test_logs.extraction_dir, + compression_input=integration_test_logs.extraction_dir, integration_test_config=integration_test_config, ) _clp_s_compress_and_decompress(integration_test_config, test_paths) @@ -118,13 +152,13 @@ def test_clp_s_identity_transform( # See also: https://docs.yscope.com/clp/main/user-guide/core-clp-s.html#current-limitations consolidated_json_test_paths = CompressionTestConfig( test_name=f"clp-s-{test_logs_name}-consolidated-json", - logs_source_dir=test_paths.decompression_dir, + compression_input=test_paths.decompression_dir, integration_test_config=integration_test_config, ) _clp_s_compress_and_decompress(integration_test_config, consolidated_json_test_paths) _consolidated_json_file_name = "original" - input_path = consolidated_json_test_paths.logs_source_dir / _consolidated_json_file_name + input_path = consolidated_json_test_paths.compression_input / _consolidated_json_file_name output_path = consolidated_json_test_paths.decompression_dir / _consolidated_json_file_name assert is_json_file_structurally_equal(input_path, output_path), ( f"Mismatch between clp-s input {input_path} and output {output_path}." 
@@ -139,7 +173,7 @@ def _clp_s_compress_and_decompress( ) -> None: test_paths.clear_test_outputs() bin_path = str(integration_test_config.core_config.clp_s_binary_path) - src_path = str(test_paths.logs_source_dir) + src_path = str(test_paths.compression_input) compression_path = str(test_paths.compression_dir) decompression_path = str(test_paths.decompression_dir) run_and_assert([bin_path, "c", compression_path, src_path]) diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py index b087a106f2..7f1d107506 100644 --- a/integration-tests/tests/utils/config.py +++ b/integration-tests/tests/utils/config.py @@ -7,7 +7,7 @@ from tests.utils.utils import ( unlink, - validate_dir_exists, + validate_dir_exists_and_is_absolute, ) @@ -24,7 +24,7 @@ def __post_init__(self) -> None: executables. """ clp_core_bins_dir = self.clp_core_bins_dir - validate_dir_exists(clp_core_bins_dir) + validate_dir_exists_and_is_absolute(clp_core_bins_dir) # Check for required CLP core binaries required_binaries = ["clg", "clo", "clp", "clp-s", "indexer", "reducer-server"] @@ -47,6 +47,42 @@ def clp_s_binary_path(self) -> Path: return self.clp_core_bins_dir / "clp-s" +@dataclass(frozen=True) +class DepsConfig: + """The configuration for dependencies used by CLP package and binaries.""" + + #: Install directory for all core CLP dependencies. + clp_deps_core_dir: Path + #: Install prefix of LibLZMA used by CLP. + clp_liblzma_root: Path + #: Install prefix of lz4 used by CLP. + clp_lz4_root: Path + #: Install prefix of zstd used by CLP. 
+ clp_zstd_root: Path + + def __post_init__(self) -> None: + """Validates that all dependency directories exist and are absolute paths.""" + validate_dir_exists_and_is_absolute(self.clp_deps_core_dir) + validate_dir_exists_and_is_absolute(self.clp_liblzma_root) + validate_dir_exists_and_is_absolute(self.clp_lz4_root) + validate_dir_exists_and_is_absolute(self.clp_zstd_root) + + @property + def lz4_binary_path(self) -> Path: + """:return: The absolute path to the lz4 compression tool.""" + return self.clp_lz4_root / "bin" / "lz4" + + @property + def xz_binary_path(self) -> Path: + """:return: The absolute path to the LibLZMA xz compression tool.""" + return self.clp_liblzma_root / "bin" / "xz" + + @property + def zstd_binary_path(self) -> Path: + """:return: The absolute path to the zstd compression tool.""" + return self.clp_zstd_root / "bin" / "zstd" + + @dataclass(frozen=True) class PackageConfig: """The configuration for the clp package subject to testing.""" @@ -57,7 +93,7 @@ class PackageConfig: def __post_init__(self) -> None: """Validates that the CLP package directory exists and contains all required directories.""" clp_package_dir = self.clp_package_dir - validate_dir_exists(clp_package_dir) + validate_dir_exists_and_is_absolute(clp_package_dir) # Check for required package script directories required_dirs = ["bin", "etc", "lib", "sbin"] @@ -77,6 +113,8 @@ class IntegrationTestConfig: #: core_config: CoreConfig #: + deps_config: DepsConfig + #: package_config: PackageConfig #: Root directory for integration tests output. 
test_root_dir: Path @@ -105,8 +143,6 @@ class IntegrationTestLogs: tarball_url: str integration_test_config: InitVar[IntegrationTestConfig] #: - tarball_path: Path = field(init=False, repr=True) - #: extraction_dir: Path = field(init=False, repr=True) def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: @@ -115,13 +151,38 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: if 0 == len(name): err_msg = "`name` cannot be empty." raise ValueError(err_msg) + logs_download_dir = integration_test_config.logs_download_dir - validate_dir_exists(logs_download_dir) + validate_dir_exists_and_is_absolute(logs_download_dir) object.__setattr__(self, "name", name) - object.__setattr__(self, "tarball_path", logs_download_dir / f"{name}.tar.gz") object.__setattr__(self, "extraction_dir", logs_download_dir / name) + @property + def base_tar_path(self) -> Path: + """:return: The absolute path to the tar archive.""" + return self.extraction_dir.with_suffix(".tar") + + @property + def tar_gz_path(self) -> Path: + """:return: The absolute path to the tar gzip archive.""" + return self.extraction_dir.with_suffix(".tar.gz") + + @property + def tar_lz4_path(self) -> Path: + """:return: The absolute path to the tar lz4 archive.""" + return self.extraction_dir.with_suffix(".tar.lz4") + + @property + def tar_xz_path(self) -> Path: + """:return: The absolute path to the tar xz archive.""" + return self.extraction_dir.with_suffix(".tar.xz") + + @property + def tar_zstd_path(self) -> Path: + """:return: The absolute path to the tar zstd archive.""" + return self.extraction_dir.with_suffix(".tar.zstd") + @dataclass(frozen=True) class CompressionTestConfig: @@ -129,12 +190,12 @@ class CompressionTestConfig: #: test_name: str - #: Directory containing the original (uncompressed) log files used by this test. - logs_source_dir: Path + #: Path to the CLP compression input archive or directory. 
+ compression_input: Path integration_test_config: InitVar[IntegrationTestConfig] - #: Path to store compressed archives generated by the test. + #: Directory to store generated compressed CLP archives. compression_dir: Path = field(init=False, repr=True) - #: Path to store decompressed logs generated by the test. + #: Directory to store logs decompressed from CLP archives. decompression_dir: Path = field(init=False, repr=True) def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: @@ -144,7 +205,7 @@ def __post_init__(self, integration_test_config: IntegrationTestConfig) -> None: err_msg = "`test_name` cannot be empty." raise ValueError(err_msg) test_root_dir = integration_test_config.test_root_dir - validate_dir_exists(test_root_dir) + validate_dir_exists_and_is_absolute(test_root_dir) object.__setattr__(self, "test_name", test_name) object.__setattr__(self, "compression_dir", test_root_dir / f"{test_name}-archives") diff --git a/integration-tests/tests/utils/utils.py b/integration-tests/tests/utils/utils.py index 1dca8ba162..da016d8c5a 100644 --- a/integration-tests/tests/utils/utils.py +++ b/integration-tests/tests/utils/utils.py @@ -75,14 +75,17 @@ def unlink(rm_path: Path, force: bool = True) -> None: raise OSError(err_msg) from e -def validate_dir_exists(dir_path: Path) -> None: +def validate_dir_exists_and_is_absolute(dir_path: Path) -> None: """ :param dir_path: - :raise: ValueError if the path does not exist or is not a directory. + :raise: ValueError if the path does not exist, is not a directory, or is not absolute. 
""" if not dir_path.is_dir(): err_msg = f"Path does not exist or is not a directory: {dir_path}" raise ValueError(err_msg) + if not dir_path.is_absolute(): + err_msg = f"Path is not absolute: {dir_path}" + raise ValueError(err_msg) def _sort_json_keys_and_rows(json_fp: Path) -> IO[str]: diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml index 308e3c119b..a7b582800f 100644 --- a/taskfiles/deps/main.yaml +++ b/taskfiles/deps/main.yaml @@ -21,6 +21,15 @@ vars: G_BOOST_LIB_NAME: "Boost" G_FMT_LIB_NAME: "fmt" G_GSL_LIB_NAME: "Microsoft.GSL" + G_LIBLZMA_LIB_NAME: "LibLZMA" + G_LZ4_LIB_NAME: "lz4" + G_ZSTD_LIB_NAME: "zstd" + + # Library install prefixes + G_LIBLZMA_SHARED_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-shared-install" + G_LIBLZMA_STATIC_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LIBLZMA_LIB_NAME}}-static-install" + G_LZ4_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_LZ4_LIB_NAME}}-install" + G_ZSTD_INSTALL_PREFIX: "{{.G_DEPS_CORE_DIR}}/{{.G_ZSTD_LIB_NAME}}-install" # Antlr G_ANTLR_VERSION: "4.13.2" @@ -254,7 +263,6 @@ tasks: TARBALL_URL: "https://github.com/fmtlib/fmt/archive/refs/tags/10.2.1.tar.gz" liblzma: - internal: true vars: COMMON_CMAKE_GEN_ARGS: - "-DBUILD_TESTING=OFF" @@ -265,7 +273,6 @@ tasks: - "-DXZ_TOOL_LZMAINFO=OFF" - "-DXZ_TOOL_SCRIPTS=OFF" - "-DXZ_TOOL_SYMLINKS_LZMA=OFF" - - "-DXZ_TOOL_XZ=OFF" - "-DXZ_TOOL_XZDEC=OFF" TARBALL_SHA256: "507825b599356c10dca1cd720c9d0d0c9d5400b9de300af00e4d1ea150795543" TARBALL_URL: "https://github.com/tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1.tar.gz" @@ -297,7 +304,14 @@ tasks: - "-DBUILD_SHARED_LIBS={{ if .BUILD_SHARED_LIBS }}ON{{ else }}OFF{{ end }}" - >- {{ join " " .COMMON_CMAKE_GEN_ARGS }} - LIB_NAME: "LibLZMA-{{ if .BUILD_SHARED_LIBS }}shared{{ else }}static{{ end }}" + INSTALL_PREFIX: >- + {{- if .BUILD_SHARED_LIBS -}} + {{ .G_LIBLZMA_SHARED_INSTALL_PREFIX }} + {{- else -}} + {{ .G_LIBLZMA_STATIC_INSTALL_PREFIX }} + {{- end -}} + LIB_NAME: >- + 
{{.G_LIBLZMA_LIB_NAME}}-{{ if .BUILD_SHARED_LIBS }}shared{{ else }}static{{ end }} TARBALL_SHA256: "{{.TARBALL_SHA256}}" TARBALL_URL: "{{.TARBALL_URL}}" @@ -331,9 +345,9 @@ tasks: - "-DBUILD_STATIC_LIBS=ON" - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_MESSAGE=LAZY" - - "-DLZ4_BUILD_CLI=OFF" CMAKE_SOURCE_DIR: "build/cmake" - LIB_NAME: "lz4" + INSTALL_PREFIX: "{{.G_LZ4_INSTALL_PREFIX}}" + LIB_NAME: "{{.G_LZ4_LIB_NAME}}" TARBALL_SHA256: "537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b" TARBALL_URL: "https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz" @@ -555,11 +569,11 @@ tasks: - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_MESSAGE=LAZY" - "-DZSTD_BUILD_CONTRIB=OFF" - - "-DZSTD_BUILD_PROGRAMS=OFF" - "-DZSTD_BUILD_SHARED=ON" - "-DZSTD_BUILD_STATIC=ON" - "-DZSTD_BUILD_TESTS=OFF" CMAKE_SOURCE_DIR: "build/cmake" - LIB_NAME: "zstd" + INSTALL_PREFIX: "{{.G_ZSTD_INSTALL_PREFIX}}" + LIB_NAME: "{{.G_ZSTD_LIB_NAME}}" TARBALL_SHA256: "eb33e51f49a15e023950cd7825ca74a4a2b43db8354825ac24fc1b7ee09e6fa3" TARBALL_URL: "https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz" diff --git a/taskfiles/tests/integration.yaml b/taskfiles/tests/integration.yaml index 823c6853b0..a178ef57ee 100644 --- a/taskfiles/tests/integration.yaml +++ b/taskfiles/tests/integration.yaml @@ -23,5 +23,9 @@ tasks: env: CLP_BUILD_DIR: "{{.G_BUILD_DIR}}" CLP_CORE_BINS_DIR: "{{.G_CORE_COMPONENT_BUILD_DIR}}" + CLP_DEPS_CORE_DIR: "{{.G_DEPS_CORE_DIR}}" + CLP_LIBLZMA_ROOT: "{{.G_LIBLZMA_STATIC_INSTALL_PREFIX}}" + CLP_LZ4_ROOT: "{{.G_LZ4_INSTALL_PREFIX}}" CLP_PACKAGE_DIR: "{{.G_PACKAGE_BUILD_DIR}}" + CLP_ZSTD_ROOT: "{{.G_ZSTD_INSTALL_PREFIX}}" cmd: "uv run python -m pytest -m core"