Skip to content

Commit 65b5291

Browse files
author
Andrei Neagu
committed
replace unarchiver
1 parent 53e0728 commit 65b5291

File tree

9 files changed

+265
-27
lines changed

9 files changed

+265
-27
lines changed

.github/workflows/ci-testing-deploy.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,8 @@ jobs:
13041304
cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt"
13051305
- name: show system version
13061306
run: ./ci/helpers/show_system_versions.bash
1307+
- name: install 7zip
1308+
run: ./ci/github/helpers/install_7zip.bash
13071309
- name: install
13081310
run: ./ci/github/unit-testing/dynamic-sidecar.bash install
13091311
- name: typecheck
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
2+
#
3+
# Installs 7zip by delegating to scripts/install_7zip.bash (3 directories above this file)
4+
#
5+
6+
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
7+
set -o errexit # abort on nonzero exitstatus
8+
set -o nounset # abort on unbound variable
9+
set -o pipefail # don't hide errors within pipes
10+
IFS=$'\n\t'
11+
12+
exec "$( dirname -- "$0"; )"/../../../scripts/install_7zip.bash

scripts/install_7zip.bash

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
#
# Installs 7zip: downloads the linux-x64 release from 7-zip.org and
# copies its `7zz` binary to /usr/bin/7z
#
# The release defaults to the version pinned below and can be overridden
# by exporting SEVEN_ZIP_VERSION before calling this script.
#

# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -o errexit  # abort on nonzero exitstatus
set -o nounset  # abort on unbound variable
set -o pipefail # don't hide errors within pipes
IFS=$'\n\t'

SEVEN_ZIP_VERSION="${SEVEN_ZIP_VERSION:-2409}"
ARCHIVE_NAME="7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz"

## 7z compression
echo "create install dir"
# a private, unpredictable directory instead of a fixed path under /tmp
INSTALL_DIR="$(mktemp -d)"

cleanup() {
  rm -rf "${INSTALL_DIR}"
}
# also remove the install dir when one of the steps below fails
trap cleanup EXIT

cd "${INSTALL_DIR}"

# --fail: stop on HTTP errors instead of saving the error page as the archive
curl --fail -LO "https://www.7-zip.org/a/${ARCHIVE_NAME}"
tar -xvf "${ARCHIVE_NAME}"
cp 7zz /usr/bin/7z

echo "remove install dir"
# leave the directory before deleting it
cd /
cleanup

echo "test installation"
7z --help

services/dynamic-sidecar/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ RUN \
3030
apt-get update && \
3131
apt-get install -y --no-install-recommends\
3232
curl \
33+
xz-utils \
3334
gnupg \
3435
lsb-release \
3536
&& mkdir -p /etc/apt/keyrings \
@@ -56,6 +57,10 @@ RUN \
5657
RUN \
5758
--mount=type=bind,source=scripts/install_rclone.bash,target=install_rclone.bash \
5859
./install_rclone.bash
60+
# install 7zip
61+
RUN \
62+
--mount=type=bind,source=scripts/install_7zip.bash,target=install_7zip.bash \
63+
./install_7zip.bash
5964

6065
RUN AWS_CLI_VERSION="2.11.11" \
6166
&& curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWS_CLI_VERSION}.zip" -o "awscliv2.zip" \

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,7 @@ class ContainerExecCommandFailedError(BaseDynamicSidecarError):
2929
"Command '{command}' exited with code '{exit_code}'"
3030
"and output: '{command_result}'"
3131
)
32+
33+
34+
class SevenZipError(BaseDynamicSidecarError):
    """Error reported when a ``7z`` command could not be completed.

    The message template expects two values: ``command`` (the command that
    was run) and ``command_result`` (the reason it did not finish).
    """

    msg_template = "Could not finish command: '{command}'\nReason: {command_result}"

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def _close_transport(proc: Process):
4949

5050
async def async_command(
5151
command: str,
52-
timeout: float | None = None,
52+
timeout: float | None = None, # noqa: ASYNC109
5353
pipe_as_input: str | None = None,
5454
env_vars: dict[str, str] | None = None,
5555
) -> CommandResult:

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,7 @@
2020
from models_library.projects_nodes_io import NodeIDStr
2121
from models_library.services_types import ServicePortKey
2222
from pydantic import ByteSize
23-
from servicelib.archiving_utils import (
24-
PrunableFolder,
25-
UnsupportedArchiveFormatError,
26-
archive_dir,
27-
unarchive_dir,
28-
)
23+
from servicelib.archiving_utils import PrunableFolder, archive_dir
2924
from servicelib.async_utils import run_sequentially_in_context
3025
from servicelib.file_utils import remove_directory
3126
from servicelib.logging_utils import log_context
@@ -41,6 +36,7 @@
4136

4237
from ..core.settings import ApplicationSettings, get_settings
4338
from ..modules.notifications import PortNotifier
39+
from .seven_zip_wrapper import unarchive_zip_to
4440

4541

4642
class PortTypeName(str, Enum):
@@ -298,28 +294,15 @@ async def _get_data_from_port(
298294
dest_folder = PrunableFolder(final_path)
299295

300296
if _is_zip_file(downloaded_file):
301-
# unzip updated data to dest_path
302-
_logger.debug("unzipping %s", downloaded_file)
303-
try:
304-
unarchived: set[Path] = await unarchive_dir(
305-
archive_to_extract=downloaded_file,
306-
destination_folder=final_path,
307-
progress_bar=sub_progress,
297+
with log_context(
298+
_logger,
299+
logging.DEBUG,
300+
f"unzipping '{downloaded_file}' to {final_path}",
301+
):
302+
unarchived: set[Path] = await unarchive_zip_to(
303+
downloaded_file, final_path, sub_progress
308304
)
309305
dest_folder.prune(exclude=unarchived)
310-
311-
_logger.debug("all unzipped in %s", final_path)
312-
except UnsupportedArchiveFormatError:
313-
_logger.warning(
314-
"Could not extract archive '%s' to '%s' moving it to: '%s'",
315-
downloaded_file,
316-
final_path,
317-
final_path / downloaded_file.name,
318-
)
319-
await _move_file_to_input_port(
320-
final_path, downloaded_file, dest_folder
321-
)
322-
323306
else:
324307
await _move_file_to_input_port(final_path, downloaded_file, dest_folder)
325308

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import asyncio
2+
import logging
3+
import re
4+
from pathlib import Path
5+
6+
from models_library.basic_types import IDStr
7+
from servicelib.progress_bar import ProgressBarData
8+
9+
from ..core.errors import SevenZipError
10+
from ..core.utils import async_command
11+
12+
_logger = logging.getLogger(__name__)
13+
14+
15+
async def _get_file_count(zip_path: Path) -> int:
    """Return the number of files stored in the archive at ``zip_path``.

    Runs ``7z l`` on the archive and reads the ``<N> files`` figure from the
    command output.

    Raises:
        SevenZipError: if the listing command fails or its output contains
            no file count.
    """
    import shlex  # local import: only used to build the command string below

    # The command is handed over as one string, so the path is quoted to
    # survive spaces and shell metacharacters (plain paths are left unchanged).
    result = await async_command(f"7z l {shlex.quote(str(zip_path))}")
    if not result.success:
        raise SevenZipError(command=result.command, command_result=result.message)

    # Take the LAST "<N> files" occurrence: an archive entry whose name happens
    # to contain e.g. "12 files" is listed before the totals and must not be
    # mistaken for the count.
    # NOTE(review): assumes the totals are printed after the entries -- confirm
    # against the output of the installed 7z version.
    counts = re.findall(r"(\d+)\s*files", result.message)
    if not counts:
        raise SevenZipError(
            command=result.command,
            command_result=f"could not find the file count in: {result.message}",
        )
    return int(counts[-1])
22+
23+
24+
async def unarchive_zip_to(
    zip_path: Path,
    output_dir: Path,
    progress_bar: ProgressBarData | None = None,
) -> set[Path]:
    """Extract the archive ``zip_path`` into ``output_dir`` with the ``7z`` CLI.

    Progress is reported on a sub-progress of ``progress_bar`` sized to the
    number of files in the archive and advanced once for every ``- <entry>``
    line printed by ``7z x -bb1``.

    Arguments:
        zip_path: archive to extract
        output_dir: folder receiving the extracted content
        progress_bar: parent progress bar; a single-step one is created when
            omitted

    Returns:
        every file found below ``output_dir`` once the extraction finished

    Raises:
        SevenZipError: if listing the archive fails or ``7z`` exits with a
            non-zero code
    """
    if progress_bar is None:
        progress_bar = ProgressBarData(
            num_steps=1, description=IDStr(f"extracting {zip_path.name}")
        )

    file_count = await _get_file_count(zip_path)

    # Arguments are passed as a list (no shell involved), so paths containing
    # spaces or shell metacharacters reach 7z verbatim.
    # -aoa overwrites existing files without asking; together with the closed
    # stdin below, 7z can never block waiting for an answer to a prompt.
    arguments = ["7z", "x", f"{zip_path}", f"-o{output_dir}", "-bb1", "-aoa"]
    command = " ".join(arguments)  # only used for error reporting
    process = await asyncio.create_subprocess_exec(
        *arguments,
        stdin=asyncio.subprocess.DEVNULL,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    assert process.stdout is not None  # nosec
    assert process.stderr is not None  # nosec

    # Drain stderr while stdout is being consumed: if 7z filled the stderr pipe
    # while only stdout was read, both sides would wait on each other forever.
    stderr_reader = asyncio.create_task(process.stderr.read())
    try:
        async with progress_bar.sub_progress(
            steps=file_count, description=IDStr("...")
        ) as sub_prog:
            while line := await process.stdout.readline():
                # entry names are only inspected for their prefix, so undecodable
                # bytes are replaced instead of aborting the extraction
                line_decoded = line.decode(errors="replace").strip()
                if line_decoded.startswith("- "):  # check file entry
                    await sub_prog.update(1)

            await process.wait()
            stderr = await stderr_reader
            if process.returncode != 0:
                raise SevenZipError(
                    command=command,
                    command_result=stderr.decode(errors="replace").strip(),
                )
    finally:
        if process.returncode is None:
            # interrupted (error or cancellation): do not leave 7z running
            process.kill()
            await process.wait()
        if not stderr_reader.done():
            stderr_reader.cancel()

    # NOTE(review): this lists EVERY file below output_dir, including files
    # that were already there before the extraction. A caller that prunes
    # "everything except the returned paths" will therefore never remove stale
    # files -- confirm whether only the extracted entries should be returned.
    return {x for x in output_dir.rglob("*") if x.is_file()}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# pylint: disable=redefined-outer-name
2+
# pylint: disable=unused-argument
3+
4+
import subprocess
5+
from pathlib import Path
6+
7+
import pytest
8+
from _pytest._py.path import LocalPath
9+
from faker import Faker
10+
from models_library.basic_types import IDStr
11+
from models_library.progress_bar import ProgressReport
12+
from servicelib.archiving_utils import archive_dir, unarchive_dir
13+
from servicelib.progress_bar import ProgressBarData
14+
from simcore_service_dynamic_sidecar.modules.seven_zip_wrapper import (
15+
SevenZipError,
16+
unarchive_zip_to,
17+
)
18+
19+
20+
def _ensure_path(dir_path: Path) -> Path:
21+
dir_path.mkdir(parents=True, exist_ok=True)
22+
return dir_path
23+
24+
25+
def _assert_same_directory_content(path1: Path, path2: Path) -> None:
26+
assert path1.is_dir()
27+
assert path2.is_dir()
28+
29+
contents1 = {p.relative_to(path1) for p in path1.rglob("*")}
30+
contents2 = {p.relative_to(path2) for p in path2.rglob("*")}
31+
32+
assert contents1 == contents2
33+
34+
35+
@pytest.fixture
def to_archive_dir(tmpdir: LocalPath) -> Path:
    """Folder ``to_archive`` inside the test's temporary directory, created on demand."""
    source_dir = Path(tmpdir) / "to_archive"
    return _ensure_path(source_dir)
38+
39+
40+
@pytest.fixture
def internal_tools_unarchived_tools(tmpdir: LocalPath) -> Path:
    """Folder ``internal_unarchived`` inside the test's temporary directory, created on demand."""
    destination_dir = Path(tmpdir) / "internal_unarchived"
    return _ensure_path(destination_dir)
43+
44+
45+
@pytest.fixture
def external_unarchived_tools(tmpdir: LocalPath) -> Path:
    """Folder ``external_unarchived`` inside the test's temporary directory, created on demand."""
    destination_dir = Path(tmpdir) / "external_unarchived"
    return _ensure_path(destination_dir)
48+
49+
50+
@pytest.fixture
def archive_path(tmpdir: LocalPath) -> Path:
    """Path ``archive.zip`` inside the test's temporary directory (the file is not created here)."""
    return Path(tmpdir).joinpath("archive.zip")
53+
54+
55+
@pytest.fixture
def generate_content(
    to_archive_dir: Path, sub_dirs: int, files_in_subdirs: int
) -> None:
    """Fill ``to_archive_dir`` with ``sub_dirs`` folders of ``files_in_subdirs`` files each.

    Folders are named ``s<i>`` and files ``<k>.txt``; file ``k`` holds the
    letter ``a`` repeated ``k`` times (so ``0.txt`` is empty).
    """
    for dir_index in range(sub_dirs):
        sub_dir = to_archive_dir / f"s{dir_index}"
        sub_dir.mkdir(parents=True, exist_ok=True)
        for file_index in range(files_in_subdirs):
            (sub_dir / f"{file_index}.txt").write_text("a" * file_index)
63+
64+
65+
@pytest.fixture
def skip_if_seven_zip_is_missing() -> None:
    """Skip the requesting test when running ``7z --help`` fails for any reason."""
    probe_command = ["7z", "--help"]
    try:
        subprocess.check_output(probe_command)  # noqa: S607
    except Exception:  # pylint: disable=broad-except
        pytest.skip("7z is not installed")
71+
72+
73+
async def test_missing_path_raises_error(
    skip_if_seven_zip_is_missing: None,
    faker: Faker,
    external_unarchived_tools: Path,
):
    """Extracting an archive that does not exist must raise ``SevenZipError``."""
    missing_name = f"this_path_is_missing_{faker.uuid4()}"
    non_existing_archive = Path("/tmp", missing_name)  # noqa: S108
    with pytest.raises(SevenZipError):
        await unarchive_zip_to(non_existing_archive, external_unarchived_tools)
81+
82+
83+
def _print_sorted(unarchived_dir: set[Path]) -> None:
84+
print(f"List '{unarchived_dir}'")
85+
for entry in sorted(unarchived_dir):
86+
print(f"{entry}")
87+
88+
89+
def _strip_folder_from_path(paths: set[Path], *, to_strip: Path) -> set[Path]:
90+
return {x.relative_to(to_strip) for x in paths}
91+
92+
93+
@pytest.mark.parametrize(
    "sub_dirs, files_in_subdirs",
    [
        pytest.param(50, 40, id="few_items"),
    ],
)
async def test_ensure_same_interface_as_unarchive_dir(
    skip_if_seven_zip_is_missing: None,
    generate_content: None,
    archive_path: Path,
    to_archive_dir: Path,
    internal_tools_unarchived_tools: Path,
    external_unarchived_tools: Path,
    sub_dirs: int,
    files_in_subdirs: int,
):
    """``unarchive_zip_to`` must produce the same result as ``unarchive_dir``.

    The generated content is archived once and extracted twice: with
    ``unarchive_dir`` (internal) and with ``unarchive_zip_to`` (external, 7z).
    Both the folders on disk and the returned sets of paths have to match,
    and the 7z based extraction has to report its progress.
    """
    await archive_dir(
        to_archive_dir, archive_path, compress=False, store_relative_path=True
    )

    internal_response = await unarchive_dir(
        archive_path, internal_tools_unarchived_tools
    )

    last_actual_progress_value = 0

    async def _report_progress(progress_report: ProgressReport) -> None:
        # keep only the most recent value reported by the progress bar
        nonlocal last_actual_progress_value
        last_actual_progress_value = progress_report.actual_value

    progress_bar = ProgressBarData(
        num_steps=1,
        description=IDStr("test progress bar"),
        progress_report_cb=_report_progress,
    )
    async with progress_bar:
        external_response = await unarchive_zip_to(
            archive_path, external_unarchived_tools, progress_bar
        )
    assert last_actual_progress_value == 1  # ensure progress was reported
    assert len(external_response) == sub_dirs * files_in_subdirs

    _assert_same_directory_content(
        internal_tools_unarchived_tools, external_unarchived_tools
    )

    _print_sorted(internal_response)
    _print_sorted(external_response)

    # the returned paths are absolute: compare them relative to their own root
    assert _strip_folder_from_path(
        internal_response, to_strip=internal_tools_unarchived_tools
    ) == _strip_folder_from_path(external_response, to_strip=external_unarchived_tools)

0 commit comments

Comments
 (0)