Skip to content

Commit 135c6bb

Browse files
authored
fix(package): Update native/decompress.py to use CLI args and env vars for DB config (fixes #1199); Prevent IR extraction with clp-s storage engine and only use dataset argument for JSON extraction (fixes #1200). (#1202)
1 parent 4288532 commit 135c6bb

File tree

2 files changed

+26
-22
lines changed

2 files changed

+26
-22
lines changed

components/clp-package-utils/clp_package_utils/scripts/decompress.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -181,23 +181,13 @@ def handle_extract_stream_cmd(
181181
return -1
182182

183183
job_command = parsed_args.command
184+
if EXTRACT_IR_CMD == job_command and StorageEngine.CLP != storage_engine:
185+
logger.error(f"IR extraction is not supported for storage engine `{storage_engine}`.")
186+
return -1
184187
if EXTRACT_JSON_CMD == job_command and StorageEngine.CLP_S != storage_engine:
185188
logger.error(f"JSON extraction is not supported for storage engine `{storage_engine}`.")
186189
return -1
187190

188-
dataset = parsed_args.dataset
189-
if StorageEngine.CLP_S == storage_engine:
190-
dataset = CLP_DEFAULT_DATASET_NAME if dataset is None else dataset
191-
try:
192-
clp_db_connection_params = clp_config.database.get_clp_connection_params_and_type(True)
193-
validate_dataset_name(clp_db_connection_params["table_prefix"], dataset)
194-
except Exception as e:
195-
logger.error(e)
196-
return -1
197-
elif dataset is not None:
198-
logger.error(f"Dataset selection is not supported for storage engine: {storage_engine}.")
199-
return -1
200-
201191
container_name = generate_container_name(str(JobType.IR_EXTRACTION))
202192
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
203193
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
@@ -229,6 +219,15 @@ def handle_extract_stream_cmd(
229219
extract_cmd.append("--target-uncompressed-size")
230220
extract_cmd.append(str(parsed_args.target_uncompressed_size))
231221
elif EXTRACT_JSON_CMD == job_command:
222+
dataset = parsed_args.dataset
223+
dataset = CLP_DEFAULT_DATASET_NAME if dataset is None else dataset
224+
try:
225+
clp_db_connection_params = clp_config.database.get_clp_connection_params_and_type(True)
226+
validate_dataset_name(clp_db_connection_params["table_prefix"], dataset)
227+
except Exception as e:
228+
logger.error(e)
229+
return -1
230+
232231
extract_cmd.append(str(parsed_args.archive_id))
233232
if dataset is not None:
234233
extract_cmd.append("--dataset")

components/clp-package-utils/clp_package_utils/scripts/native/decompress.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
11
import argparse
22
import asyncio
33
import logging
4+
import os
45
import pathlib
56
import subprocess
67
import sys
78
import uuid
89
from contextlib import closing
910
from typing import Optional
1011

11-
import yaml
1212
from clp_py_utils.clp_config import (
1313
CLPConfig,
1414
Database,
@@ -229,18 +229,24 @@ def handle_extract_file_cmd(
229229
logs_dir = clp_config.logs_directory
230230
archives_dir = clp_config.archive_output.get_directory()
231231

232-
# Generate database config file for clp
233-
db_config_file_path = logs_dir / f".decompress-db-config-{uuid.uuid4()}.yml"
234-
with open(db_config_file_path, "w") as f:
235-
yaml.safe_dump(clp_config.database.get_clp_connection_params_and_type(True), f)
236-
232+
# Configure CLP metadata DB connection params.
233+
clp_db_connection_params = clp_config.database.get_clp_connection_params_and_type(True)
237234
# fmt: off
238235
extract_cmd = [
239236
str(clp_home / "bin" / "clp"),
240237
"x", str(archives_dir), str(extraction_dir),
241-
"--db-config-file", str(db_config_file_path),
238+
"--db-type", clp_db_connection_params["type"],
239+
"--db-host", clp_db_connection_params["host"],
240+
"--db-port", str(clp_db_connection_params["port"]),
241+
"--db-name", clp_db_connection_params["name"],
242+
"--db-table-prefix", clp_db_connection_params["table_prefix"],
242243
]
243244
# fmt: on
245+
extract_env = {
246+
**os.environ,
247+
"CLP_DB_USER": clp_db_connection_params["username"],
248+
"CLP_DB_PASS": clp_db_connection_params["password"],
249+
}
244250

245251
files_to_extract_list_path = None
246252
if list_path is not None:
@@ -256,7 +262,7 @@ def handle_extract_file_cmd(
256262
extract_cmd.append("-f")
257263
extract_cmd.append(str(files_to_extract_list_path))
258264

259-
proc = subprocess.Popen(extract_cmd)
265+
proc = subprocess.Popen(extract_cmd, env=extract_env)
260266
return_code = proc.wait()
261267
if 0 != return_code:
262268
logger.error(f"File extraction failed, return_code={return_code}")
@@ -265,7 +271,6 @@ def handle_extract_file_cmd(
265271
# Remove generated files
266272
if files_to_extract_list_path is not None:
267273
files_to_extract_list_path.unlink()
268-
db_config_file_path.unlink()
269274

270275
return 0
271276

0 commit comments

Comments
 (0)