Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
af8ce88
Rename generate-user-env-vars-file since it will also need to generat…
kirkrodrigues Aug 12, 2025
e0a1820
Move CLP config load out of _add_clp_env_vars.
kirkrodrigues Aug 13, 2025
4deb2ba
Add S3 support
anlowee Aug 18, 2025
728efe2
Fix
anlowee Aug 18, 2025
2ba53ca
Fix
anlowee Aug 18, 2025
aa3e18d
Add logs
anlowee Aug 18, 2025
67e16ce
Revert "Add logs"
anlowee Aug 18, 2025
ba16edf
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 18, 2025
78139be
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 18, 2025
8d6744d
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 19, 2025
2afac89
Fix lint
anlowee Aug 19, 2025
d4bf46a
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Aug 19, 2025
5e13836
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 19, 2025
d7478f8
Address coderabbitai comments
anlowee Aug 20, 2025
14b3a80
Address coderabbitai comments
anlowee Aug 20, 2025
c09641b
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 20, 2025
d59af37
Update docs to remove the limitation that only local file system file…
anlowee Aug 20, 2025
80959fc
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 20, 2025
ada03bb
Generate clp.properties by pythong script
anlowee Aug 25, 2025
9d92fc9
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Aug 25, 2025
968c29c
Lint fix
anlowee Aug 25, 2025
cdaf3f5
Update the docs and config
anlowee Aug 25, 2025
40c352c
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 25, 2025
e5e1ed6
Refactor _generate_worker_clp_properties.
kirkrodrigues Sep 2, 2025
b9a898f
Remove worker's clp.properties since it'll be generated.
kirkrodrigues Sep 2, 2025
e012fb0
Undo unnecessary changes in generate-configs.sh.
kirkrodrigues Sep 2, 2025
354a1aa
Refactor s3 config reading.
kirkrodrigues Sep 2, 2025
d1aa25b
Refactor path resolution. Use correct key for staging_directory.
kirkrodrigues Sep 2, 2025
29c4232
Extract CLP S3 env var extraction.
kirkrodrigues Sep 2, 2025
c4bcb8b
Remove obsolete method.
kirkrodrigues Sep 2, 2025
5cb13b9
Apply linter.
kirkrodrigues Sep 2, 2025
570c18a
Edit set-up-config.sh.
kirkrodrigues Sep 2, 2025
9842ef6
Note how to configure S3 config and known issue in docs.
kirkrodrigues Sep 2, 2025
f769282
Remove blank line.
kirkrodrigues Sep 2, 2025
a388247
Fix: Require secret_access_key.
kirkrodrigues Sep 2, 2025
72ae05b
Use correct type annotations.
kirkrodrigues Sep 2, 2025
ccaf9a3
Address coderabbitai comments
anlowee Sep 2, 2025
34617b9
Merge remote-tracking branch 'origin/main' into xwei/s3-support-config
anlowee Sep 2, 2025
b3694b7
Fix a bug
anlowee Sep 2, 2025
6bcacfb
Update issue
anlowee Sep 2, 2025
0a670b0
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
33592c9
docs: Change supported release to clp v0.5.0; Remove SELECT * warning…
kirkrodrigues Sep 3, 2025
bc86d60
Set PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE in both fs and s3 cases …
kirkrodrigues Sep 3, 2025
3402f9c
Fix type annotation for _get_config_value.
kirkrodrigues Sep 3, 2025
2ac911a
Use _get_required_config_value for database credentials; Add config f…
kirkrodrigues Sep 3, 2025
06b5771
Minor touch-up.
kirkrodrigues Sep 3, 2025
bc90d7f
Update link to split config file syntax.
kirkrodrigues Sep 3, 2025
2b3c624
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
8ac0b03
Address comments
anlowee Sep 3, 2025
2306ba1
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Sep 3, 2025
f4d08b4
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import sys
from pathlib import Path
from typing import Dict, Optional
from typing import Any, Dict, Optional

import yaml
from dotenv import dotenv_values
Expand All @@ -28,7 +28,9 @@ def main(argv=None) -> int:
argv = sys.argv

args_parser = argparse.ArgumentParser(
description="Generates an environment variables file for any user-configured properties."
description=(
"Generates any necessary config files corresponding to user-configured properties."
)
)
args_parser.add_argument(
"--clp-package-dir", help="CLP package directory.", required=True, type=Path
Expand All @@ -41,8 +43,20 @@ def main(argv=None) -> int:
clp_package_dir: Path = parsed_args.clp_package_dir.resolve()
output_file: Path = parsed_args.output_file

clp_config_file_path = clp_package_dir / "etc" / "clp-config.yml"
if not clp_config_file_path.exists():
logger.error(
"'%s' doesn't exist. Is '%s' the location of the CLP package?",
clp_config_file_path,
clp_package_dir.resolve(),
)
return 1

with open(clp_config_file_path, "r") as clp_config_file:
clp_config = yaml.safe_load(clp_config_file)

env_vars: Dict[str, str] = {}
if not _add_clp_env_vars(clp_package_dir, env_vars):
if not _add_clp_env_vars(clp_config, clp_package_dir, env_vars):
return 1

script_dir = Path(__file__).parent.resolve()
Expand All @@ -56,28 +70,19 @@ def main(argv=None) -> int:
return 0


def _add_clp_env_vars(clp_package_dir: Path, env_vars: Dict[str, str]) -> bool:
def _add_clp_env_vars(
clp_config: Dict[str, Any], clp_package_dir: Path, env_vars: Dict[str, str]
) -> bool:
"""
Adds environment variables for CLP config values to `env_vars`.

:param clp_config:
:param clp_package_dir:
:param env_vars:
:return: Whether the environment variables were successfully added.
"""
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_TABLE_PREFIX"] = "clp_"

clp_config_file_path = clp_package_dir / "etc" / "clp-config.yml"
if not clp_config_file_path.exists():
logger.error(
"'%s' doesn't exist. Is '%s' the location of the CLP package?",
clp_config_file_path,
clp_package_dir.resolve(),
)
return False

with open(clp_config_file_path, "r") as clp_config_file:
clp_config = yaml.safe_load(clp_config_file)

database_type = _get_config_value(clp_config, "database.type", "mariadb")
if "mariadb" != database_type and "mysql" != database_type:
logger.error(
Expand All @@ -98,24 +103,43 @@ def _add_clp_env_vars(clp_package_dir: Path, env_vars: Dict[str, str]) -> bool:
clp_archive_output_storage_type = _get_config_value(
clp_config, "archive_output.storage.type", "fs"
)
if "fs" != clp_archive_output_storage_type:
env_vars["CLP_ARCHIVES_DIR"] = _resolve_archives_dir(
clp_package_dir,
_get_config_value(clp_config, "archive_output.storage.directory"),
clp_package_dir
/ "var"
/ "data"
/ ("archives" if clp_archive_output_storage_type == "fs" else "staged-archives"),
)
if "s3" == clp_archive_output_storage_type:
s3_config_key_prefix = f"archive_output.storage.s3_config"
s3_credentials_key_prefix = f"{s3_config_key_prefix}.aws_authentication.credentials"

s3_access_key_id = _get_config_value(
clp_config, f"{s3_credentials_key_prefix}.access_key_id"
)

s3_bucket = _get_config_value(clp_config, f"{s3_config_key_prefix}.bucket")
s3_region_code = _get_config_value(clp_config, f"{s3_config_key_prefix}.region_code")
s3_end_point = f"https://{s3_bucket}.s3.{s3_region_code}.amazonaws.com/"

s3_secret_access_key = _get_config_value(
clp_config, f"{s3_credentials_key_prefix}.secret_access_key"
)

env_vars["PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE"] = "s3"
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER"] = "clp_package"
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_ACCESS_KEY_ID"] = s3_access_key_id
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_END_POINT"] = s3_end_point
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_SECRET_ACCESS_KEY"] = s3_secret_access_key
else:
logger.error(
"Expected CLP's archive_output.storage.type to be fs but found '%s'. Presto"
" currently only supports reading archives from the fs storage type.",
"Expected CLP's archive_output.storage.type to be fs or s3 but found '%s'. Presto"
" currently only supports reading archives from the fs or s3 storage type.",
clp_archive_output_storage_type,
)
return False

clp_archives_dir = _get_config_value(
clp_config,
"archive_output.storage.directory",
str(clp_package_dir / "var" / "data" / "archives"),
)
if Path(clp_archives_dir).is_absolute():
env_vars["CLP_ARCHIVES_DIR"] = clp_archives_dir
else:
env_vars["CLP_ARCHIVES_DIR"] = str(clp_package_dir / clp_archives_dir)

credentials_file_path = clp_package_dir / "etc" / "credentials.yml"
if not credentials_file_path.exists():
logger.error("'%s' doesn't exist. Did you start CLP?", credentials_file_path)
Expand All @@ -137,6 +161,11 @@ def _add_clp_env_vars(clp_package_dir: Path, env_vars: Dict[str, str]) -> bool:
return True


def _resolve_archives_dir(base_dir: Path, configured: Optional[str], default: Path) -> str:
effective = configured or str(default)
return effective if Path(effective).is_absolute() else str(base_dir / effective)


def _add_worker_env_vars(coordinator_common_env_file_path: Path, env_vars: Dict[str, str]) -> bool:
"""
Adds environment variables for worker config values to `env_vars`.
Expand Down
28 changes: 27 additions & 1 deletion tools/deployment/presto-clp/worker/scripts/generate-configs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,19 @@ update_config_file() {
local file_path=$1
local key=$2
local value=$3
local sensitive=${4:-false}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Document the new "sensitive" parameter in update_config_file.

The function signature gained a fourth parameter, but the header comment still lists only three. Add the param to avoid misuse by future callers.

 # @param $1 Path to the properties file.
 # @param $2 The key to set.
 # @param $3 The value to set.
+# @param $4 (optional) "true" to mask the value when logging
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
local sensitive=${4:-false}
# Sets/updates the given kv-pair in the given properties file.
#
# @param $1 Path to the properties file.
# @param $2 The key to set.
# @param $3 The value to set.
# @param $4 (optional) "true" to mask the value when logging
update_config_file() {
local file_path=$1
local key=$2
local value=$3
local sensitive=${4:-false}
}
🤖 Prompt for AI Agents
In tools/deployment/presto-clp/worker/scripts/generate-configs.sh around line
51, the function update_config_file now accepts a fourth parameter "sensitive"
but the header comment only documents three parameters; update that header to
include the new "sensitive" parameter (name: sensitive, purpose: whether the
value is sensitive and should be handled/hidden, type: boolean/string, default:
false) and describe its default behavior so future callers know how to pass and
interpret it. Ensure the param description matches existing comment style and
mentions the default value and any special handling (e.g., masking or skipping
echoing) used by the function.


if grep --quiet "^${key}=.*$" "$file_path"; then
sed --in-place "s|^${key}=.*|${key}=${value}|" "$file_path"
else
echo "${key}=${value}" >>"$file_path"
fi
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Make key/value matching and replacement regex-safe; avoid failures on missing files.

Keys like node.internal-address contain dots that are regex metacharacters. Using grep/sed with unescaped keys can produce false positives. Also, grep on a non-existent file will fail under set -e. Guard the file and use fixed-string grep; escape replacement metacharacters in the value.

-    if grep --quiet "^${key}=.*$" "$file_path"; then
-        sed --in-place "s|^${key}=.*|${key}=${value}|" "$file_path"
-    else
-        echo "${key}=${value}" >>"$file_path"
-    fi
+    # Ensure the file exists to avoid grep/sed failures under `set -e`
+    [[ -f "$file_path" ]] || : >"$file_path"
+    # Match keys literally to avoid regex surprises with dots, etc.
+    if grep -F --quiet "^${key}=" "$file_path"; then
+        # Escape replacement metacharacters in value for sed
+        local escaped_value=${value//\\/\\\\}
+        escaped_value=${escaped_value//|/\\|}
+        escaped_value=${escaped_value//&/\\&}
+        local escaped_key
+        escaped_key=$(printf '%s' "$key" | sed -e 's/[.[\*^$+?{}|()]/\\&/g')
+        sed --in-place "s|^${escaped_key}=.*|${key}=${escaped_value}|" "$file_path"
+    else
+        echo "${key}=${value}" >>"$file_path"
+    fi

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In tools/deployment/presto-clp/worker/scripts/generate-configs.sh around lines
53-57, the current grep/sed logic treats the key as a regex and will fail when
keys contain regex metacharacters and when the target file is missing; also the
replacement value can break sed if it contains &, \ or /. Fix by first ensuring
the file exists (touch the file_path if missing), use grep --quiet
--fixed-strings (or grep -qF) to check for the literal key, and when calling sed
escape the key so it is matched literally (escape regex metacharacters such as
.[]*^$) or build the sed command from a safely escaped key; additionally
escape/quote the replacement value for sed (escape &, \ and the chosen
delimiter) before passing it to sed --in-place so the substitution is robust.

log "INFO" "Set ${key}=${value} in ${file_path}"
if [[ "$sensitive" == "true" ]]; then
local masked="****${value: -4}"
log "INFO" "Set ${key}=${masked} in ${file_path}"
else
log "INFO" "Set ${key}=${value} in ${file_path}"
fi
}

apt-get update && apt-get install --assume-yes --no-install-recommends jq wget
Expand All @@ -74,6 +80,26 @@ done
rm -f "${PRESTO_CONFIG_DIR}/catalog/"*
mv "${PRESTO_CONFIG_DIR}/clp.properties" "${PRESTO_CONFIG_DIR}/catalog"

# Update clp.properties
readonly CLP_PROPERTIES_FILE="/opt/presto-server/etc/catalog/clp.properties"
if [ "${PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE:-}" = "s3" ]; then
log "INFO" "Enable S3 support"
for var in PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER \
PRESTO_WORKER_CLPPROPERTIES_S3_ACCESS_KEY_ID \
PRESTO_WORKER_CLPPROPERTIES_S3_SECRET_ACCESS_KEY \
PRESTO_WORKER_CLPPROPERTIES_S3_END_POINT; do
if [ -z "${!var:-}" ]; then
log "ERROR" "Missing required env var: $var"
exit 1
fi
done
update_config_file "$CLP_PROPERTIES_FILE" "clp.storage-type" "${PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-auth-provider" "${PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-access-key-id" "${PRESTO_WORKER_CLPPROPERTIES_S3_ACCESS_KEY_ID}" true
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-end-point" "${PRESTO_WORKER_CLPPROPERTIES_S3_END_POINT}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-secret-access-key" "${PRESTO_WORKER_CLPPROPERTIES_S3_SECRET_ACCESS_KEY}" true
fi

# Update config.properties
readonly CONFIG_PROPERTIES_FILE="/opt/presto-server/etc/config.properties"
version=$(get_coordinator_version "$CONFIG_PROPERTIES_FILE")
Expand Down
Loading