Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
af8ce88
Rename generate-user-env-vars-file since it will also need to generat…
kirkrodrigues Aug 12, 2025
e0a1820
Move CLP config load out of _add_clp_env_vars.
kirkrodrigues Aug 13, 2025
4deb2ba
Add S3 support
anlowee Aug 18, 2025
728efe2
Fix
anlowee Aug 18, 2025
2ba53ca
Fix
anlowee Aug 18, 2025
aa3e18d
Add logs
anlowee Aug 18, 2025
67e16ce
Revert "Add logs"
anlowee Aug 18, 2025
ba16edf
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 18, 2025
78139be
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 18, 2025
8d6744d
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 19, 2025
2afac89
Fix lint
anlowee Aug 19, 2025
d4bf46a
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Aug 19, 2025
5e13836
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 19, 2025
d7478f8
Address coderabbitai comments
anlowee Aug 20, 2025
14b3a80
Address coderabbitai comments
anlowee Aug 20, 2025
c09641b
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 20, 2025
d59af37
Update docs to remove the limitation that only local file system file…
anlowee Aug 20, 2025
80959fc
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 20, 2025
ada03bb
Generate clp.properties by Python script
anlowee Aug 25, 2025
9d92fc9
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Aug 25, 2025
968c29c
Lint fix
anlowee Aug 25, 2025
cdaf3f5
Update the docs and config
anlowee Aug 25, 2025
40c352c
Merge branch 'main' into xwei/s3-support-config
anlowee Aug 25, 2025
e5e1ed6
Refactor _generate_worker_clp_properties.
kirkrodrigues Sep 2, 2025
b9a898f
Remove worker's clp.properties since it'll be generated.
kirkrodrigues Sep 2, 2025
e012fb0
Undo unnecessary changes in generate-configs.sh.
kirkrodrigues Sep 2, 2025
354a1aa
Refactor s3 config reading.
kirkrodrigues Sep 2, 2025
d1aa25b
Refactor path resolution. Use correct key for staging_directory.
kirkrodrigues Sep 2, 2025
29c4232
Extract CLP S3 env var extraction.
kirkrodrigues Sep 2, 2025
c4bcb8b
Remove obsolete method.
kirkrodrigues Sep 2, 2025
5cb13b9
Apply linter.
kirkrodrigues Sep 2, 2025
570c18a
Edit set-up-config.sh.
kirkrodrigues Sep 2, 2025
9842ef6
Note how to configure S3 config and known issue in docs.
kirkrodrigues Sep 2, 2025
f769282
Remove blank line.
kirkrodrigues Sep 2, 2025
a388247
Fix: Require secret_access_key.
kirkrodrigues Sep 2, 2025
72ae05b
Use correct type annotations.
kirkrodrigues Sep 2, 2025
ccaf9a3
Address coderabbitai comments
anlowee Sep 2, 2025
34617b9
Merge remote-tracking branch 'origin/main' into xwei/s3-support-config
anlowee Sep 2, 2025
b3694b7
Fix a bug
anlowee Sep 2, 2025
6bcacfb
Update issue
anlowee Sep 2, 2025
0a670b0
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
33592c9
docs: Change supported release to clp v0.5.0; Remove SELECT * warning…
kirkrodrigues Sep 3, 2025
bc86d60
Set PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE in both fs and s3 cases …
kirkrodrigues Sep 3, 2025
3402f9c
Fix type annotation for _get_config_value.
kirkrodrigues Sep 3, 2025
2ac911a
Use _get_required_config_value for database credentials; Add config f…
kirkrodrigues Sep 3, 2025
06b5771
Minor touch-up.
kirkrodrigues Sep 3, 2025
bc90d7f
Update link to split config file syntax.
kirkrodrigues Sep 3, 2025
2b3c624
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
8ac0b03
Address comments
anlowee Sep 3, 2025
2306ba1
Merge branch 'xwei/s3-support-config' of github.com:anlowee/clp into …
anlowee Sep 3, 2025
f4d08b4
Merge branch 'main' into xwei/s3-support-config
anlowee Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import sys
from pathlib import Path
from typing import Dict, Optional
from typing import Any, Dict, Optional

import yaml
from dotenv import dotenv_values
Expand All @@ -28,7 +28,9 @@ def main(argv=None) -> int:
argv = sys.argv

args_parser = argparse.ArgumentParser(
description="Generates an environment variables file for any user-configured properties."
description=(
"Generates any necessary config files corresponding to user-configured properties."
)
)
args_parser.add_argument(
"--clp-package-dir", help="CLP package directory.", required=True, type=Path
Expand All @@ -41,8 +43,20 @@ def main(argv=None) -> int:
clp_package_dir: Path = parsed_args.clp_package_dir.resolve()
output_file: Path = parsed_args.output_file

clp_config_file_path = clp_package_dir / "etc" / "clp-config.yml"
if not clp_config_file_path.exists():
logger.error(
"'%s' doesn't exist. Is '%s' the location of the CLP package?",
clp_config_file_path,
clp_package_dir.resolve(),
)
return False

with open(clp_config_file_path, "r") as clp_config_file:
clp_config = yaml.safe_load(clp_config_file)

env_vars: Dict[str, str] = {}
if not _add_clp_env_vars(clp_package_dir, env_vars):
if not _add_clp_env_vars(clp_config, clp_package_dir, env_vars):
return 1

script_dir = Path(__file__).parent.resolve()
Expand All @@ -56,28 +70,19 @@ def main(argv=None) -> int:
return 0


def _add_clp_env_vars(clp_package_dir: Path, env_vars: Dict[str, str]) -> bool:
def _add_clp_env_vars(
clp_config: Dict[str, Any], clp_package_dir: Path, env_vars: Dict[str, str]
) -> bool:
"""
Adds environment variables for CLP config values to `env_vars`.

:param clp_config:
:param clp_package_dir:
:param env_vars:
:return: Whether the environment variables were successfully added.
"""
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_TABLE_PREFIX"] = "clp_"

clp_config_file_path = clp_package_dir / "etc" / "clp-config.yml"
if not clp_config_file_path.exists():
logger.error(
"'%s' doesn't exist. Is '%s' the location of the CLP package?",
clp_config_file_path,
clp_package_dir.resolve(),
)
return False

with open(clp_config_file_path, "r") as clp_config_file:
clp_config = yaml.safe_load(clp_config_file)

database_type = _get_config_value(clp_config, "database.type", "mariadb")
if "mariadb" != database_type and "mysql" != database_type:
logger.error(
Expand All @@ -98,24 +103,56 @@ def _add_clp_env_vars(clp_package_dir: Path, env_vars: Dict[str, str]) -> bool:
clp_archive_output_storage_type = _get_config_value(
clp_config, "archive_output.storage.type", "fs"
)
if "fs" != clp_archive_output_storage_type:
if "fs" == clp_archive_output_storage_type:
clp_archives_dir = _get_config_value(
clp_config,
"archive_output.storage.directory",
str(clp_package_dir / "var" / "data" / "archives"),
)
if Path(clp_archives_dir).is_absolute():
env_vars["CLP_ARCHIVES_DIR"] = clp_archives_dir
else:
env_vars["CLP_ARCHIVES_DIR"] = str(clp_package_dir / clp_archives_dir)
elif "s3" == clp_archive_output_storage_type:
# Not used in the s3 case; set only so that CLP_ARCHIVES_DIR is not an empty string.
clp_archives_dir = _get_config_value(
clp_config,
"archive_output.storage.directory",
str(clp_package_dir / "var" / "data" / "staged-archives"),
)
if Path(clp_archives_dir).is_absolute():
env_vars["CLP_ARCHIVES_DIR"] = clp_archives_dir
else:
env_vars["CLP_ARCHIVES_DIR"] = str(clp_package_dir / clp_archives_dir)

s3_config_key_prefix = f"archive_output.storage.s3_config"
s3_credentials_key_prefix = f"{s3_config_key_prefix}.aws_authentication.credentials"

s3_access_key_id = _get_config_value(
clp_config, f"{s3_credentials_key_prefix}.access_key_id"
)

s3_bucket = _get_config_value(clp_config, f"{s3_config_key_prefix}.bucket")
s3_region_code = _get_config_value(clp_config, f"{s3_config_key_prefix}.region_code")
s3_end_point = f"https://{s3_bucket}.s3.{s3_region_code}.amazonaws.com/"

s3_secret_access_key = _get_config_value(
clp_config, f"{s3_credentials_key_prefix}.secret_access_key"
)

env_vars["PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE"] = "s3"
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER"] = "clp_package"
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_ACCESS_KEY_ID"] = s3_access_key_id
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_END_POINT"] = s3_end_point
env_vars["PRESTO_WORKER_CLPPROPERTIES_S3_SECRET_ACCESS_KEY"] = s3_secret_access_key
else:
logger.error(
"Expected CLP's archive_output.storage.type to be fs but found '%s'. Presto"
" currently only supports reading archives from the fs storage type.",
"Expected CLP's archive_output.storage.type to be fs or s3 but found '%s'. Presto"
" currently only supports reading archives from the fs or s3 storage type.",
clp_archive_output_storage_type,
)
return False

clp_archives_dir = _get_config_value(
clp_config,
"archive_output.storage.directory",
str(clp_package_dir / "var" / "data" / "archives"),
)
if Path(clp_archives_dir).is_absolute():
env_vars["CLP_ARCHIVES_DIR"] = clp_archives_dir
else:
env_vars["CLP_ARCHIVES_DIR"] = str(clp_package_dir / clp_archives_dir)

credentials_file_path = clp_package_dir / "etc" / "credentials.yml"
if not credentials_file_path.exists():
logger.error("'%s' doesn't exist. Did you start CLP?", credentials_file_path)
Expand Down
2 changes: 1 addition & 1 deletion tools/deployment/presto-clp/scripts/set-up-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ echo "Installing required Python packages..."
pip3 install -r "${script_dir}/requirements.txt"

echo "Generating environment variables file for user-configured properties..."
python3 "${script_dir}/generate-user-env-vars-file.py" \
python3 "${script_dir}/generate-configs.py" \
--clp-package-dir "${clp_package_dir}" \
--output-file "${script_dir}/../.env"
11 changes: 11 additions & 0 deletions tools/deployment/presto-clp/worker/scripts/generate-configs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ done
rm -f "${PRESTO_CONFIG_DIR}/catalog/"*
mv "${PRESTO_CONFIG_DIR}/clp.properties" "${PRESTO_CONFIG_DIR}/catalog"

# Update clp.properties
# Path of the worker's CLP catalog properties file; marked readonly so later steps can't reassign
# it. NOTE(review): presumably the same file that was moved into the catalog directory above --
# confirm that PRESTO_CONFIG_DIR is /opt/presto-server/etc.
readonly CLP_PROPERTIES_FILE="/opt/presto-server/etc/catalog/clp.properties"
# Only write the S3-related properties when an S3 auth provider is configured. `:-` makes an unset
# variable expand to an empty string, so the `-n` test is simply false in that case.
if [ -n "${PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER:-}" ]; then
log "INFO" "Enable S3 support"
# NOTE(review): the variables below are expanded without a `:-` fallback, so they are presumably
# expected to be set whenever the auth provider is -- confirm against the generated env file.
# Each call passes the properties file, a property key, and the value taken from the environment.
update_config_file "$CLP_PROPERTIES_FILE" "clp.storage-type" "${PRESTO_WORKER_CLPPROPERTIES_STORAGE_TYPE}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-auth-provider" "${PRESTO_WORKER_CLPPROPERTIES_S3_AUTH_PROVIDER}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-access-key-id" "${PRESTO_WORKER_CLPPROPERTIES_S3_ACCESS_KEY_ID}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-end-point" "${PRESTO_WORKER_CLPPROPERTIES_S3_END_POINT}"
update_config_file "$CLP_PROPERTIES_FILE" "clp.s3-secret-access-key" "${PRESTO_WORKER_CLPPROPERTIES_S3_SECRET_ACCESS_KEY}"
fi

# Update config.properties
readonly CONFIG_PROPERTIES_FILE="/opt/presto-server/etc/config.properties"
version=$(get_coordinator_version "$CONFIG_PROPERTIES_FILE")
Expand Down
Loading