Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 81 additions & 8 deletions tools/deployment/presto-clp/scripts/init.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import argparse
import logging
import math
import os
import sys
from pathlib import Path
from typing import Any, Dict, Optional

import psutil
import yaml
from dotenv import dotenv_values

Expand Down Expand Up @@ -226,21 +229,91 @@ def _add_worker_env_vars(coordinator_common_env_file_path: Path, env_vars: Dict[
config = dotenv_values(coordinator_common_env_file_path)

try:
env_vars["PRESTO_COORDINATOR_CONFIGPROPERTIES_DISCOVERY_URI"] = (
f'http://{config["PRESTO_COORDINATOR_SERVICENAME"]}'
f':{config["PRESTO_COORDINATOR_HTTPPORT"]}'
coordinator_service_name = _get_required_env_var(
config, "PRESTO_COORDINATOR_SERVICENAME", coordinator_common_env_file_path
)
except KeyError as e:
logger.error(
"Missing required key %s in '%s'",
e,
coordinator_common_env_file_path,
coordinator_http_port = _get_required_env_var(
config, "PRESTO_COORDINATOR_HTTPPORT", coordinator_common_env_file_path
)
except KeyError:
return False

env_vars["PRESTO_COORDINATOR_CONFIGPROPERTIES_DISCOVERY_URI"] = (
f"http://{coordinator_service_name}:{coordinator_http_port}"
)

if not _configure_worker_memory_env_vars(env_vars):
return False

return True


def _configure_worker_memory_env_vars(env_vars: Dict[str, str]) -> bool:
"""Adds memory-related environment variables for the worker."""

try:
total_memory_bytes = int(psutil.virtual_memory().total)
except (AttributeError, ValueError) as exc:
logger.error("Unable to determine total system memory: %s", exc)
return False

total_memory_gb = total_memory_bytes / (1024**3)
total_memory_gb_floor = max(1, math.floor(total_memory_gb))

system_memory_gb = min(
total_memory_gb_floor,
max(1, math.floor(total_memory_gb * 0.90)),
)
query_memory_gb = min(
system_memory_gb,
max(1, math.floor(system_memory_gb * (2 / 3))),
)
system_mem_limit_gb = min(
total_memory_gb_floor,
max(system_memory_gb, max(1, math.floor(total_memory_gb * 0.94))),
)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Make memory detection cgroup-aware; switch to exception logging.

psutil reports host RAM, not container limits. In containers, this can over-allocate Presto and cause OOM. Also, Ruff flags TRY400 here.

Apply these changes:

  • Detect Linux cgroup limits (v2: /sys/fs/cgroup/memory.max; v1: /sys/fs/cgroup/memory/memory.limit_in_bytes) and cap to the lower of psutil total and cgroup limit.
  • Use logger.exception when memory detection fails.

Patch inside function:

-    try:
-        total_memory_bytes = int(psutil.virtual_memory().total)
-    except (AttributeError, ValueError) as exc:
-        logger.error("Unable to determine total system memory: %s", exc)
-        return False
+    try:
+        total_memory_bytes = int(psutil.virtual_memory().total)
+    except Exception:
+        logger.exception("Unable to determine total system memory")
+        return False
+
+    # Respect container memory limits when present (cgroup v2/v1)
+    cgroup_limit_bytes = _read_cgroup_memory_limit_bytes()
+    if cgroup_limit_bytes and cgroup_limit_bytes > 0:
+        total_memory_bytes = min(total_memory_bytes, cgroup_limit_bytes)

Add this helper (outside the function) to keep things tidy:

from pathlib import Path

def _read_cgroup_memory_limit_bytes() -> Optional[int]:
    # cgroup v2
    v2 = Path("/sys/fs/cgroup/memory.max")
    # cgroup v1
    v1 = Path("/sys/fs/cgroup/memory/memory.limit_in_bytes")
    for p in (v2, v1):
        try:
            with open(p, "r", encoding="utf-8") as f:
                raw = f.read().strip()
            if raw and raw.lower() != "max":
                val = int(raw)
                if val > 0:
                    return val
        except FileNotFoundError:
            continue
        except Exception as exc:
            logger.debug("Ignoring cgroup memory read error from %s: %s", p, exc)
    return None

This prevents overshooting container RAM and aligns with typical Presto deployment patterns.

🧰 Tools
🪛 Ruff (0.14.1)

257-257: Use logging.exception instead of logging.error

Replace with exception

(TRY400)

🤖 Prompt for AI Agents
In tools/deployment/presto-clp/scripts/init.py around lines 254 to 275,
psutil.virtual_memory() is being used alone which returns host RAM and can
over-allocate inside containers; change the logic to detect cgroup limits and
cap total_memory_bytes to the lower of psutil total and any cgroup memory limit
(check cgroup v2 /sys/fs/cgroup/memory.max and cgroup v1
/sys/fs/cgroup/memory/memory.limit_in_bytes, ignore values of "max" or
non-positive, and treat missing files as no limit). Add a helper function
(placed outside the current function) that reads those two paths robustly,
returns an Optional[int] cgroup limit, and logs debug on read errors; then use
that helper to set total_memory_bytes = min(psutil_total, cgroup_limit) when
cgroup_limit is present. Also switch the existing except handler to
logger.exception(...) so exceptions during memory detection are logged with
traceback.

env_vars["PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEMORY_GB"] = str(system_memory_gb)
env_vars["PRESTO_WORKER_CONFIGPROPERTIES_QUERY_MEMORY_GB"] = str(query_memory_gb)
env_vars["PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEM_LIMIT_GB"] = str(system_mem_limit_gb)

logger.info(
(
"Detected %.2f GB of system memory; configured Presto worker memory limits "
"(system-memory-gb=%s, query-memory-gb=%s, system-mem-limit-gb=%s)."
),
total_memory_gb,
env_vars["PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEMORY_GB"],
env_vars["PRESTO_WORKER_CONFIGPROPERTIES_QUERY_MEMORY_GB"],
env_vars["PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEM_LIMIT_GB"],
)

return True


def _get_required_env_var(config: Dict[str, Optional[str]], key: str, env_file_path: Path) -> str:
"""
Fetches a required entry from a dotenv-style config.

:param config: Mapping of config keys to their string values.
:param key: The key that must exist in `config` with a non-empty value.
:param env_file_path: Path to the file that produced `config`, for logging context.
:return: The string value associated with `key`.
:raises KeyError: If `key` is missing or empty.
"""

value = config.get(key)
if value is None:
logger.error("Missing required key %s in '%s'", key, env_file_path)
raise KeyError(key)

if value == "":
logger.error("Empty value for required key %s in '%s'", key, env_file_path)
raise KeyError(key)

return value


def _generate_worker_clp_properties(
worker_config_template_path: Path, env_vars: Dict[str, str]
) -> bool:
Expand Down
1 change: 1 addition & 0 deletions tools/deployment/presto-clp/scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
psutil
python-dotenv
PyYAML
6 changes: 5 additions & 1 deletion tools/deployment/presto-clp/worker.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
PRESTO_WORKER_HTTP_PORT=8080

# node.properties
PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEMORY_GB=8
PRESTO_WORKER_CONFIGPROPERTIES_QUERY_MEMORY_GB=5

PRESTO_WORKER_NODEPROPERTIES_LOCATION=worker-location

PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEM_LIMIT_GB=8
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ http-server.http.port=${PRESTO_WORKER_HTTP_PORT}
shutdown-onset-sec=1
register-test-functions=false
runtime-metrics-collection-enabled=false
system-memory-gb=${PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEMORY_GB}
query-memory-gb=${PRESTO_WORKER_CONFIGPROPERTIES_QUERY_MEMORY_GB}
system-mem-limit-gb=${PRESTO_WORKER_CONFIGPROPERTIES_SYSTEM_MEM_LIMIT_GB}
Loading