Skip to content

Commit fdd2f82

Browse files
authored
fix(deployment): Use CLP Package's Docker Compose network for Presto deployments (fixes #1498). (#1501)
1 parent 1b92ca8 commit fdd2f82

File tree

5 files changed

+67
-26
lines changed

5 files changed

+67
-26
lines changed

components/clp-package-utils/clp_package_utils/controller.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -503,8 +503,8 @@ def _set_up_env_for_webui(self, container_clp_config: CLPConfig) -> EnvVarsDict:
503503

504504
query_engine = self._clp_config.package.query_engine
505505
if QueryEngine.PRESTO == query_engine:
506-
server_settings_json_updates["PrestoHost"] = self._clp_config.presto.host
507-
server_settings_json_updates["PrestoPort"] = self._clp_config.presto.port
506+
server_settings_json_updates["PrestoHost"] = container_clp_config.presto.host
507+
server_settings_json_updates["PrestoPort"] = container_clp_config.presto.port
508508
else:
509509
server_settings_json_updates["PrestoHost"] = None
510510
server_settings_json_updates["PrestoPort"] = None

components/clp-py-utils/clp_py_utils/clp_config.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
RESULTS_CACHE_COMPONENT_NAME = "results_cache"
3333
COMPRESSION_SCHEDULER_COMPONENT_NAME = "compression_scheduler"
3434
QUERY_SCHEDULER_COMPONENT_NAME = "query_scheduler"
35+
PRESTO_COORDINATOR_COMPONENT_NAME = "presto-coordinator"
3536
COMPRESSION_WORKER_COMPONENT_NAME = "compression_worker"
3637
QUERY_WORKER_COMPONENT_NAME = "query_worker"
3738
WEBUI_COMPONENT_NAME = "webui"
@@ -591,9 +592,15 @@ class GarbageCollector(BaseModel):
591592

592593

593594
class Presto(BaseModel):
595+
DEFAULT_PORT: ClassVar[int] = 8080
596+
594597
host: DomainStr
595598
port: Port
596599

600+
def transform_for_container(self):
601+
self.host = PRESTO_COORDINATOR_COMPONENT_NAME
602+
self.port = self.DEFAULT_PORT
603+
597604

598605
def _get_env_var(name: str) -> str:
599606
value = os.getenv(name)
@@ -815,6 +822,8 @@ def transform_for_container(self):
815822
self.results_cache.transform_for_container()
816823
self.query_scheduler.transform_for_container()
817824
self.reducer.transform_for_container()
825+
if self.package.query_engine == QueryEngine.PRESTO and self.presto is not None:
826+
self.presto.transform_for_container()
818827

819828

820829
class WorkerConfig(BaseModel):

docs/src/user-docs/guides-using-presto.md

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -43,23 +43,6 @@ Using Presto with CLP requires:
4343
query_engine: "presto"
4444
```
4545
46-
* Set the `database.host` key to a non-localhost hostname/IP.
47-
48-
```yaml
49-
database:
50-
# type: "mariadb"
51-
host: "<non-local-ip-address>"
52-
# port: 3306
53-
# name: "clp-db"
54-
```
55-
56-
:::{note}
57-
This change is necessary because the Presto containers run on a Docker network, and CLP's
58-
database runs on the host network. `localhost` will refer to a different entity in each of
59-
those contexts. This limitation will be addressed in the future when we unify Presto and CLP's
60-
deployment infrastructure.
61-
:::
62-
6346
* Set the `results_cache.retention_period` key to `null` since the CLP + Presto integration
6447
doesn't yet support garbage collection.
6548

tools/deployment/presto-clp/docker-compose.yaml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ services:
1111
- "./coordinator/scripts:/scripts:ro"
1212
- "coordinator-config:/opt/presto-server/etc"
1313
networks:
14-
- "presto"
14+
- "clp-package"
1515
ports:
1616
- "8889:8080"
1717
healthcheck:
@@ -40,7 +40,7 @@ services:
4040
- "./worker/scripts:/scripts:ro"
4141
- "worker-config:/opt/presto-server/etc"
4242
networks:
43-
- "presto"
43+
- "clp-package"
4444

4545
volumes:
4646
# Dummy volume to use when a bind mount is not desired.
@@ -50,5 +50,6 @@ volumes:
5050
worker-config:
5151

5252
networks:
53-
presto:
54-
driver: "bridge"
53+
clp-package:
54+
name: "${CLP_PACKAGE_NETWORK_NAME:-clp-package_default}"
55+
external: true

tools/deployment/presto-clp/scripts/init.py

Lines changed: 51 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77
import yaml
88
from dotenv import dotenv_values
99

10+
# Database endpoint inside the CLP Package Docker network. Must match the constants defined in
11+
# `components/clp-py-utils/clp_py_utils/clp_config.py`.
12+
DATABASE_COMPONENT_NAME = "database"
13+
DATABASE_DEFAULT_PORT = 3306
14+
1015
# Set up console logging
1116
logging_console_handler = logging.StreamHandler()
1217
logging_formatter = logging.Formatter(
@@ -101,11 +106,9 @@ def _add_clp_env_vars(
101106
)
102107
return False
103108

104-
database_host = _get_config_value(clp_config, "database.host", "localhost")
105-
database_port = _get_config_value(clp_config, "database.port", 3306)
106109
database_name = _get_config_value(clp_config, "database.name", "clp-db")
107110
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_DATABASE_URL"] = (
108-
f"jdbc:mysql://{database_host}:{database_port}"
111+
f"jdbc:mysql://{DATABASE_COMPONENT_NAME}:{DATABASE_DEFAULT_PORT}"
109112
)
110113
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_DATABASE_NAME"] = database_name
111114

@@ -163,6 +166,11 @@ def _add_clp_env_vars(
163166
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_DATABASE_USER"] = database_user
164167
env_vars["PRESTO_COORDINATOR_CLPPROPERTIES_METADATA_DATABASE_PASSWORD"] = database_password
165168

169+
instance_id = _get_clp_package_instance_id(clp_config, clp_package_dir)
170+
if instance_id is None:
171+
return False
172+
env_vars["CLP_PACKAGE_NETWORK_NAME"] = f"clp-package-{instance_id}_default"
173+
166174
return True
167175

168176

@@ -274,6 +282,46 @@ def _generate_worker_clp_properties(
274282
return True
275283

276284

285+
def _get_clp_package_instance_id(
286+
clp_config: Dict[str, Any], clp_package_dir: Path
287+
) -> Optional[str]:
288+
"""
289+
Retrieves the CLP package instance ID from the logs directory.
290+
291+
:param clp_config:
292+
:param clp_package_dir:
293+
:return: The instance ID if it could be read, otherwise `None`.
294+
"""
295+
296+
logs_directory = _get_path_clp_config_value(
297+
clp_config, "logs_directory", Path("var") / "log", clp_package_dir
298+
)
299+
instance_id_path = logs_directory / "instance-id"
300+
if not instance_id_path.exists():
301+
logger.error(
302+
"Cannot determine the CLP package Docker network because '%s' does not exist."
303+
" Start the CLP package at least once before configuring Presto.",
304+
instance_id_path,
305+
)
306+
return None
307+
308+
try:
309+
instance_id = instance_id_path.read_text(encoding="utf-8").strip()
310+
except OSError:
311+
logger.exception("Failed to read the CLP package instance ID from '%s'.", instance_id_path)
312+
return None
313+
314+
if not instance_id:
315+
logger.error(
316+
"Instance ID file '%s' is empty. Restart the CLP package to regenerate the instance"
317+
" ID.",
318+
instance_id_path,
319+
)
320+
return None
321+
322+
return instance_id
323+
324+
277325
def _get_path_clp_config_value(
278326
clp_config: Dict[str, Any], key: str, default_value: Path, clp_package_dir: Path
279327
) -> Path:

0 commit comments

Comments
 (0)