Skip to content

Commit bd48ca5

Browse files
committed
Merge remote-tracking branch 'origin/main' into retension_period
2 parents 7633a6c + 58a60c6 commit bd48ca5

File tree

4 files changed

+141
-17
lines changed

4 files changed

+141
-17
lines changed

components/clp-package-utils/clp_package_utils/scripts/start_clp.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
CONTROLLER_TARGET_NAME,
2424
DB_COMPONENT_NAME,
2525
GARBAGE_COLLECTOR_NAME,
26+
get_components_for_target,
2627
QUERY_JOBS_TABLE_NAME,
2728
QUERY_SCHEDULER_COMPONENT_NAME,
2829
QUERY_WORKER_COMPONENT_NAME,
@@ -884,6 +885,7 @@ def start_webui(
884885

885886
client_settings_json_updates = {
886887
"ClpStorageEngine": clp_config.package.storage_engine,
888+
"ClpQueryEngine": clp_config.package.query_engine,
887889
"MongoDbSearchResultsMetadataCollectionName": clp_config.webui.results_metadata_collection_name,
888890
"SqlDbClpArchivesTableName": archives_table_name,
889891
"SqlDbClpDatasetsTableName": get_datasets_table_name(table_prefix),
@@ -1197,6 +1199,15 @@ def main(argv):
11971199
config_file_path = pathlib.Path(parsed_args.config)
11981200
clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
11991201

1202+
runnable_components = clp_config.get_runnable_components()
1203+
components_to_start = get_components_for_target(target)
1204+
components_to_start = components_to_start.intersection(runnable_components)
1205+
1206+
# Exit early if no components to start
1207+
if len(components_to_start) == 0:
1208+
logger.error(f"{target} not available with current configuration")
1209+
return -1
1210+
12001211
# Validate and load necessary credentials
12011212
if target in (
12021213
ALL_TARGET_NAME,
@@ -1278,35 +1289,46 @@ def main(argv):
12781289
conf_dir = clp_home / "etc"
12791290

12801291
# Start components
1281-
if target in (ALL_TARGET_NAME, DB_COMPONENT_NAME):
1292+
if DB_COMPONENT_NAME in components_to_start:
12821293
start_db(instance_id, clp_config, conf_dir)
1283-
if target in (ALL_TARGET_NAME, CONTROLLER_TARGET_NAME, DB_COMPONENT_NAME):
1294+
1295+
if (
1296+
target == CONTROLLER_TARGET_NAME and DB_COMPONENT_NAME in runnable_components
1297+
) or DB_COMPONENT_NAME in components_to_start:
12841298
create_db_tables(instance_id, clp_config, container_clp_config, mounts)
1285-
if target in (ALL_TARGET_NAME, CONTROLLER_TARGET_NAME, QUEUE_COMPONENT_NAME):
1299+
1300+
if QUEUE_COMPONENT_NAME in components_to_start:
12861301
start_queue(instance_id, clp_config)
1287-
if target in (ALL_TARGET_NAME, CONTROLLER_TARGET_NAME, REDIS_COMPONENT_NAME):
1302+
1303+
if REDIS_COMPONENT_NAME in components_to_start:
12881304
start_redis(instance_id, clp_config, conf_dir)
1289-
if target in (ALL_TARGET_NAME, RESULTS_CACHE_COMPONENT_NAME):
1305+
1306+
if RESULTS_CACHE_COMPONENT_NAME in components_to_start:
12901307
start_results_cache(instance_id, clp_config, conf_dir)
1291-
if target in (ALL_TARGET_NAME, CONTROLLER_TARGET_NAME, RESULTS_CACHE_COMPONENT_NAME):
1308+
1309+
if (
1310+
target == CONTROLLER_TARGET_NAME and RESULTS_CACHE_COMPONENT_NAME in runnable_components
1311+
) or RESULTS_CACHE_COMPONENT_NAME in components_to_start:
12921312
create_results_cache_indices(instance_id, clp_config, container_clp_config, mounts)
1293-
if target in (
1294-
ALL_TARGET_NAME,
1295-
CONTROLLER_TARGET_NAME,
1296-
COMPRESSION_SCHEDULER_COMPONENT_NAME,
1297-
):
1313+
1314+
if COMPRESSION_SCHEDULER_COMPONENT_NAME in components_to_start:
12981315
start_compression_scheduler(instance_id, clp_config, container_clp_config, mounts)
1299-
if target in (ALL_TARGET_NAME, CONTROLLER_TARGET_NAME, QUERY_SCHEDULER_COMPONENT_NAME):
1316+
1317+
if QUERY_SCHEDULER_COMPONENT_NAME in components_to_start:
13001318
start_query_scheduler(instance_id, clp_config, container_clp_config, mounts)
1301-
if target in (ALL_TARGET_NAME, COMPRESSION_WORKER_COMPONENT_NAME):
1319+
1320+
if COMPRESSION_WORKER_COMPONENT_NAME in components_to_start:
13021321
start_compression_worker(
13031322
instance_id, clp_config, container_clp_config, num_workers, mounts
13041323
)
1305-
if target in (ALL_TARGET_NAME, QUERY_WORKER_COMPONENT_NAME):
1324+
1325+
if QUERY_WORKER_COMPONENT_NAME in components_to_start:
13061326
start_query_worker(instance_id, clp_config, container_clp_config, num_workers, mounts)
1307-
if target in (ALL_TARGET_NAME, REDUCER_COMPONENT_NAME):
1327+
1328+
if REDUCER_COMPONENT_NAME in components_to_start:
13081329
start_reducer(instance_id, clp_config, container_clp_config, num_workers, mounts)
1309-
if target in (ALL_TARGET_NAME, WEBUI_COMPONENT_NAME):
1330+
1331+
if WEBUI_COMPONENT_NAME in components_to_start:
13101332
start_webui(instance_id, clp_config, container_clp_config, mounts)
13111333
if target in (ALL_TARGET_NAME, GARBAGE_COLLECTOR_NAME):
13121334
start_garbage_collector(instance_id, clp_config, container_clp_config, mounts)

components/clp-py-utils/clp_py_utils/clp_config.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pathlib
22
from enum import auto
3-
from typing import Literal, Optional, Union
3+
from typing import Literal, Optional, Set, Union
44

55
from dotenv import dotenv_values
66
from pydantic import BaseModel, PrivateAttr, root_validator, validator
@@ -28,10 +28,41 @@
2828
WEBUI_COMPONENT_NAME = "webui"
2929
GARBAGE_COLLECTOR_NAME = "garbage_collector"
3030

31+
# Component groups
32+
GENERAL_SCHEDULING_COMPONENTS = {
33+
QUEUE_COMPONENT_NAME,
34+
REDIS_COMPONENT_NAME,
35+
}
36+
COMPRESSION_COMPONENTS = GENERAL_SCHEDULING_COMPONENTS | {
37+
DB_COMPONENT_NAME,
38+
COMPRESSION_SCHEDULER_COMPONENT_NAME,
39+
COMPRESSION_WORKER_COMPONENT_NAME,
40+
}
41+
QUERY_COMPONENTS = GENERAL_SCHEDULING_COMPONENTS | {
42+
DB_COMPONENT_NAME,
43+
QUERY_SCHEDULER_COMPONENT_NAME,
44+
QUERY_WORKER_COMPONENT_NAME,
45+
REDUCER_COMPONENT_NAME,
46+
}
47+
UI_COMPONENTS = {
48+
RESULTS_CACHE_COMPONENT_NAME,
49+
WEBUI_COMPONENT_NAME,
50+
}
51+
ALL_COMPONENTS = COMPRESSION_COMPONENTS | QUERY_COMPONENTS | UI_COMPONENTS
52+
3153
# Target names
3254
ALL_TARGET_NAME = ""
3355
CONTROLLER_TARGET_NAME = "controller"
3456

57+
TARGET_TO_COMPONENTS = {
58+
ALL_TARGET_NAME: ALL_COMPONENTS,
59+
CONTROLLER_TARGET_NAME: GENERAL_SCHEDULING_COMPONENTS
60+
| {
61+
COMPRESSION_SCHEDULER_COMPONENT_NAME,
62+
QUERY_SCHEDULER_COMPONENT_NAME,
63+
},
64+
}
65+
3566
QUERY_JOBS_TABLE_NAME = "query_jobs"
3667
QUERY_TASKS_TABLE_NAME = "query_tasks"
3768
COMPRESSION_JOBS_TABLE_NAME = "compression_jobs"
@@ -50,6 +81,12 @@ class StorageEngine(KebabCaseStrEnum):
5081
CLP_S = auto()
5182

5283

84+
class QueryEngine(KebabCaseStrEnum):
85+
CLP = auto()
86+
CLP_S = auto()
87+
PRESTO = auto()
88+
89+
5390
class StorageType(LowercaseStrEnum):
5491
FS = auto()
5592
S3 = auto()
@@ -63,10 +100,12 @@ class AwsAuthType(LowercaseStrEnum):
63100

64101

65102
VALID_STORAGE_ENGINES = [storage_engine.value for storage_engine in StorageEngine]
103+
VALID_QUERY_ENGINES = [query_engine.value for query_engine in QueryEngine]
66104

67105

68106
class Package(BaseModel):
69107
storage_engine: str = "clp"
108+
query_engine: str = "clp"
70109

71110
@validator("storage_engine")
72111
def validate_storage_engine(cls, field):
@@ -77,6 +116,37 @@ def validate_storage_engine(cls, field):
77116
)
78117
return field
79118

119+
@validator("query_engine")
120+
def validate_query_engine(cls, field):
121+
if field not in VALID_QUERY_ENGINES:
122+
raise ValueError(
123+
f"package.query_engine must be one of the following"
124+
f" {'|'.join(VALID_QUERY_ENGINES)}"
125+
)
126+
return field
127+
128+
@root_validator
129+
def validate_query_engine_package_compatibility(cls, values):
130+
query_engine = values.get("query_engine")
131+
storage_engine = values.get("storage_engine")
132+
133+
if query_engine in [QueryEngine.CLP, QueryEngine.CLP_S]:
134+
if query_engine != storage_engine:
135+
raise ValueError(
136+
f"query_engine '{query_engine}' is only compatible with "
137+
f"storage_engine '{query_engine}'."
138+
)
139+
elif query_engine == QueryEngine.PRESTO:
140+
if storage_engine != StorageEngine.CLP_S:
141+
raise ValueError(
142+
f"query_engine '{QueryEngine.PRESTO}' is only compatible with "
143+
f"storage_engine '{StorageEngine.CLP_S}'."
144+
)
145+
else:
146+
raise ValueError(f"Unsupported query_engine '{query_engine}'.")
147+
148+
return values
149+
80150

81151
class Database(BaseModel):
82152
type: str = "mariadb"
@@ -799,6 +869,12 @@ def load_redis_credentials_from_file(self):
799869
f"Credentials file '{self.credentials_file_path}' does not contain key '{ex}'."
800870
)
801871

872+
def get_runnable_components(self) -> Set[str]:
873+
if QueryEngine.PRESTO == self.package.query_engine:
874+
return COMPRESSION_COMPONENTS | UI_COMPONENTS
875+
else:
876+
return ALL_COMPONENTS
877+
802878
def dump_to_primitive_dict(self):
803879
d = self.dict()
804880
d["logs_input"] = self.logs_input.dump_to_primitive_dict()
@@ -833,3 +909,12 @@ def dump_to_primitive_dict(self):
833909
d["stream_output"] = self.stream_output.dump_to_primitive_dict()
834910

835911
return d
912+
913+
914+
def get_components_for_target(target: str) -> Set[str]:
915+
if target in TARGET_TO_COMPONENTS:
916+
return TARGET_TO_COMPONENTS[target]
917+
elif target in ALL_COMPONENTS:
918+
return {target}
919+
else:
920+
return set()

components/package-template/src/etc/clp-config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#
1313
#package:
1414
# storage_engine: "clp"
15+
# query_engine: "clp"
1516
#
1617
#database:
1718
# type: "mariadb" # "mariadb" or "mysql"

taskfile.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ tasks:
470470
# NOTE: The trailing slash after the source is necessary so that rsync copies
471471
# `/parents/A` -> `/parents/B` rather than `/parents/A` -> `/parents/B/A`
472472
- "rsync --archive '{{.G_PACKAGE_BUILD_DIR}}/' '{{.OUTPUT_DIR}}'"
473+
473474
# Set the storage engine for the package
474475
- task: "utils:misc:replace-text"
475476
vars:
@@ -481,6 +482,21 @@ tasks:
481482
FILE_PATH: "{{.OUTPUT_DIR}}/etc/clp-config.yml"
482483
SED_EXP: >-
483484
s/(\#[[:space:]]*storage_engine: ")[^"]+"/\1{{.STORAGE_ENGINE}}"/
485+
486+
# Set the query engine for the package
487+
# NOTE: We set `query_engine` to `STORAGE_ENGINE` intentionally since compatible CLP storage
488+
# and query engines have the same name.
489+
- task: "utils:misc:replace-text"
490+
vars:
491+
FILE_PATH: "{{.OUTPUT_DIR}}/lib/python3/site-packages/clp_py_utils/clp_config.py"
492+
SED_EXP: >-
493+
s/([[:space:]]*query_engine: str = ")[^"]+"/\1{{.STORAGE_ENGINE}}"/
494+
- task: "utils:misc:replace-text"
495+
vars:
496+
FILE_PATH: "{{.OUTPUT_DIR}}/etc/clp-config.yml"
497+
SED_EXP: >-
498+
s/(\#[[:space:]]*query_engine: ")[^"]+"/\1{{.STORAGE_ENGINE}}"/
499+
484500
- >-
485501
tar czf '{{.OUTPUT_FILE}}'
486502
--directory '{{.G_BUILD_DIR}}'

0 commit comments

Comments
 (0)