Skip to content

Commit b19b1ec

Browse files
committed
Merge remote-tracking branch 'origin/main' into bundle-mariadb
# Conflicts: # tools/docker-images/clp-package/Dockerfile
2 parents 24db721 + 11d73fb commit b19b1ec

File tree

40 files changed

+2902
-2978
lines changed

40 files changed

+2902
-2978
lines changed

components/clp-mcp-server/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ dependencies = [
99
"aiomysql>=0.2.0",
1010
"click>=8.3.0",
1111
"fastmcp>=2.12.4",
12-
"msgpack>=1.1.1",
12+
"msgpack>=1.1.2",
1313
"paginate>=0.5.7",
14-
"pymongo>=4.15.1",
14+
"pymongo>=4.15.3",
1515
]
1616

1717
[project.scripts]

components/clp-mcp-server/uv.lock

Lines changed: 286 additions & 257 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/clp-package-utils/poetry.lock

Lines changed: 0 additions & 556 deletions
This file was deleted.
Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
11
[project]
2-
requires-python = ">= 3.9"
3-
4-
[tool.poetry]
52
name = "clp-package-utils"
63
version = "0.5.2-dev"
74
description = "Utilities for the CLP package."
8-
authors = ["YScope Inc. <[email protected]>"]
5+
authors = [{name = "YScope Inc.", email = "[email protected]"}]
96
readme = "README.md"
10-
11-
[tool.poetry.dependencies]
12-
python = ">= 3.9"
13-
Brotli = "^1.1.0"
14-
msgpack = "^1.0.7"
15-
psutil = "^7.1.0"
16-
pydantic = "^2.11.9"
17-
pymongo = "^4.7.2"
18-
PyYAML = "^6.0.1"
7+
requires-python = ">=3.9"
8+
dependencies = [
9+
"Brotli>=1.1.0",
10+
"msgpack>=1.1.2",
11+
"psutil>=7.1.0",
12+
"pydantic>=2.12.3",
13+
"pymongo>=4.15.3",
14+
"PyYAML>=6.0.3",
15+
]
1916

2017
[build-system]
21-
requires = ["poetry-core"]
22-
build-backend = "poetry.core.masonry.api"
23-
18+
requires = ["hatchling"]
19+
build-backend = "hatchling.build"

components/clp-package-utils/uv.lock

Lines changed: 552 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/clp-py-utils/clp_py_utils/clp_config.py

Lines changed: 39 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Field,
1111
field_validator,
1212
model_validator,
13+
PlainSerializer,
1314
PrivateAttr,
1415
)
1516
from strenum import KebabCaseStrEnum, LowercaseStrEnum
@@ -21,6 +22,7 @@
2122
read_yaml_config_file,
2223
validate_path_could_be_dir,
2324
)
25+
from .serialization_utils import serialize_path, serialize_str_enum
2426

2527
# Constants
2628
# Component names
@@ -98,6 +100,8 @@
98100
CLP_QUEUE_PASS_ENV_VAR_NAME = "CLP_QUEUE_PASS"
99101
CLP_REDIS_PASS_ENV_VAR_NAME = "CLP_REDIS_PASS"
100102

103+
# Serializer
104+
StrEnumSerializer = PlainSerializer(serialize_str_enum)
101105
# Generic types
102106
NonEmptyStr = Annotated[str, Field(min_length=1)]
103107
PositiveFloat = Annotated[float, Field(gt=0)]
@@ -106,6 +110,7 @@
106110
# TODO: Replace this with pydantic_extra_types.domain.DomainStr.
107111
DomainStr = NonEmptyStr
108112
Port = Annotated[int, Field(gt=0, lt=2**16)]
113+
SerializablePath = Annotated[pathlib.Path, PlainSerializer(serialize_path)]
109114
ZstdCompressionLevel = Annotated[int, Field(ge=1, le=19)]
110115

111116

@@ -114,17 +119,26 @@ class StorageEngine(KebabCaseStrEnum):
114119
CLP_S = auto()
115120

116121

122+
StorageEngineStr = Annotated[StorageEngine, StrEnumSerializer]
123+
124+
117125
class DatabaseEngine(KebabCaseStrEnum):
118126
MARIADB = auto()
119127
MYSQL = auto()
120128

121129

130+
DatabaseEngineStr = Annotated[DatabaseEngine, StrEnumSerializer]
131+
132+
122133
class QueryEngine(KebabCaseStrEnum):
123134
CLP = auto()
124135
CLP_S = auto()
125136
PRESTO = auto()
126137

127138

139+
QueryEngineStr = Annotated[QueryEngine, StrEnumSerializer]
140+
141+
128142
class StorageType(LowercaseStrEnum):
129143
FS = auto()
130144
S3 = auto()
@@ -137,9 +151,12 @@ class AwsAuthType(LowercaseStrEnum):
137151
ec2 = auto()
138152

139153

154+
AwsAuthTypeStr = Annotated[AwsAuthType, StrEnumSerializer]
155+
156+
140157
class Package(BaseModel):
141-
storage_engine: StorageEngine = StorageEngine.CLP
142-
query_engine: QueryEngine = QueryEngine.CLP
158+
storage_engine: StorageEngineStr = StorageEngine.CLP
159+
query_engine: QueryEngineStr = QueryEngine.CLP
143160

144161
@model_validator(mode="after")
145162
def validate_query_engine_package_compatibility(self):
@@ -163,15 +180,9 @@ def validate_query_engine_package_compatibility(self):
163180

164181
return self
165182

166-
def dump_to_primitive_dict(self):
167-
d = self.model_dump()
168-
d["storage_engine"] = d["storage_engine"].value
169-
d["query_engine"] = d["query_engine"].value
170-
return d
171-
172183

173184
class Database(BaseModel):
174-
type: DatabaseEngine = DatabaseEngine.MARIADB
185+
type: DatabaseEngineStr = DatabaseEngine.MARIADB
175186
host: DomainStr = "localhost"
176187
port: Port = 3306
177188
name: NonEmptyStr = "clp-db"
@@ -232,7 +243,6 @@ def get_clp_connection_params_and_type(self, disable_localhost_socket_connection
232243

233244
def dump_to_primitive_dict(self):
234245
d = self.model_dump(exclude={"username", "password"})
235-
d["type"] = d["type"].value
236246
return d
237247

238248
def load_credentials_from_file(self, credentials_file_path: pathlib.Path):
@@ -360,12 +370,7 @@ class S3Credentials(BaseModel):
360370

361371

362372
class AwsAuthentication(BaseModel):
363-
type: Literal[
364-
AwsAuthType.credentials.value,
365-
AwsAuthType.profile.value,
366-
AwsAuthType.env_vars.value,
367-
AwsAuthType.ec2.value,
368-
]
373+
type: AwsAuthTypeStr
369374
profile: Optional[NonEmptyStr] = None
370375
credentials: Optional[S3Credentials] = None
371376

@@ -408,13 +413,10 @@ class S3IngestionConfig(BaseModel):
408413
type: Literal[StorageType.S3.value] = StorageType.S3.value
409414
aws_authentication: AwsAuthentication
410415

411-
def dump_to_primitive_dict(self):
412-
return self.model_dump()
413-
414416

415417
class FsStorage(BaseModel):
416418
type: Literal[StorageType.FS.value] = StorageType.FS.value
417-
directory: pathlib.Path
419+
directory: SerializablePath
418420

419421
@field_validator("directory", mode="before")
420422
@classmethod
@@ -425,16 +427,11 @@ def validate_directory(cls, value):
425427
def make_config_paths_absolute(self, clp_home: pathlib.Path):
426428
self.directory = make_config_path_absolute(clp_home, self.directory)
427429

428-
def dump_to_primitive_dict(self):
429-
d = self.model_dump()
430-
d["directory"] = str(d["directory"])
431-
return d
432-
433430

434431
class S3Storage(BaseModel):
435432
type: Literal[StorageType.S3.value] = StorageType.S3.value
436433
s3_config: S3Config
437-
staging_directory: pathlib.Path
434+
staging_directory: SerializablePath
438435

439436
@field_validator("staging_directory", mode="before")
440437
@classmethod
@@ -455,30 +452,25 @@ def validate_key_prefix(cls, value):
455452
def make_config_paths_absolute(self, clp_home: pathlib.Path):
456453
self.staging_directory = make_config_path_absolute(clp_home, self.staging_directory)
457454

458-
def dump_to_primitive_dict(self):
459-
d = self.model_dump()
460-
d["staging_directory"] = str(d["staging_directory"])
461-
return d
462-
463455

464456
class FsIngestionConfig(FsStorage):
465-
directory: pathlib.Path = pathlib.Path("/")
457+
directory: SerializablePath = pathlib.Path("/")
466458

467459

468460
class ArchiveFsStorage(FsStorage):
469-
directory: pathlib.Path = CLP_DEFAULT_DATA_DIRECTORY_PATH / "archives"
461+
directory: SerializablePath = CLP_DEFAULT_DATA_DIRECTORY_PATH / "archives"
470462

471463

472464
class StreamFsStorage(FsStorage):
473-
directory: pathlib.Path = CLP_DEFAULT_DATA_DIRECTORY_PATH / "streams"
465+
directory: SerializablePath = CLP_DEFAULT_DATA_DIRECTORY_PATH / "streams"
474466

475467

476468
class ArchiveS3Storage(S3Storage):
477-
staging_directory: pathlib.Path = CLP_DEFAULT_DATA_DIRECTORY_PATH / "staged-archives"
469+
staging_directory: SerializablePath = CLP_DEFAULT_DATA_DIRECTORY_PATH / "staged-archives"
478470

479471

480472
class StreamS3Storage(S3Storage):
481-
staging_directory: pathlib.Path = CLP_DEFAULT_DATA_DIRECTORY_PATH / "staged-streams"
473+
staging_directory: SerializablePath = CLP_DEFAULT_DATA_DIRECTORY_PATH / "staged-streams"
482474

483475

484476
def _get_directory_from_storage_config(
@@ -520,11 +512,6 @@ def set_directory(self, directory: pathlib.Path):
520512
def get_directory(self) -> pathlib.Path:
521513
return _get_directory_from_storage_config(self.storage)
522514

523-
def dump_to_primitive_dict(self):
524-
d = self.model_dump()
525-
d["storage"] = self.storage.dump_to_primitive_dict()
526-
return d
527-
528515

529516
class StreamOutput(BaseModel):
530517
storage: Union[StreamFsStorage, StreamS3Storage] = StreamFsStorage()
@@ -536,11 +523,6 @@ def set_directory(self, directory: pathlib.Path):
536523
def get_directory(self) -> pathlib.Path:
537524
return _get_directory_from_storage_config(self.storage)
538525

539-
def dump_to_primitive_dict(self):
540-
d = self.model_dump()
541-
d["storage"] = self.storage.dump_to_primitive_dict()
542-
return d
543-
544526

545527
class WebUi(BaseModel):
546528
host: DomainStr = "localhost"
@@ -590,24 +572,26 @@ class CLPConfig(BaseModel):
590572
query_worker: QueryWorker = QueryWorker()
591573
webui: WebUi = WebUi()
592574
garbage_collector: GarbageCollector = GarbageCollector()
593-
credentials_file_path: pathlib.Path = CLP_DEFAULT_CREDENTIALS_FILE_PATH
575+
credentials_file_path: SerializablePath = CLP_DEFAULT_CREDENTIALS_FILE_PATH
594576

595577
presto: Optional[Presto] = None
596578

597579
archive_output: ArchiveOutput = ArchiveOutput()
598580
stream_output: StreamOutput = StreamOutput()
599-
data_directory: pathlib.Path = pathlib.Path("var") / "data"
600-
logs_directory: pathlib.Path = pathlib.Path("var") / "log"
601-
aws_config_directory: Optional[pathlib.Path] = None
581+
data_directory: SerializablePath = pathlib.Path("var") / "data"
582+
logs_directory: SerializablePath = pathlib.Path("var") / "log"
583+
aws_config_directory: Optional[SerializablePath] = None
602584

603-
_container_image_id_path: pathlib.Path = PrivateAttr(
585+
_container_image_id_path: SerializablePath = PrivateAttr(
604586
default=CLP_PACKAGE_CONTAINER_IMAGE_ID_PATH
605587
)
606-
_version_file_path: pathlib.Path = PrivateAttr(default=CLP_VERSION_FILE_PATH)
588+
_version_file_path: SerializablePath = PrivateAttr(default=CLP_VERSION_FILE_PATH)
607589

608590
@field_validator("aws_config_directory")
609591
@classmethod
610-
def expand_profile_user_home(cls, value: Optional[pathlib.Path]):
592+
def expand_profile_user_home(
593+
cls, value: Optional[SerializablePath]
594+
) -> Optional[SerializablePath]:
611595
if value is not None:
612596
value = value.expanduser()
613597
return value
@@ -693,7 +677,7 @@ def validate_aws_config_dir(self):
693677
auth_configs.append(self.stream_output.storage.s3_config.aws_authentication)
694678

695679
for auth in auth_configs:
696-
if AwsAuthType.profile.value == auth.type:
680+
if AwsAuthType.profile == auth.type:
697681
profile_auth_used = True
698682
break
699683

@@ -735,27 +719,14 @@ def get_runnable_components(self) -> Set[str]:
735719

736720
def dump_to_primitive_dict(self):
737721
custom_serialized_fields = {
738-
"package",
739722
"database",
740723
"queue",
741724
"redis",
742-
"logs_input",
743-
"archive_output",
744-
"stream_output",
745725
}
746726
d = self.model_dump(exclude=custom_serialized_fields)
747727
for key in custom_serialized_fields:
748728
d[key] = getattr(self, key).dump_to_primitive_dict()
749729

750-
# Turn paths into primitive strings
751-
d["credentials_file_path"] = str(self.credentials_file_path)
752-
d["data_directory"] = str(self.data_directory)
753-
d["logs_directory"] = str(self.logs_directory)
754-
if self.aws_config_directory is not None:
755-
d["aws_config_directory"] = str(self.aws_config_directory)
756-
else:
757-
d["aws_config_directory"] = None
758-
759730
return d
760731

761732
@model_validator(mode="after")
@@ -772,22 +743,12 @@ def validate_presto_config(self):
772743
class WorkerConfig(BaseModel):
773744
package: Package = Package()
774745
archive_output: ArchiveOutput = ArchiveOutput()
775-
data_directory: pathlib.Path = CLPConfig().data_directory
746+
data_directory: SerializablePath = CLPConfig().data_directory
776747

777748
# Only needed by query workers.
778749
stream_output: StreamOutput = StreamOutput()
779750
stream_collection_name: str = ResultsCache().stream_collection_name
780751

781-
def dump_to_primitive_dict(self):
782-
d = self.model_dump()
783-
d["archive_output"] = self.archive_output.dump_to_primitive_dict()
784-
785-
# Turn paths into primitive strings
786-
d["data_directory"] = str(self.data_directory)
787-
d["stream_output"] = self.stream_output.dump_to_primitive_dict()
788-
789-
return d
790-
791752

792753
def get_components_for_target(target: str) -> Set[str]:
793754
if target in TARGET_TO_COMPONENTS:
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pathlib
2+
3+
from strenum import StrEnum
4+
5+
6+
def serialize_str_enum(member: StrEnum) -> str:
7+
"""
8+
Serializes a `strenum.StrEnum` member to its underlying value.
9+
10+
:param member:
11+
:return: The underlying string value of the enum member.
12+
"""
13+
return member.value
14+
15+
16+
def serialize_path(path: pathlib.Path) -> str:
17+
"""
18+
Serializes a `pathlib.Path` to its string representation.
19+
20+
:param path:
21+
:return: The string representation of the path.
22+
"""
23+
return str(path)

0 commit comments

Comments
 (0)