Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def dump_container_config(
config_file_path_on_host = clp_config.logs_directory / config_filename
config_file_path_on_container = container_clp_config.logs_directory / config_filename
with open(config_file_path_on_host, "w") as f:
yaml.safe_dump(container_clp_config.dump_to_primitive_dict(), f)
yaml.safe_dump(container_clp_config.model_dump(), f)

return config_file_path_on_container, config_file_path_on_host

Expand Down
135 changes: 107 additions & 28 deletions components/clp-py-utils/clp_py_utils/clp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
model_validator,
PlainSerializer,
PrivateAttr,
TypeAdapter,
)
from strenum import KebabCaseStrEnum, LowercaseStrEnum

Expand Down Expand Up @@ -86,6 +87,10 @@
ZstdCompressionLevel = Annotated[int, Field(ge=1, le=19)]


_optional_non_empty_str_validator = TypeAdapter(Optional[NonEmptyStr])
_optional_str_validator = TypeAdapter(Optional[str])


class DeploymentType(KebabCaseStrEnum):
BASE = auto()
FULL = auto()
Expand Down Expand Up @@ -159,6 +164,8 @@ def validate_query_engine_package_compatibility(self):


class Database(BaseModel):
model_config = ConfigDict(extra="allow")

DEFAULT_PORT: ClassVar[int] = 3306

type: DatabaseEngineStr = DatabaseEngine.MARIADB
Expand All @@ -169,8 +176,43 @@ class Database(BaseModel):
auto_commit: bool = False
compress: bool = True

username: Optional[NonEmptyStr] = None
password: Optional[NonEmptyStr] = None
_username: Optional[NonEmptyStr] = PrivateAttr(default=None)
_password: Optional[NonEmptyStr] = PrivateAttr(default=None)

@property
def username(self) -> Optional[NonEmptyStr]:
return self._username

@username.setter
def username(self, value: Optional[NonEmptyStr]) -> None:
self._username = _optional_non_empty_str_validator.validate_python(value)

@property
def password(self) -> Optional[NonEmptyStr]:
return self._password

@password.setter
def password(self, value: Optional[NonEmptyStr]) -> None:
self._password = _optional_non_empty_str_validator.validate_python(value)

def model_post_init(self, __context: Any) -> None:
super().model_post_init(__context)
extra = getattr(self, "__pydantic_extra__", None)
if not extra:
return

if "username" in extra:
self.username = extra.pop("username")
if "password" in extra:
self.password = extra.pop("password")

# Maintain previous "extra='ignore'" semantics for any other unexpected keys.
for key in list(extra.keys()):
if key not in {"username", "password"}:
extra.pop(key)

if not extra:
self.__pydantic_extra__ = None

def ensure_credentials_loaded(self):
if self.username is None or self.password is None:
Expand Down Expand Up @@ -220,10 +262,6 @@ def get_clp_connection_params_and_type(self, disable_localhost_socket_connection
connection_params_and_type["ssl_cert"] = self.ssl_cert
return connection_params_and_type

def dump_to_primitive_dict(self):
d = self.model_dump(exclude={"username", "password"})
return d

def load_credentials_from_file(self, credentials_file_path: pathlib.Path):
config = read_yaml_config_file(credentials_file_path)
if config is None:
Expand Down Expand Up @@ -276,17 +314,40 @@ class QueryWorker(BaseModel):


class Redis(BaseModel):
model_config = ConfigDict(extra="allow")

DEFAULT_PORT: ClassVar[int] = 6379

host: DomainStr = "localhost"
port: Port = DEFAULT_PORT
query_backend_database: int = 0
compression_backend_database: int = 1
# Redis can authenticate with a password alone; no username is required.
password: Optional[str] = None
_password: Optional[str] = PrivateAttr(default=None)

@property
def password(self) -> Optional[str]:
return self._password

def dump_to_primitive_dict(self):
return self.model_dump(exclude={"password"})
@password.setter
def password(self, value: Optional[str]) -> None:
self._password = _optional_str_validator.validate_python(value)

def model_post_init(self, __context: Any) -> None:
super().model_post_init(__context)
extra = getattr(self, "__pydantic_extra__", None)
if not extra:
return

if "password" in extra:
self.password = extra.pop("password")

for key in list(extra.keys()):
if key != "password":
extra.pop(key)

if not extra:
self.__pydantic_extra__ = None

Comment on lines 316 to 351
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify Redis password validation requirements.

The Redis class follows the same pattern as Database, but its password is typed as Optional[str] (line 326) rather than Optional[NonEmptyStr], so an empty string is accepted as a password.

Verify whether Redis should accept empty passwords in your deployment:

  • Development environments might not require passwords
  • Production environments should always have non-empty passwords

Consider using Optional[NonEmptyStr] if empty passwords should not be allowed, or add documentation explaining why empty passwords are permitted.

The same concerns about __pydantic_extra__ manipulation from the Database class apply here. Consider the refactoring suggestions from the Database review.


🏁 Script executed:

#!/bin/bash
# Description: Check how Redis password is used in the codebase

# Find Redis password usage patterns
echo "=== Redis password usage ==="
rg -n -C3 'redis\.(password|_password)' --type py

# Check if empty password is handled specially anywhere
echo -e "\n=== Empty password handling ==="
rg -n 'password.*=.*""' --type py

Length of output: 2173


🏁 Script executed:

#!/bin/bash
# Compare Database and Redis password validation strategies

echo "=== Database password validation ==="
rg -n -A2 -B2 'class Database' components/clp-py-utils/clp_py_utils/clp_config.py | head -40

echo -e "\n=== Check Database password validator ==="
rg -n '_password.*PrivateAttr\|password.*=' components/clp-py-utils/clp_py_utils/clp_config.py | grep -A2 -B2 Database

echo -e "\n=== Check for any password validation config ==="
rg -n 'NonEmptyStr\|password' components/clp-py-utils/clp_py_utils/clp_config.py | head -20

Length of output: 329


🏁 Script executed:

#!/bin/bash
# View Database class password field definition (around line 166-200)
echo "=== Database class structure ==="
sed -n '166,210p' components/clp-py-utils/clp_py_utils/clp_config.py

echo -e "\n=== Redis class structure ==="
sed -n '316,340p' components/clp-py-utils/clp_py_utils/clp_config.py

# Also search for _optional_str_validator and any other validators used
echo -e "\n=== Validators defined in file ==="
rg -n 'validator|Validator|_.*validator' components/clp-py-utils/clp_py_utils/clp_config.py | head -20

Length of output: 3398


Use Optional[NonEmptyStr] for Redis password to match Database class validation.

The Redis class uses Optional[str] for password validation (line 326), while the Database class uses Optional[NonEmptyStr] (line 181). This inconsistency allows Redis to accept empty passwords.

Based on usage at line 735 in general.py (Celery RESULT_BACKEND connection string), empty passwords create configuration ambiguity. Either:

  1. Change Redis password to Optional[NonEmptyStr] with _optional_non_empty_str_validator for consistency with Database
  2. Document why empty passwords are intentionally allowed (e.g., development-only configuration)
🤖 Prompt for AI Agents
In components/clp-py-utils/clp_py_utils/clp_config.py around lines 316 to 351,
the Redis class currently treats password as Optional[str] allowing empty
strings; change the Redis password to Optional[NonEmptyStr] (update the
_password PrivateAttr type, the property return type, and the setter parameter)
and use the existing _optional_non_empty_str_validator instead of
_optional_str_validator in the password.setter so empty passwords are rejected
and behavior matches the Database class validation.

def load_credentials_from_file(self, credentials_file_path: pathlib.Path):
config = read_yaml_config_file(credentials_file_path)
Expand Down Expand Up @@ -341,16 +402,49 @@ def transform_for_container(self):


class Queue(BaseModel):
model_config = ConfigDict(extra="allow")

DEFAULT_PORT: ClassVar[int] = 5672

host: DomainStr = "localhost"
port: Port = DEFAULT_PORT

username: Optional[NonEmptyStr] = None
password: Optional[str] = None
_username: Optional[NonEmptyStr] = PrivateAttr(default=None)
_password: Optional[str] = PrivateAttr(default=None)

@property
def username(self) -> Optional[NonEmptyStr]:
return self._username

@username.setter
def username(self, value: Optional[NonEmptyStr]) -> None:
self._username = _optional_non_empty_str_validator.validate_python(value)

@property
def password(self) -> Optional[str]:
return self._password

@password.setter
def password(self, value: Optional[str]) -> None:
self._password = _optional_str_validator.validate_python(value)

def dump_to_primitive_dict(self):
return self.model_dump(exclude={"username", "password"})
def model_post_init(self, __context: Any) -> None:
super().model_post_init(__context)
extra = getattr(self, "__pydantic_extra__", None)
if not extra:
return

if "username" in extra:
self.username = extra.pop("username")
if "password" in extra:
self.password = extra.pop("password")

for key in list(extra.keys()):
if key not in {"username", "password"}:
extra.pop(key)

if not extra:
self.__pydantic_extra__ = None

Comment on lines 404 to 448
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

🧩 Analysis chain

Verify Queue password validation and ensure consistency.

The Queue class types password as Optional[str] (line 413), which allows empty passwords, while username is typed as Optional[NonEmptyStr] (line 412). This is inconsistent with the Database class, where both username and password must be non-empty strings.

Verify whether Queue passwords should be allowed to be empty:

  • RabbitMQ (the queue implementation) typically requires non-empty passwords in production
  • If empty passwords are not valid, change line 413 to use Optional[NonEmptyStr]

The same concerns about __pydantic_extra__ manipulation apply here. See the Database class review for refactoring suggestions.


🏁 Script executed:

#!/bin/bash
# Description: Check Queue password validation requirements

# Find Queue password usage and validation
echo "=== Queue password usage ==="
rg -n -C3 'queue\.(password|_password)' --type py

# Check credentials file generation for queue password
echo -e "\n=== Credentials file generation ==="
rg -n -A5 -B5 'QUEUE_COMPONENT_NAME.*password' --type py

Length of output: 4524


🏁 Script executed:

#!/bin/bash
# Check get_config_value implementation and Database class for comparison

echo "=== get_config_value implementation ==="
rg -n -A10 'def get_config_value' --type py

echo -e "\n=== Database class password definition ==="
rg -n -B5 -A15 'class Database' --type py | head -60

echo -e "\n=== Check for password fallbacks or defaults ==="
rg -n 'password.*=.*get_config_value|password.*default' --type py | head -20

Length of output: 5328


🏁 Script executed:

#!/bin/bash
# Check for any special handling of empty passwords and Queue auth configuration

echo "=== Check Queue authentication mode/behavior ==="
rg -n 'Queue|queue.*auth' --type py components/clp-py-utils/clp_py_utils/clp_config.py | head -20

echo -e "\n=== Check Redis password usage and comparison ==="
rg -n -B3 -A3 'class Redis' --type py components/clp-py-utils/clp_py_utils/clp_config.py

echo -e "\n=== Check if empty passwords are tested or validated ==="
rg -n 'password.*""|\\.password.*None' --type py | grep -i 'test\|default' | head -10

echo -e "\n=== Check credentials file schema/documentation ==="
rg -n 'QUEUE_COMPONENT_NAME|queue.*user.*password' --type py | head -20

Length of output: 1784


🏁 Script executed:

#!/bin/bash
# Final checks: Look for any AMQP/RabbitMQ setup that might explain Optional[str]

echo "=== Check for RabbitMQ guest/no-auth mode or defaults ==="
rg -n -i 'rabbitmq|guest|noauth|auth.*disable' --type py

echo -e "\n=== Check Redis auth configuration or mode ==="
rg -n -B5 -A5 'class Redis' --type py

echo -e "\n=== Check if Queue/Redis passwords are ever None or empty in practice ==="
rg -n 'password\s*=\s*None|password\s*==\s*""' --type py

echo -e "\n=== Verify if Database pattern is intentional by checking comments ==="
rg -n -B3 'NonEmptyStr.*password|password.*NonEmptyStr' --type py

Length of output: 2155


Align Queue password validation with Database for consistency and correctness.

The Queue class uses Optional[str] for password (line 413), allowing empty strings, while the Database class requires Optional[NonEmptyStr]. This inconsistency is problematic because:

  • Generated credentials are always non-empty (secrets.token_urlsafe(8))
  • AMQP connection strings require valid credentials
  • There is no evidence of empty passwords being intentional or used

Change line 413 from _password: Optional[str] to _password: Optional[NonEmptyStr], update the password property's return annotation and the setter's parameter annotation to Optional[NonEmptyStr], and switch the setter to the Database pattern (use _optional_non_empty_str_validator).

🤖 Prompt for AI Agents
In components/clp-py-utils/clp_py_utils/clp_config.py around lines 404 to 448,
the Queue class declares _password as Optional[str] and uses the optional string
validator, which allows empty passwords and is inconsistent with Database
(Optional[NonEmptyStr]); change the private attribute type to
Optional[NonEmptyStr] and update the password setter to use
_optional_non_empty_str_validator.validate_python(value) so passwords are
validated as non-empty, and ensure any related type hints and imports remain
correct.

def load_credentials_from_file(self, credentials_file_path: pathlib.Path):
config = read_yaml_config_file(credentials_file_path)
Expand Down Expand Up @@ -426,9 +520,6 @@ class S3IngestionConfig(BaseModel):
type: Literal[StorageType.S3.value] = StorageType.S3.value
aws_authentication: AwsAuthentication

def dump_to_primitive_dict(self):
return self.model_dump()

def transform_for_container(self):
pass

Expand Down Expand Up @@ -773,18 +864,6 @@ def get_deployment_type(self) -> DeploymentType:
else:
return DeploymentType.FULL

def dump_to_primitive_dict(self):
custom_serialized_fields = {
"database",
"queue",
"redis",
}
d = self.model_dump(exclude=custom_serialized_fields)
for key in custom_serialized_fields:
d[key] = getattr(self, key).dump_to_primitive_dict()

return d

@model_validator(mode="after")
def validate_presto_config(self):
query_engine = self.package.query_engine
Expand Down
Loading