diff --git a/docs/llm-prompts/pydantic-annotated-fields.md b/docs/llm-prompts/pydantic-annotated-fields.md new file mode 100644 index 000000000000..9b128e7bd72a --- /dev/null +++ b/docs/llm-prompts/pydantic-annotated-fields.md @@ -0,0 +1,84 @@ +# Prompt + +``` +Please convert all pydantic model fields that use `Field()` with default values to use the Annotated pattern instead. +Follow these guidelines: + +1. Move default values outside of `Field()` like this: `field_name: Annotated[field_type, Field(description="")] = default_value`. +2. Keep all other parameters like validation_alias and descriptions inside `Field()`. +3. For fields using default_factory, keep that parameter as is in the `Field()` constructor, but set the default value outside to DEFAULT_FACTORY from common_library.basic_types. Example: `field_name: Annotated[dict_type, Field(default_factory=dict)] = DEFAULT_FACTORY`. +4. Add the import: `from common_library.basic_types import DEFAULT_FACTORY` if it's not already present. +5. If `Field()` is left with no parameters once the default value has been moved out (i.e. it would be empty), don't use Annotated at all. Just use: `field_name: field_type = default_value`. +6. Leave any model validations, `model_config` settings, and `field_validators` untouched. 
+``` +## Examples + +### Before: + +```python +from pydantic import BaseModel, Field + +class UserModel(BaseModel): + name: str = Field(default="Anonymous", description="User's display name") + age: int = Field(default=18, ge=0, lt=120) + tags: list[str] = Field(default_factory=list, description="User tags") + metadata: dict[str, str] = Field(default_factory=dict) + is_active: bool = Field(default=True) +``` + +### After: + +```python +from typing import Annotated +from pydantic import BaseModel, Field +from common_library.basic_types import DEFAULT_FACTORY + +class UserModel(BaseModel): + name: Annotated[str, Field(description="User's display name")] = "Anonymous" + age: Annotated[int, Field(ge=0, lt=120)] = 18 + tags: Annotated[list[str], Field(default_factory=list, description="User tags")] = DEFAULT_FACTORY + metadata: Annotated[dict[str, str], Field(default_factory=dict)] = DEFAULT_FACTORY + is_active: bool = True +``` + +## Another Example with Complex Fields + +### Before: + +```python +from pydantic import BaseModel, Field, field_validator +from datetime import datetime + +class ProjectModel(BaseModel): + id: str = Field(default_factory=uuid.uuid4, description="Unique project identifier") + name: str = Field(default="Untitled Project", min_length=3, max_length=50) + created_at: datetime = Field(default_factory=datetime.now) + config: dict = Field(default={"version": "1.0", "theme": "default"}) + + @field_validator("name") + def validate_name(cls, v): + if v.isdigit(): + raise ValueError("Name cannot be only digits") + return v +``` + +### After: + +```python +from typing import Annotated +from pydantic import BaseModel, Field, field_validator +from datetime import datetime +from common_library.basic_types import DEFAULT_FACTORY + +class ProjectModel(BaseModel): + id: Annotated[str, Field(default_factory=uuid.uuid4, description="Unique project identifier")] = DEFAULT_FACTORY + name: Annotated[str, Field(min_length=3, max_length=50)] = "Untitled Project" + 
created_at: Annotated[datetime, Field(default_factory=datetime.now)] = DEFAULT_FACTORY + config: dict = {"version": "1.0", "theme": "default"} + + @field_validator("name") + def validate_name(cls, v): + if v.isdigit(): + raise ValueError("Name cannot be only digits") + return v +``` diff --git a/packages/models-library/src/models_library/basic_types.py b/packages/models-library/src/models_library/basic_types.py index d56b3037d526..fe367a04a29c 100644 --- a/packages/models-library/src/models_library/basic_types.py +++ b/packages/models-library/src/models_library/basic_types.py @@ -3,6 +3,7 @@ from re import Pattern from typing import Annotated, ClassVar, Final, TypeAlias +import annotated_types from common_library.basic_types import BootModeEnum, BuildTargetEnum, LogLevel from pydantic import Field, HttpUrl, PositiveInt, StringConstraints from pydantic_core import core_schema @@ -13,15 +14,16 @@ SIMPLE_VERSION_RE, UUID_RE, ) +from .utils.common_validators import trim_string_before assert issubclass(LogLevel, Enum) # nosec assert issubclass(BootModeEnum, Enum) # nosec assert issubclass(BuildTargetEnum, Enum) # nosec __all__: tuple[str, ...] = ( - "LogLevel", "BootModeEnum", "BuildTargetEnum", + "LogLevel", ) @@ -70,12 +72,31 @@ UUIDStr: TypeAlias = Annotated[str, StringConstraints(pattern=UUID_RE)] +SafeQueryStr: TypeAlias = Annotated[ + str, + StringConstraints( + max_length=512, # Reasonable limit for query parameters to avoid overflows + strip_whitespace=True, + ), + annotated_types.doc( + """ + A string that is safe to use in URLs and query parameters. + """, + ), +] + + # non-empty bounded string used as identifier # e.g. "123" or "name_123" or "fa327c73-52d8-462a-9267-84eeaf0f90e3" but NOT "" _ELLIPSIS_CHAR: Final[str] = "..." 
class ConstrainedStr(str): + """Emulates pydantic's v1 constrained types + + DEPRECATED: Use instead Annotated[str, StringConstraints(...)] + """ + pattern: str | Pattern[str] | None = None min_length: int | None = None max_length: int | None = None @@ -102,6 +123,11 @@ def __get_pydantic_core_schema__(cls, _source_type, _handler): class IDStr(ConstrainedStr): + """Non-empty bounded string used as identifier + + DEPRECATED: Use instead Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=100)] + """ + strip_whitespace = True min_length = 1 max_length = 100 @@ -125,21 +151,36 @@ def concatenate(*args: "IDStr", link_char: str = " ") -> "IDStr": return IDStr(result) -class ShortTruncatedStr(ConstrainedStr): - # NOTE: Use to input e.g. titles or display names - # A truncated string: - # - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - # - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, i.e. without raising errors. - # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 - strip_whitespace = True - curtail_length = 600 - +_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 +ShortTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), + annotated_types.doc( + """ + A truncated string used to input e.g. titles or display names. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + i.e. without raising errors. + """ + # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 + ), +] -class LongTruncatedStr(ConstrainedStr): - # NOTE: Use to input e.g. 
descriptions or summaries - # Analogous to ShortTruncatedStr - strip_whitespace = True - curtail_length = 65536 # same as github descripton +_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github description +LongTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), + annotated_types.doc( + """ + A truncated string used to input e.g. descriptions or summaries. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + i.e. without raising errors. + """ + ), +] # auto-incremented primary-key IDs diff --git a/packages/models-library/src/models_library/projects.py b/packages/models-library/src/models_library/projects.py index 1889d5ee714c..0c4dd0884b9a 100644 --- a/packages/models-library/src/models_library/projects.py +++ b/packages/models-library/src/models_library/projects.py @@ -11,7 +11,14 @@ from models_library.basic_types import ConstrainedStr from models_library.folders import FolderID from models_library.workspaces import WorkspaceID -from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + HttpUrl, + StringConstraints, + field_validator, +) from .basic_regex import DATE_RE, UUID_RE_BASE from .emails import LowerCaseEmailStr @@ -35,8 +42,7 @@ _DATETIME_FORMAT: Final[str] = "%Y-%m-%dT%H:%M:%S.%fZ" -class ProjectIDStr(ConstrainedStr): - pattern = UUID_RE_BASE +ProjectIDStr: TypeAlias = Annotated[str, StringConstraints(pattern=UUID_RE_BASE)] class DateTimeStr(ConstrainedStr): diff --git a/packages/models-library/src/models_library/rabbitmq_basic_types.py b/packages/models-library/src/models_library/rabbitmq_basic_types.py index e8ae694b8be2..c1602f9d4181 100644 --- 
a/packages/models-library/src/models_library/rabbitmq_basic_types.py +++ b/packages/models-library/src/models_library/rabbitmq_basic_types.py @@ -1,7 +1,7 @@ -from typing import Final +from typing import Annotated, Final, TypeAlias from models_library.basic_types import ConstrainedStr -from pydantic import TypeAdapter +from pydantic import StringConstraints, TypeAdapter REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS: Final[str] = r"^[\w\-\.]*$" @@ -21,7 +21,9 @@ def from_entries(cls, entries: dict[str, str]) -> "RPCNamespace": return TypeAdapter(cls).validate_python(composed_string) -class RPCMethodName(ConstrainedStr): - pattern = REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS - min_length: int = 1 - max_length: int = 252 +RPCMethodName: TypeAlias = Annotated[ + str, + StringConstraints( + pattern=REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS, min_length=1, max_length=252 + ), +] diff --git a/packages/models-library/tests/test_basic_types.py b/packages/models-library/tests/test_basic_types.py index dbd847246cf9..adf7fe5ecb35 100644 --- a/packages/models-library/tests/test_basic_types.py +++ b/packages/models-library/tests/test_basic_types.py @@ -2,6 +2,7 @@ import pytest from models_library.basic_types import ( + _SHORT_TRUNCATED_STR_MAX_LENGTH, EnvVarKey, IDStr, MD5Str, @@ -76,16 +77,28 @@ def test_string_identifier_constraint_type(): def test_short_truncated_string(): + curtail_length = _SHORT_TRUNCATED_STR_MAX_LENGTH assert ( - TypeAdapter(ShortTruncatedStr).validate_python( - "X" * ShortTruncatedStr.curtail_length - ) - == "X" * ShortTruncatedStr.curtail_length - ) + TypeAdapter(ShortTruncatedStr).validate_python("X" * curtail_length) + == "X" * curtail_length + ), "Max length string should remain intact" assert ( - TypeAdapter(ShortTruncatedStr).validate_python( - "X" * (ShortTruncatedStr.curtail_length + 1) - ) - == "X" * ShortTruncatedStr.curtail_length - ) + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 1)) + == "X" * curtail_length + ), "Overlong string should be 
truncated exactly to max length" + + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 100)) + == "X" * curtail_length + ), "Much longer string should still truncate to exact max length" + + # below limit + assert TypeAdapter(ShortTruncatedStr).validate_python( + "X" * (curtail_length - 1) + ) == "X" * (curtail_length - 1), "Under-length string should not be modified" + + # spaces are trimmed + assert ( + TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" + ), "Only-whitespace string should become empty string" diff --git a/packages/models-library/tests/test_projects.py b/packages/models-library/tests/test_projects.py index 2b659bf3c196..86514df2da22 100644 --- a/packages/models-library/tests/test_projects.py +++ b/packages/models-library/tests/test_projects.py @@ -7,7 +7,8 @@ import pytest from faker import Faker -from models_library.api_schemas_webserver.projects import LongTruncatedStr, ProjectPatch +from models_library.api_schemas_webserver.projects import ProjectPatch +from models_library.basic_types import _LONG_TRUNCATED_STR_MAX_LENGTH from models_library.projects import Project @@ -47,8 +48,7 @@ def test_project_with_thumbnail_as_empty_string(minimal_project: dict[str, Any]) def test_project_patch_truncates_description(): # NOTE: checks https://github.com/ITISFoundation/osparc-simcore/issues/5988 - assert LongTruncatedStr.curtail_length - len_truncated = int(LongTruncatedStr.curtail_length) + len_truncated = _LONG_TRUNCATED_STR_MAX_LENGTH long_description = "X" * (len_truncated + 10) assert len(long_description) > len_truncated diff --git a/services/api-server/src/simcore_service_api_server/api/dependencies/models_schemas_solvers_filters.py b/services/api-server/src/simcore_service_api_server/api/dependencies/models_schemas_solvers_filters.py index a0c67e6cab49..2fafb34f984f 100644 --- a/services/api-server/src/simcore_service_api_server/api/dependencies/models_schemas_solvers_filters.py +++ 
b/services/api-server/src/simcore_service_api_server/api/dependencies/models_schemas_solvers_filters.py @@ -1,6 +1,7 @@ from typing import Annotated, Any from fastapi import Query +from models_library.basic_types import SafeQueryStr from pydantic.fields import FieldInfo from ...models.schemas.solvers_filters import SolversListFilters @@ -21,11 +22,11 @@ def _get_query_params(field: FieldInfo) -> dict[str, Any]: def get_solvers_filters( # pylint: disable=unsubscriptable-object solver_id: Annotated[ - str | None, + SafeQueryStr | None, Query(**_get_query_params(SolversListFilters.model_fields["solver_id"])), ] = None, version_display: Annotated[ - str | None, + SafeQueryStr | None, Query(**_get_query_params(SolversListFilters.model_fields["version_display"])), ] = None, ) -> SolversListFilters: diff --git a/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py b/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py index d310170d89ee..c19845df2aac 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py @@ -163,7 +163,7 @@ async def create_study_job( await webserver_api.patch_project( project_id=job.id, - patch_params=ProjectPatch(name=job.name), # type: ignore[arg-type] + patch_params=ProjectPatch(name=job.name), ) await wb_api_rpc.mark_project_as_job(