diff --git a/packages/models-library/VERSION b/packages/models-library/VERSION index 0ea3a944b399..0d91a54c7d43 100644 --- a/packages/models-library/VERSION +++ b/packages/models-library/VERSION @@ -1 +1 @@ -0.2.0 +0.3.0 diff --git a/packages/models-library/setup.cfg b/packages/models-library/setup.cfg index b483a024d04c..2eec3789ac03 100644 --- a/packages/models-library/setup.cfg +++ b/packages/models-library/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.0 +current_version = 0.3.0 commit = True message = packages/models-library version: {current_version} → {new_version} tag = False @@ -16,10 +16,10 @@ test = pytest [tool:pytest] asyncio_mode = auto asyncio_default_fixture_loop_scope = function -markers = +markers = diagnostics: "can be used to run diagnostics against deployed data (e.g. database, registry etc)" testit: "marks test to run during development" [mypy] -plugins = +plugins = pydantic.mypy diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py index 88333f0b0d98..da97f1d7adae 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py @@ -1,10 +1,10 @@ from datetime import datetime from typing import Annotated, Self +from models_library.string_types import DisplaySafeStr from pydantic import ConfigDict, Field, field_validator from ..access_rights import AccessRights -from ..basic_types import IDStr from ..folders import FolderDB, FolderID from ..groups import GroupID from ..utils.common_validators import null_or_none_str_to_none_validator @@ -53,7 +53,7 @@ def from_domain_model( class FolderCreateBodyParams(InputSchema): - name: IDStr + name: DisplaySafeStr parent_folder_id: FolderID | None = None workspace_id: WorkspaceID | None = None model_config = ConfigDict(extra="forbid") @@ -68,7 +68,7 @@ class FolderCreateBodyParams(InputSchema): class FolderReplaceBodyParams(InputSchema): - name: IDStr + name: DisplaySafeStr parent_folder_id: FolderID | None = None model_config = ConfigDict(extra="forbid") diff --git a/packages/models-library/src/models_library/api_schemas_webserver/groups.py b/packages/models-library/src/models_library/api_schemas_webserver/groups.py index 5f56fbc9790e..8dc0166aaec9 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/groups.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/groups.py @@ -3,6 +3,7 @@ from common_library.basic_types import DEFAULT_FACTORY from common_library.dict_tools import remap_keys +from models_library.string_types import DescriptionSafeStr, NameSafeStr from pydantic import ( AnyHttpUrl, AnyUrl, @@ -27,7 +28,7 @@ StandardGroupCreate, StandardGroupUpdate, ) -from ..users import UserID, UserNameID +from ..users import UserID, UserNameID, UserNameSafeID from ..utils.common_validators import create__check_only_one_is_set__root_validator from ._base import InputSchema, OutputSchema, OutputSchemaWithoutCamelCase @@ -155,8 +156,8 @@ def _update_json_schema_extra(schema: JsonDict) -> None: class GroupCreate(InputSchema): - label: str - description: str + label: NameSafeStr + description: DescriptionSafeStr thumbnail: AnyUrl | None = None def to_domain_model(self) -> StandardGroupCreate: @@ -173,8 +174,8 @@ def to_domain_model(self) -> StandardGroupCreate: class GroupUpdate(InputSchema): - label: str | None = None - description: str | None = None + label: NameSafeStr | None = None + description: DescriptionSafeStr | None = None thumbnail: AnyUrl | None = None def to_domain_model(self) -> StandardGroupUpdate: @@ -373,7 +374,7 @@ class GroupUserAdd(InputSchema): """ uid: UserID | None = None - user_name: Annotated[UserNameID | None, Field(alias="userName")] = None + user_name: Annotated[UserNameSafeID | None, Field(alias="userName")] = None email: Annotated[ LowerCaseEmailStr | None, Field( diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects.py b/packages/models-library/src/models_library/api_schemas_webserver/projects.py index 083628693882..efcedf3b2bc0 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects.py @@ -22,7 +22,6 @@ from pydantic.config import JsonDict from ..api_schemas_long_running_tasks.tasks import TaskGet -from ..basic_types import LongTruncatedStr, ShortTruncatedStr from ..emails import LowerCaseEmailStr from ..folders import FolderID from ..groups import GroupID @@ -41,6 +40,7 @@ ProjectShareStatus, ProjectStateRunningState, ) +from ..string_types import LongTruncatedStr, ShortTruncatedStr from ..utils._original_fastapi_encoders import jsonable_encoder from ..utils.common_validators import ( empty_str_to_none_pre_validator, diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index 052b8bb4440e..a0f91828aafb 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -1,7 +1,7 @@ import re from datetime import date, datetime from enum import Enum -from typing import Annotated, Any, Literal, Self, TypeAlias +from typing import Annotated, Any, Literal, Self import annotated_types from common_library.basic_types import DEFAULT_FACTORY @@ -11,11 +11,11 @@ from models_library.rest_filters import Filters from models_library.rest_pagination import PageQueryParameters from pydantic import ( + AfterValidator, BaseModel, ConfigDict, EmailStr, Field, - StringConstraints, ValidationInfo, field_validator, model_validator, @@ -27,12 +27,18 @@ from ..groups import AccessRightsDict, Group, GroupID, GroupsByTypeTuple, PrimaryGroupID from ..products import ProductName from ..rest_base import RequestParameters +from ..string_types import ( + GlobPatternSafeStr, + SearchPatternSafeStr, + validate_input_xss_safety, +) from ..users import ( FirstNameStr, LastNameStr, MyProfile, UserID, UserNameID, + UserNameSafeID, UserPermission, UserThirdPartyToken, ) @@ -201,10 +207,21 @@ def from_domain_model( ) +FirstNameSafeStr = Annotated[ + FirstNameStr, + AfterValidator(validate_input_xss_safety), +] + +LastNameSafeStr = Annotated[ + LastNameStr, + AfterValidator(validate_input_xss_safety), +] + + class MyProfileRestPatch(InputSchemaWithoutCamelCase): - first_name: FirstNameStr | None = None - last_name: LastNameStr | None = None - user_name: Annotated[IDStr | None, Field(alias="userName", min_length=4)] = None + first_name: FirstNameSafeStr | None = None + last_name: LastNameSafeStr | None = None + user_name: Annotated[UserNameSafeID | None, Field(alias="userName")] = None # NOTE: phone is updated via a dedicated endpoint! privacy: MyProfilePrivacyPatch | None = None @@ -262,8 +279,7 @@ class UsersGetParams(RequestParameters): class UsersSearch(InputSchema): match_: Annotated[ - str, - StringConstraints(strip_whitespace=True, min_length=1, max_length=80), + SearchPatternSafeStr, Field( description="Search string to match with usernames and public profiles (e.g. emails, first/last name)", alias="match", @@ -314,17 +330,9 @@ class UserAccountReject(InputSchema): email: EmailStr -GlobString: TypeAlias = Annotated[ - str, - StringConstraints( - min_length=3, max_length=200, strip_whitespace=True, pattern=r"^[^%]*$" - ), -] - - class UserAccountSearchQueryParams(RequestParameters): email: Annotated[ - GlobString | None, + GlobPatternSafeStr | None, Field( description="complete or glob pattern for an email", ), @@ -336,7 +344,7 @@ class UserAccountSearchQueryParams(RequestParameters): ), ] = None user_name: Annotated[ - GlobString | None, + GlobPatternSafeStr | None, Field( description="complete or glob pattern for a username", ), diff --git a/packages/models-library/src/models_library/basic_types.py b/packages/models-library/src/models_library/basic_types.py index fe367a04a29c..106854b62d0a 100644 --- a/packages/models-library/src/models_library/basic_types.py +++ b/packages/models-library/src/models_library/basic_types.py @@ -14,7 +14,6 @@ SIMPLE_VERSION_RE, UUID_RE, ) -from .utils.common_validators import trim_string_before assert issubclass(LogLevel, Enum) # nosec assert issubclass(BootModeEnum, Enum) # nosec @@ -151,38 +150,6 @@ def concatenate(*args: "IDStr", link_char: str = " ") -> "IDStr": return IDStr(result) -_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 -ShortTruncatedStr: TypeAlias = Annotated[ - str, - StringConstraints(strip_whitespace=True), - trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), - annotated_types.doc( - """ - A truncated string used to input e.g. titles or display names. - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, - i.e. without raising errors. - """ - # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 - ), -] - -_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github description -LongTruncatedStr: TypeAlias = Annotated[ - str, - StringConstraints(strip_whitespace=True), - trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), - annotated_types.doc( - """ - A truncated string used to input e.g. descriptions or summaries. - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, - i.e. without raising errors. - """ - ), -] - - # auto-incremented primary-key IDs IdInt: TypeAlias = PositiveInt PrimaryKeyInt: TypeAlias = PositiveInt diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py new file mode 100644 index 000000000000..cc7a2b4dcc5d --- /dev/null +++ b/packages/models-library/src/models_library/string_types.py @@ -0,0 +1,270 @@ +import re +from typing import Annotated, Final, NamedTuple, TypeAlias + +import annotated_types +from pydantic import ( + AfterValidator, + StringConstraints, +) +from pydantic_core import PydanticCustomError + +from .utils.common_validators import trim_string_before + +# --- shared heuristics --- +MIN_DESCRIPTION_LENGTH: Final[int] = 3 +MAX_DESCRIPTION_LENGTH: Final[int] = 5000 +MAX_NAME_LENGTH: Final[int] = 100 + +_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 +_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions + + +STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" + + +class XSSPattern(NamedTuple): + pattern: re.Pattern + message: str + + +_SAFE_XSS_PATTERNS: Final[list[XSSPattern]] = [ + # === Lightweight, non-backtracking safe checks (bounded / literal / simple alternations) === + XSSPattern( + re.compile(r"(?i)<\s*(?:script|iframe|object|embed|link|meta|base)\b"), + "Contains potentially dangerous HTML tags", + ), + XSSPattern( + re.compile(r"(?i)"), + "Contains potentially dangerous HTML closing tags", + ), + XSSPattern( + re.compile( + r"(?i)\b(?:src|href|xlink:href|srcdoc)\s*=\s*(?:['\"]\s*)?(?:javascript:|vbscript:|data:)", + re.IGNORECASE, + ), + "Contains unsafe URL protocols in attributes", + ), + XSSPattern( + re.compile(r"(?i)javascript%3a|vbscript%3a|data%3a"), + "Contains encoded malicious protocols", + ), + XSSPattern( + re.compile( + r"(?ix)&#\s*(?:x[0-9a-f]{1,6}|[0-9]{1,6})\s*;\s*(?:javascript:|vbscript:|data:)", + re.IGNORECASE, + ), + "Contains encoded characters followed by unsafe protocols", + ), + XSSPattern( + re.compile(r"(?i)\bon[a-z]{1,20}\s*="), + "Contains inline event handlers", + ), + XSSPattern( + re.compile( + r"(?ix)style\s*=\s*['\"][^'\"]{0,500}\b(?:expression\(|url\s*\()", + re.IGNORECASE, + ), + "Contains potentially dangerous CSS expressions", + ), + XSSPattern( + re.compile( + r"(?ix)<\s*(?:img|svg)\b[^>]{0,500}\b(?:src|xlink:href)\s*=\s*['\"]?(?:javascript:|data:)", + re.IGNORECASE, + ), + "Contains unsafe protocols in image or SVG tags", + ), + XSSPattern( + re.compile( + r"(?ix)<\s*meta\b[^>]{0,200}\bhttp-equiv\s*=\s*['\"]?refresh['\"]?", + re.IGNORECASE, + ), + "Contains meta refresh directives", + ), + XSSPattern( + re.compile(r"(?i)\bsrcdoc\s*=\s*['\"]"), + "Contains srcdoc attribute which may execute arbitrary HTML", + ), + XSSPattern( + re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]"), + "Contains control or invisible characters", + ), + XSSPattern( + re.compile( + r"(?i)(\$\{[^}]{0,200}\}|\#\{[^}]{0,200}\}|<%[^%]{0,200}%>|{{[^}]{0,200}})" + ), + "Contains template injection patterns", + ), + XSSPattern( + re.compile(r"(?i)\bvbscript\s*:"), + "Contains VBScript protocol", + ), +] + + +def _contains_percent_or_entity_obfuscation(value_lower: str) -> bool: + # simple substring checks — no heavy regex backtracking + if ( + "javascript%3a" in value_lower + or "vbscript%3a" in value_lower + or "data%3a" in value_lower + ): + return True + return "data:text/html" in value_lower + + +def _contains_obfuscated_protocol_by_normalization(value_lower: str) -> bool: + # remove ALL non-alphanumeric chars for maximum normalization + # this catches heavily spaced out patterns like "j a v a s c r i p t:" + norm = re.sub(r"[^a-z0-9]", "", value_lower) + return ( + "javascript" in norm + or "vbscript" in norm + or "datatext" in norm + or "data:" in value_lower # keep original check for data: protocol + ) + + +def validate_input_xss_safety(value: str) -> str: + # Run fast, simple regex checks first (fail-fast). + for xss_pattern in _SAFE_XSS_PATTERNS: + if xss_pattern.pattern.search(value): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "{msg}", + {"msg": xss_pattern.message}, + ) + + value_lower = value.lower() + # Fast substring / percent-encoding checks (no backtracking risk) + if _contains_percent_or_entity_obfuscation(value_lower): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "Contains encoded malicious content", + {}, + ) + + # Normalization-based obfuscation detection (de-duplicates heavy regex) + if _contains_obfuscated_protocol_by_normalization(value_lower): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "Contains obfuscated unsafe protocols", + {}, + ) + + return value + + +# --- core composition primitives --- +# +# `*SafeStr` types MUST be used for INPUT string fields in the external APIs +# + +NameSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=MAX_NAME_LENGTH, + pattern=r"^[A-Za-z0-9 ._-]+$", + # CAREFUL: string that ONLY contains alphanumeric characters, spaces, dots, underscores, or hyphens + ), + AfterValidator(validate_input_xss_safety), + annotated_types.doc( + """ A safe string used in **name identifiers**, It might be very restrictive for display names (e.g. titles or labels) """ + ), +] + +DisplaySafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=MAX_NAME_LENGTH, + ), + AfterValidator(validate_input_xss_safety), + annotated_types.doc(""" Like `NameSafeStr` but more suited for display names"""), +] + +DescriptionSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=MIN_DESCRIPTION_LENGTH, + max_length=MAX_DESCRIPTION_LENGTH, + ), + AfterValidator(validate_input_xss_safety), +] + + +GlobPatternSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + min_length=3, + max_length=200, + strip_whitespace=True, + pattern=r"^[A-Za-z0-9 ._\*@-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, asterisks and at signs + ), + AfterValidator(validate_input_xss_safety), +] + + +SearchPatternSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=200, + pattern=r"^[A-Za-z0-9 ._@-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and at signs + ), + AfterValidator(validate_input_xss_safety), + annotated_types.doc( + """ + A safe string used for search patterns. + Strips whitespaces and enforces a length between 1 and 200 characters. + Ensures that the input does not contain percent signs (%) to prevent wildcard searches. + Additionally, it validates the input to ensure it does not contain potentially unsafe content such as SQL + or JavaScript injection patterns. + """ + ), +] + + +# --- truncating string types --- +ShortTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), + AfterValidator(validate_input_xss_safety), + annotated_types.doc( + """ + A truncated string used to input e.g. titles or display names. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs SILENTLY, + i.e. without raising errors. + """ + # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 + ), +] + + +LongTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), + AfterValidator(validate_input_xss_safety), + annotated_types.doc( + """ + A truncated string used to input e.g. descriptions or summaries. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs SILENTLY, + i.e. without raising errors. + """ + ), +] + +# --- tag color string (hex format) --- + +ColorStr = Annotated[ + str, + StringConstraints(pattern=re.compile(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$")), +] diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index eba810e7df9d..aeb4e5c94580 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -2,7 +2,15 @@ from typing import Annotated, TypeAlias from common_library.users_enums import UserRole -from pydantic import BaseModel, ConfigDict, Field, PositiveInt, StringConstraints +from models_library.string_types import validate_input_xss_safety +from pydantic import ( + AfterValidator, + BaseModel, + ConfigDict, + Field, + PositiveInt, + StringConstraints, +) from pydantic.config import JsonDict from typing_extensions import ( # https://docs.pydantic.dev/latest/api/standard_library_types/#typeddict TypedDict, @@ -12,9 +20,13 @@ UserID: TypeAlias = PositiveInt UserNameID: TypeAlias = Annotated[ - str, StringConstraints(strip_whitespace=True, min_length=1, max_length=100) + str, StringConstraints(strip_whitespace=True, min_length=4, max_length=100) +] +UserNameSafeID: TypeAlias = Annotated[ + UserNameID, AfterValidator(validate_input_xss_safety) ] + FirstNameStr: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True, max_length=255) ] diff --git a/packages/models-library/tests/test_api_schemas_webserver_users.py b/packages/models-library/tests/test_api_schemas_webserver_users.py index 43375a67e208..98562c93173f 100644 --- a/packages/models-library/tests/test_api_schemas_webserver_users.py +++ b/packages/models-library/tests/test_api_schemas_webserver_users.py @@ -32,7 +32,7 @@ def test_my_profile_patch_username_min_len(): MyProfileRestPatch.model_validate({"userName": "abc"}) assert err_info.value.error_count() == 1 - assert err_info.value.errors()[0]["type"] == "too_short" + assert err_info.value.errors()[0]["type"] == "string_too_short" MyProfileRestPatch.model_validate({"userName": "abcd"}) # OK diff --git a/packages/models-library/tests/test_basic_types.py b/packages/models-library/tests/test_basic_types.py index adf7fe5ecb35..227de4a4410e 100644 --- a/packages/models-library/tests/test_basic_types.py +++ b/packages/models-library/tests/test_basic_types.py @@ -2,12 +2,10 @@ import pytest from models_library.basic_types import ( - _SHORT_TRUNCATED_STR_MAX_LENGTH, EnvVarKey, IDStr, MD5Str, SHA1Str, - ShortTruncatedStr, UUIDStr, VersionTag, ) @@ -74,31 +72,3 @@ def test_string_identifier_constraint_type(): TypeAdapter(IDStr).validate_python("X" * IDStr.max_length) with pytest.raises(ValidationError): TypeAdapter(IDStr).validate_python("X" * (IDStr.max_length + 1)) - - -def test_short_truncated_string(): - curtail_length = _SHORT_TRUNCATED_STR_MAX_LENGTH - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * curtail_length) - == "X" * curtail_length - ), "Max length string should remain intact" - - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 1)) - == "X" * curtail_length - ), "Overlong string should be truncated exactly to max length" - - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 100)) - == "X" * curtail_length - ), "Much longer string should still truncate to exact max length" - - # below limit - assert TypeAdapter(ShortTruncatedStr).validate_python( - "X" * (curtail_length - 1) - ) == "X" * (curtail_length - 1), "Under-length string should not be modified" - - # spaces are trimmed - assert ( - TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" - ), "Only-whitespace string should become empty string" diff --git a/packages/models-library/tests/test_projects.py b/packages/models-library/tests/test_projects.py index 86514df2da22..e7f4e347023c 100644 --- a/packages/models-library/tests/test_projects.py +++ b/packages/models-library/tests/test_projects.py @@ -8,8 +8,8 @@ import pytest from faker import Faker from models_library.api_schemas_webserver.projects import ProjectPatch -from models_library.basic_types import _LONG_TRUNCATED_STR_MAX_LENGTH from models_library.projects import Project +from models_library.string_types import _LONG_TRUNCATED_STR_MAX_LENGTH @pytest.fixture() diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py new file mode 100644 index 000000000000..dd8ca7c018e5 --- /dev/null +++ b/packages/models-library/tests/test_string_types.py @@ -0,0 +1,155 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=too-many-arguments + + +import pytest +from models_library.string_types import ( + _SHORT_TRUNCATED_STR_MAX_LENGTH, + DescriptionSafeStr, + NameSafeStr, + ShortTruncatedStr, +) +from pydantic import BaseModel, TypeAdapter, ValidationError + + +def test_short_truncated_string(): + curtail_length = _SHORT_TRUNCATED_STR_MAX_LENGTH + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * curtail_length) + == "X" * curtail_length + ), "Max length string should remain intact" + + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 1)) + == "X" * curtail_length + ), "Overlong string should be truncated exactly to max length" + + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 100)) + == "X" * curtail_length + ), "Much longer string should still truncate to exact max length" + + # below limit + assert TypeAdapter(ShortTruncatedStr).validate_python( + "X" * (curtail_length - 1) + ) == "X" * (curtail_length - 1), "Under-length string should not be modified" + + # spaces are trimmed + assert ( + TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" + ), "Only-whitespace string should become empty string" + + +class InputRequestModel(BaseModel): + name: NameSafeStr + description: DescriptionSafeStr + + +@pytest.mark.parametrize( + "name,description,should_pass", + [ + # ✅ valid inputs + pytest.param("Alice", "Simple markdown **text**.", True, id="valid-alice"), + pytest.param( + "ACME_Inc", "Multi-line\nMarkdown _description_.", True, id="valid-acme" + ), + pytest.param( + "John-Doe", "Has some inline HTML.", True, id="valid-html-inline" + ), + # ❌ unsafe / invalid names + pytest.param("", False, id="invalid-desc-script" + ), + pytest.param("SafeName", " ", False, id="invalid-desc-whitespace"), + pytest.param("SafeName", "a" * 6000, False, id="invalid-desc-too-long"), + # ❌ additional JS injection patterns that should be caught + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-iframe", + ), + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-img-onerror", + ), + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-svg-onload", + ), + pytest.param( + "SafeName", "vbscript:msgbox(1)", False, id="invalid-desc-vbscript" + ), + pytest.param( + "SafeName", "javascript:alert(1)", False, id="invalid-desc-encoded-js" + ), + # ❌ ReDoS (Regular expression Denial of Service) test patterns + pytest.param( + "SafeName", + "alert(1)", + False, + id="redos-script-spaces", + ), + pytest.param( + "SafeName", + "", + False, + id="redos-img-attributes", + ), + # ❌ Obfuscated protocol tests + pytest.param( + "SafeName", + "j a v a s c r i p t:alert(1)", + False, + id="invalid-desc-spaced-js", + ), + pytest.param( + "SafeName", + "java\nscript\t:alert(1)", + False, + id="invalid-desc-newline-js", + ), + pytest.param( + "SafeName", + "d\ta\tt\ta:text/html,", + False, + id="invalid-desc-obfuscated-data", + ), + pytest.param( + "SafeName", + "v b\ts c r i p t:MsgBox(1)", + False, + id="invalid-desc-spaced-vbs", + ), + ], +) +def test_safe_string_types(name: str, description: str, should_pass: bool): + if should_pass: + model = InputRequestModel(name=name, description=description) + assert model.name + assert model.description + else: + with pytest.raises(ValidationError) as exc_info: + InputRequestModel(name=name, description=description) + + assert exc_info.value.error_count() in (1, 2) + + for error in exc_info.value.errors(): + assert error["loc"][0] in ("name", "description") + assert error["type"] in ( + # NOTE: these codes could be used by the front-end if needed + "string_pattern_mismatch", + "string_unsafe_content", + "string_too_short", + "string_too_long", + ), error["msg"] diff --git a/services/api-server/src/simcore_service_api_server/api/routes/studies.py b/services/api-server/src/simcore_service_api_server/api/routes/studies.py index d13f7facaa2e..af2e0528ee44 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/studies.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/studies.py @@ -4,9 +4,9 @@ from fastapi import APIRouter, Body, Depends, Header, Query, status from fastapi_pagination.api import create_page from models_library.api_schemas_webserver.projects import ProjectGet, ProjectPatch -from models_library.basic_types import LongTruncatedStr, ShortTruncatedStr from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID +from models_library.string_types import LongTruncatedStr, ShortTruncatedStr from ...models.pagination import OnePage, Page, PaginationParams from ...models.schemas.errors import ErrorGet diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index ac535b341ab3..a876db33a0df 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -1804,7 +1804,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[^%]*$ + pattern: ^[A-Za-z0-9 ._\*@-]*$ - type: 'null' title: Email - name: primary_group_id @@ -1825,7 +1825,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[^%]*$ + pattern: ^[A-Za-z0-9 ._\*@-]*$ - type: 'null' title: User Name responses: @@ -13038,9 +13038,14 @@ components: properties: label: type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._-]+$ title: Label description: type: string + maxLength: 5000 + minLength: 3 title: Description thumbnail: anyOf: @@ -13126,11 +13131,16 @@ components: label: anyOf: - type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._-]+$ - type: 'null' title: Label description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description thumbnail: @@ -13155,7 +13165,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username email: @@ -13183,7 +13193,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username description: None if private @@ -18097,10 +18107,15 @@ components: properties: name: type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._-]+$ title: Name description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description color: @@ -18189,11 +18204,16 @@ components: name: anyOf: - type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._-]+$ - type: 'null' title: Name description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description color: @@ -18623,7 +18643,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Invitedby accountRequestStatus: @@ -18634,7 +18654,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Accountrequestreviewedby accountRequestReviewedAt: @@ -18671,7 +18691,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username description: Username of the user if an account was created @@ -18783,7 +18803,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username firstName: @@ -18941,8 +18961,9 @@ components: properties: match: type: string - maxLength: 80 + maxLength: 200 minLength: 1 + pattern: ^[A-Za-z0-9 ._@-]*$ title: Match description: Search string to match with usernames and public profiles (e.g. emails, first/last name) diff --git a/services/web/server/src/simcore_service_webserver/tags/schemas.py b/services/web/server/src/simcore_service_webserver/tags/schemas.py index 7ff06e1ae358..890ffcd697bf 100644 --- a/services/web/server/src/simcore_service_webserver/tags/schemas.py +++ b/services/web/server/src/simcore_service_webserver/tags/schemas.py @@ -1,12 +1,12 @@ -import re -from typing import Annotated, Self +from typing import Self from common_library.groups_dicts import AccessRightsDict from models_library.api_schemas_webserver._base import InputSchema, OutputSchema from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters +from models_library.string_types import ColorStr, DescriptionSafeStr, NameSafeStr from models_library.users import UserID -from pydantic import Field, PositiveInt, StringConstraints +from pydantic import Field, PositiveInt from servicelib.aiohttp.request_keys import RQT_USERID_KEY from simcore_postgres_database.utils_tags import TagAccessRightsDict, TagDict @@ -19,21 +19,16 @@ class TagPathParams(StrictRequestParameters): tag_id: PositiveInt -ColorStr = Annotated[ - str, StringConstraints(pattern=re.compile(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$")) -] - - class TagUpdate(InputSchema): - name: str | None = None - description: str | None = None + name: NameSafeStr | None = None + description: DescriptionSafeStr | None = None color: ColorStr | None = None priority: int | None = None class TagCreate(InputSchema): - name: str - description: str | None = None + name: NameSafeStr + description: DescriptionSafeStr | None = None color: ColorStr priority: int | None = None diff --git a/services/web/server/tests/unit/with_dbs/03/users/conftest.py b/services/web/server/tests/unit/with_dbs/03/users/conftest.py index 2272c5bc9f62..8ba0a7467d16 100644 --- a/services/web/server/tests/unit/with_dbs/03/users/conftest.py +++ b/services/web/server/tests/unit/with_dbs/03/users/conftest.py @@ -11,8 +11,12 @@ import sqlalchemy as sa from aiohttp import web from aiohttp.test_utils import TestServer +from pytest_simcore.helpers.postgres_users import ( + insert_and_get_user_and_secrets_lifespan, +) from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.aiohttp.application import create_safe_application +from simcore_postgres_database.models.users import UserRole from simcore_postgres_database.models.users_details import ( users_pre_registration_details, ) @@ -73,11 +77,6 @@ async def product_owner_user( ) -> AsyncIterable[dict[str, Any]]: """A PO user in the database""" - from pytest_simcore.helpers.postgres_users import ( - insert_and_get_user_and_secrets_lifespan, - ) - from simcore_postgres_database.models.users import UserRole - async with insert_and_get_user_and_secrets_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup asyncpg_engine, email="po-user@email.com",