From feda097d8143adfe526ea262890b1cd8b733e58f Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:26:14 +0200 Subject: [PATCH 01/27] =?UTF-8?q?=F0=9F=8E=A8=20Refactor=20string=20type?= =?UTF-8?q?=20definitions:=20Move=20LongTruncatedStr=20and=20ShortTruncate?= =?UTF-8?q?dStr=20to=20string=5Ftypes.py=20and=20update=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_schemas_webserver/projects.py | 2 +- .../src/models_library/basic_types.py | 33 --------- .../src/models_library/string_types.py | 70 +++++++++++++++++++ .../models-library/tests/test_basic_types.py | 30 -------- .../models-library/tests/test_string_types.py | 33 +++++++++ .../api/routes/studies.py | 2 +- .../tests/unit/with_dbs/03/users/conftest.py | 9 ++- 7 files changed, 109 insertions(+), 70 deletions(-) create mode 100644 packages/models-library/src/models_library/string_types.py create mode 100644 packages/models-library/tests/test_string_types.py diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects.py b/packages/models-library/src/models_library/api_schemas_webserver/projects.py index 083628693882..efcedf3b2bc0 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects.py @@ -22,7 +22,6 @@ from pydantic.config import JsonDict from ..api_schemas_long_running_tasks.tasks import TaskGet -from ..basic_types import LongTruncatedStr, ShortTruncatedStr from ..emails import LowerCaseEmailStr from ..folders import FolderID from ..groups import GroupID @@ -41,6 +40,7 @@ ProjectShareStatus, ProjectStateRunningState, ) +from ..string_types import LongTruncatedStr, ShortTruncatedStr from ..utils._original_fastapi_encoders import jsonable_encoder from ..utils.common_validators import ( empty_str_to_none_pre_validator, diff --git a/packages/models-library/src/models_library/basic_types.py b/packages/models-library/src/models_library/basic_types.py index fe367a04a29c..106854b62d0a 100644 --- a/packages/models-library/src/models_library/basic_types.py +++ b/packages/models-library/src/models_library/basic_types.py @@ -14,7 +14,6 @@ SIMPLE_VERSION_RE, UUID_RE, ) -from .utils.common_validators import trim_string_before assert issubclass(LogLevel, Enum) # nosec assert issubclass(BootModeEnum, Enum) # nosec @@ -151,38 +150,6 @@ def concatenate(*args: "IDStr", link_char: str = " ") -> "IDStr": return IDStr(result) -_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 -ShortTruncatedStr: TypeAlias = Annotated[ - str, - StringConstraints(strip_whitespace=True), - trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), - annotated_types.doc( - """ - A truncated string used to input e.g. titles or display names. - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, - i.e. without raising errors. - """ - # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 - ), -] - -_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github description -LongTruncatedStr: TypeAlias = Annotated[ - str, - StringConstraints(strip_whitespace=True), - trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), - annotated_types.doc( - """ - A truncated string used to input e.g. descriptions or summaries. - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, - i.e. without raising errors. - """ - ), -] - - # auto-incremented primary-key IDs IdInt: TypeAlias = PositiveInt PrimaryKeyInt: TypeAlias = PositiveInt diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py new file mode 100644 index 000000000000..876f48d38d73 --- /dev/null +++ b/packages/models-library/src/models_library/string_types.py @@ -0,0 +1,70 @@ +import re +from typing import Annotated, Final, TypeAlias + +import annotated_types +from pydantic import AfterValidator, StringConstraints + +from .utils.common_validators import trim_string_before + +# --- heuristics --- +SQL_INJECTION_PATTERN = re.compile( + r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC)\b|--|;|'|\")", + re.IGNORECASE, +) +JS_INJECTION_PATTERN = re.compile( + r"(||on\w+\s*=|javascript:)", re.IGNORECASE +) + +MIN_DESCRIPTION_LENGTH = 3 # minimum length for description strings without whitespaces + + +def _validate_input_safety(value: str) -> str: + # reject likely injection content + if SQL_INJECTION_PATTERN.search(value) or JS_INJECTION_PATTERN.search(value): + msg = "Potentially unsafe content detected." + raise ValueError(msg) + return value + + +def _strip_all_whitespaces(value: str) -> str: + # normalize whitespaces + return re.sub(r"\s+", " ", value).strip() + + +DescriptionSafeStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True, min_length=MIN_DESCRIPTION_LENGTH), + AfterValidator(_validate_input_safety), +] + + +_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 +ShortTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), + annotated_types.doc( + """ + A truncated string used to input e.g. titles or display names. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + i.e. without raising errors. + """ + # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 + ), +] + +_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github description +LongTruncatedStr: TypeAlias = Annotated[ + str, + StringConstraints(strip_whitespace=True), + trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), + annotated_types.doc( + """ + A truncated string used to input e.g. descriptions or summaries. + Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + i.e. without raising errors. + """ + ), +] diff --git a/packages/models-library/tests/test_basic_types.py b/packages/models-library/tests/test_basic_types.py index adf7fe5ecb35..227de4a4410e 100644 --- a/packages/models-library/tests/test_basic_types.py +++ b/packages/models-library/tests/test_basic_types.py @@ -2,12 +2,10 @@ import pytest from models_library.basic_types import ( - _SHORT_TRUNCATED_STR_MAX_LENGTH, EnvVarKey, IDStr, MD5Str, SHA1Str, - ShortTruncatedStr, UUIDStr, VersionTag, ) @@ -74,31 +72,3 @@ def test_string_identifier_constraint_type(): TypeAdapter(IDStr).validate_python("X" * IDStr.max_length) with pytest.raises(ValidationError): TypeAdapter(IDStr).validate_python("X" * (IDStr.max_length + 1)) - - -def test_short_truncated_string(): - curtail_length = _SHORT_TRUNCATED_STR_MAX_LENGTH - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * curtail_length) - == "X" * curtail_length - ), "Max length string should remain intact" - - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 1)) - == "X" * curtail_length - ), "Overlong string should be truncated exactly to max length" - - assert ( - TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 100)) - == "X" * curtail_length - ), "Much longer string should still truncate to exact max length" - - # below limit - assert TypeAdapter(ShortTruncatedStr).validate_python( - "X" * (curtail_length - 1) - ) == "X" * (curtail_length - 1), "Under-length string should not be modified" - - # spaces are trimmed - assert ( - TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" - ), "Only-whitespace string should become empty string" diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py new file mode 100644 index 000000000000..74a7018b4602 --- /dev/null +++ b/packages/models-library/tests/test_string_types.py @@ -0,0 +1,33 @@ +from models_library.basic_types import ( + _SHORT_TRUNCATED_STR_MAX_LENGTH, +) +from models_library.string_types import ShortTruncatedStr +from pydantic import TypeAdapter + + +def test_short_truncated_string(): + curtail_length = _SHORT_TRUNCATED_STR_MAX_LENGTH + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * curtail_length) + == "X" * curtail_length + ), "Max length string should remain intact" + + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 1)) + == "X" * curtail_length + ), "Overlong string should be truncated exactly to max length" + + assert ( + TypeAdapter(ShortTruncatedStr).validate_python("X" * (curtail_length + 100)) + == "X" * curtail_length + ), "Much longer string should still truncate to exact max length" + + # below limit + assert TypeAdapter(ShortTruncatedStr).validate_python( + "X" * (curtail_length - 1) + ) == "X" * (curtail_length - 1), "Under-length string should not be modified" + + # spaces are trimmed + assert ( + TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" + ), "Only-whitespace string should become empty string" diff --git a/services/api-server/src/simcore_service_api_server/api/routes/studies.py b/services/api-server/src/simcore_service_api_server/api/routes/studies.py index d13f7facaa2e..af2e0528ee44 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/studies.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/studies.py @@ -4,9 +4,9 @@ from fastapi import APIRouter, Body, Depends, Header, Query, status from fastapi_pagination.api import create_page from models_library.api_schemas_webserver.projects import ProjectGet, ProjectPatch -from models_library.basic_types import LongTruncatedStr, ShortTruncatedStr from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID +from models_library.string_types import LongTruncatedStr, ShortTruncatedStr from ...models.pagination import OnePage, Page, PaginationParams from ...models.schemas.errors import ErrorGet diff --git a/services/web/server/tests/unit/with_dbs/03/users/conftest.py b/services/web/server/tests/unit/with_dbs/03/users/conftest.py index 2272c5bc9f62..8ba0a7467d16 100644 --- a/services/web/server/tests/unit/with_dbs/03/users/conftest.py +++ b/services/web/server/tests/unit/with_dbs/03/users/conftest.py @@ -11,8 +11,12 @@ import sqlalchemy as sa from aiohttp import web from aiohttp.test_utils import TestServer +from pytest_simcore.helpers.postgres_users import ( + insert_and_get_user_and_secrets_lifespan, +) from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.aiohttp.application import create_safe_application +from simcore_postgres_database.models.users import UserRole from simcore_postgres_database.models.users_details import ( users_pre_registration_details, ) @@ -73,11 +77,6 @@ async def product_owner_user( ) -> AsyncIterable[dict[str, Any]]: """A PO user in the database""" - from pytest_simcore.helpers.postgres_users import ( - insert_and_get_user_and_secrets_lifespan, - ) - from simcore_postgres_database.models.users import UserRole - async with insert_and_get_user_and_secrets_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup asyncpg_engine, email="po-user@email.com", From 84813ac4cffc14f98d306c8c5522d713a3d71e06 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:03:02 +0200 Subject: [PATCH 02/27] =?UTF-8?q?=F0=9F=8E=A8=20Enhance=20string=20safety:?= =?UTF-8?q?=20Refactor=20string=20types=20to=20include=20validation=20for?= =?UTF-8?q?=20SQL=20and=20JS=20injection,=20and=20add=20tests=20for=20safe?= =?UTF-8?q?=20string=20types?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 61 +++++++++++----- .../models-library/tests/test_string_types.py | 71 ++++++++++++++++++- 2 files changed, 112 insertions(+), 20 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 876f48d38d73..6e1ba43f74c5 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -2,47 +2,73 @@ from typing import Annotated, Final, TypeAlias import annotated_types -from pydantic import AfterValidator, StringConstraints +from pydantic import ( + AfterValidator, + StringConstraints, +) +from pydantic_core import PydanticCustomError from .utils.common_validators import trim_string_before -# --- heuristics --- -SQL_INJECTION_PATTERN = re.compile( +# --- shared heuristics --- +MIN_DESCRIPTION_LENGTH: Final[int] = 3 +MAX_DESCRIPTION_LENGTH: Final[int] = 5000 +MAX_NAME_LENGTH: Final[int] = 100 + +_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 +_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions + +_SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC)\b|--|;|'|\")", re.IGNORECASE, ) -JS_INJECTION_PATTERN = re.compile( - r"(||on\w+\s*=|javascript:)", re.IGNORECASE +_JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( + r"(<\s*script.*?>||on\w+\s*=|javascript:|data:text/html)", + re.IGNORECASE, ) -MIN_DESCRIPTION_LENGTH = 3 # minimum length for description strings without whitespaces +STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" def _validate_input_safety(value: str) -> str: - # reject likely injection content - if SQL_INJECTION_PATTERN.search(value) or JS_INJECTION_PATTERN.search(value): - msg = "Potentially unsafe content detected." - raise ValueError(msg) + if _SQL_INJECTION_PATTERN.search(value) or _JS_INJECTION_PATTERN.search(value): + msg_template = "This input contains potentially unsafe content." + raise PydanticCustomError(STRING_UNSAFE_CONTENT_ERROR_CODE, msg_template, {}) return value -def _strip_all_whitespaces(value: str) -> str: - # normalize whitespaces - return re.sub(r"\s+", " ", value).strip() +# --- core composition primitives --- +# +# *SafeStr types MUST be used for INPUT string fields that will be stored in the DB or shown in the UI +# +NameSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=MAX_NAME_LENGTH, + pattern=r"^[A-Za-z0-9 ._\-]+$", # strict whitelist + ), + AfterValidator(_validate_input_safety), +] DescriptionSafeStr: TypeAlias = Annotated[ str, - StringConstraints(strip_whitespace=True, min_length=MIN_DESCRIPTION_LENGTH), + StringConstraints( + strip_whitespace=True, + min_length=MIN_DESCRIPTION_LENGTH, + max_length=MAX_DESCRIPTION_LENGTH, + ), AfterValidator(_validate_input_safety), ] - -_SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 +# --- truncating string types --- ShortTruncatedStr: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), + AfterValidator(_validate_input_safety), annotated_types.doc( """ A truncated string used to input e.g. titles or display names. @@ -54,11 +80,12 @@ def _strip_all_whitespaces(value: str) -> str: ), ] -_LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github description + LongTruncatedStr: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), + AfterValidator(_validate_input_safety), annotated_types.doc( """ A truncated string used to input e.g. descriptions or summaries. diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index 74a7018b4602..48dd86054a2f 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -1,8 +1,17 @@ -from models_library.basic_types import ( +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=too-many-arguments + + +import pytest +from models_library.string_types import ( _SHORT_TRUNCATED_STR_MAX_LENGTH, + DescriptionSafeStr, + NameSafeStr, + ShortTruncatedStr, ) -from models_library.string_types import ShortTruncatedStr -from pydantic import TypeAdapter +from pydantic import BaseModel, TypeAdapter, ValidationError def test_short_truncated_string(): @@ -31,3 +40,59 @@ def test_short_truncated_string(): assert ( TypeAdapter(ShortTruncatedStr).validate_python(" " * (curtail_length + 1)) == "" ), "Only-whitespace string should become empty string" + + +class InputRequestModel(BaseModel): + name: NameSafeStr + description: DescriptionSafeStr + + +@pytest.mark.parametrize( + "name,description,should_pass", + [ + # ✅ valid inputs + pytest.param("Alice", "Simple markdown **text**.", True, id="valid-alice"), + pytest.param( + "ACME_Inc", "Multi-line\nMarkdown _description_.", True, id="valid-acme" + ), + pytest.param( + "John-Doe", "Has some inline HTML.", True, id="valid-html-inline" + ), + # ❌ unsafe / invalid names + pytest.param("", False, id="invalid-desc-script" + ), + pytest.param( + "SafeName", "UNION SELECT data FROM users", False, id="invalid-desc-sql" + ), + pytest.param("SafeName", " ", False, id="invalid-desc-whitespace"), + pytest.param("SafeName", "a" * 6000, False, id="invalid-desc-too-long"), + ], +) +def test_safe_string_types(name: str, description: str, should_pass: bool): + if should_pass: + model = InputRequestModel(name=name, description=description) + assert model.name + assert model.description + else: + with pytest.raises(ValidationError) as exc_info: + InputRequestModel(name=name, description=description) + + assert exc_info.value.error_count() in (1, 2) + + for error in exc_info.value.errors(): + assert error["loc"][0] in ("name", "description") + assert error["type"] in ( + # NOTE: these codes could be used by the front-end if needed + "string_pattern_mismatch", + "string_unsafe_content", + "string_too_short", + "string_too_long", + ), error["msg"] From d8d5c39091160c46df57649fc383be1935c10fb1 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:15:45 +0200 Subject: [PATCH 03/27] =?UTF-8?q?=F0=9F=8E=A8=20Refactor=20string=20safety?= =?UTF-8?q?=20validation:=20Rename=20validation=20function=20and=20update?= =?UTF-8?q?=20usage=20in=20string=20type=20definitions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_schemas_webserver/users.py | 27 +++++++------------ .../src/models_library/string_types.py | 26 +++++++++++++----- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index 052b8bb4440e..557f35fb06dd 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -1,7 +1,7 @@ import re from datetime import date, datetime from enum import Enum -from typing import Annotated, Any, Literal, Self, TypeAlias +from typing import Annotated, Any, Literal, Self import annotated_types from common_library.basic_types import DEFAULT_FACTORY @@ -15,7 +15,6 @@ ConfigDict, EmailStr, Field, - StringConstraints, ValidationInfo, field_validator, model_validator, @@ -27,6 +26,7 @@ from ..groups import AccessRightsDict, Group, GroupID, GroupsByTypeTuple, PrimaryGroupID from ..products import ProductName from ..rest_base import RequestParameters +from ..string_types import GlobPatternSafeStr, NameSafeStr from ..users import ( FirstNameStr, LastNameStr, @@ -202,9 +202,11 @@ def from_domain_model( class MyProfileRestPatch(InputSchemaWithoutCamelCase): - first_name: FirstNameStr | None = None - last_name: LastNameStr | None = None - user_name: Annotated[IDStr | None, Field(alias="userName", min_length=4)] = None + first_name: NameSafeStr | None = None + last_name: NameSafeStr | None = None + user_name: Annotated[NameSafeStr | None, Field(alias="userName", min_length=4)] = ( + None + ) # NOTE: phone is updated via a dedicated endpoint! privacy: MyProfilePrivacyPatch | None = None @@ -262,8 +264,7 @@ class UsersGetParams(RequestParameters): class UsersSearch(InputSchema): match_: Annotated[ - str, - StringConstraints(strip_whitespace=True, min_length=1, max_length=80), + NameSafeStr, Field( description="Search string to match with usernames and public profiles (e.g. emails, first/last name)", alias="match", @@ -314,17 +315,9 @@ class UserAccountReject(InputSchema): email: EmailStr -GlobString: TypeAlias = Annotated[ - str, - StringConstraints( - min_length=3, max_length=200, strip_whitespace=True, pattern=r"^[^%]*$" - ), -] - - class UserAccountSearchQueryParams(RequestParameters): email: Annotated[ - GlobString | None, + GlobPatternSafeStr | None, Field( description="complete or glob pattern for an email", ), @@ -336,7 +329,7 @@ class UserAccountSearchQueryParams(RequestParameters): ), ] = None user_name: Annotated[ - GlobString | None, + GlobPatternSafeStr | None, Field( description="complete or glob pattern for a username", ), diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 6e1ba43f74c5..5308886db680 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -30,7 +30,7 @@ STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" -def _validate_input_safety(value: str) -> str: +def validate_input_safety(value: str) -> str: if _SQL_INJECTION_PATTERN.search(value) or _JS_INJECTION_PATTERN.search(value): msg_template = "This input contains potentially unsafe content." raise PydanticCustomError(STRING_UNSAFE_CONTENT_ERROR_CODE, msg_template, {}) @@ -39,7 +39,7 @@ def _validate_input_safety(value: str) -> str: # --- core composition primitives --- # -# *SafeStr types MUST be used for INPUT string fields that will be stored in the DB or shown in the UI +# `*SafeStr` types MUST be used for INPUT string fields in the external APIs # NameSafeStr: TypeAlias = Annotated[ @@ -50,9 +50,10 @@ def _validate_input_safety(value: str) -> str: max_length=MAX_NAME_LENGTH, pattern=r"^[A-Za-z0-9 ._\-]+$", # strict whitelist ), - AfterValidator(_validate_input_safety), + AfterValidator(validate_input_safety), ] + DescriptionSafeStr: TypeAlias = Annotated[ str, StringConstraints( @@ -60,15 +61,28 @@ def _validate_input_safety(value: str) -> str: min_length=MIN_DESCRIPTION_LENGTH, max_length=MAX_DESCRIPTION_LENGTH, ), - AfterValidator(_validate_input_safety), + AfterValidator(validate_input_safety), ] + +GlobPatternSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + min_length=3, + max_length=200, + strip_whitespace=True, + pattern=r"^[^%]*$", + ), + AfterValidator(validate_input_safety), +] + + # --- truncating string types --- ShortTruncatedStr: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), - AfterValidator(_validate_input_safety), + AfterValidator(validate_input_safety), annotated_types.doc( """ A truncated string used to input e.g. titles or display names. @@ -85,7 +99,7 @@ def _validate_input_safety(value: str) -> str: str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), - AfterValidator(_validate_input_safety), + AfterValidator(validate_input_safety), annotated_types.doc( """ A truncated string used to input e.g. descriptions or summaries. From 3ff0cb04cbfe191484ed0355229ab2c3a2588c7a Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:18:50 +0200 Subject: [PATCH 04/27] =?UTF-8?q?=F0=9F=8E=A8=20Enhance=20user=20input=20s?= =?UTF-8?q?afety:=20Add=20validation=20to=20first=20and=20last=20name=20fi?= =?UTF-8?q?elds=20and=20update=20OpenAPI=20schema=20for=20username=20and?= =?UTF-8?q?=20match=20properties?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_schemas_webserver/users.py | 18 +++++++++++++++--- .../api/v0/openapi.yaml | 4 +++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index 557f35fb06dd..82b724f6997d 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -11,6 +11,7 @@ from models_library.rest_filters import Filters from models_library.rest_pagination import PageQueryParameters from pydantic import ( + AfterValidator, BaseModel, ConfigDict, EmailStr, @@ -26,7 +27,7 @@ from ..groups import AccessRightsDict, Group, GroupID, GroupsByTypeTuple, PrimaryGroupID from ..products import ProductName from ..rest_base import RequestParameters -from ..string_types import GlobPatternSafeStr, NameSafeStr +from ..string_types import GlobPatternSafeStr, NameSafeStr, validate_input_safety from ..users import ( FirstNameStr, LastNameStr, @@ -201,9 +202,20 @@ def from_domain_model( ) +FirstNameSafeStr = Annotated[ + FirstNameStr, + AfterValidator(validate_input_safety), +] + +LastNameSafeStr = Annotated[ + LastNameStr, + AfterValidator(validate_input_safety), +] + + class MyProfileRestPatch(InputSchemaWithoutCamelCase): - first_name: NameSafeStr | None = None - last_name: NameSafeStr | None = None + first_name: FirstNameSafeStr | None = None + last_name: LastNameSafeStr | None = None user_name: Annotated[NameSafeStr | None, Field(alias="userName", min_length=4)] = ( None ) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index ac535b341ab3..7bb5fd6d6e44 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -14076,6 +14076,7 @@ components: - type: string maxLength: 100 minLength: 4 + pattern: ^[A-Za-z0-9 ._\-]+$ - type: 'null' title: Username privacy: @@ -18941,8 +18942,9 @@ components: properties: match: type: string - maxLength: 80 + maxLength: 100 minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ title: Match description: Search string to match with usernames and public profiles (e.g. emails, first/last name) From f2aa4f7c31ccc015267801e13b60e34c20cef044 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:23:14 +0200 Subject: [PATCH 05/27] =?UTF-8?q?=F0=9F=8E=A8=20Enhance=20input=20validati?= =?UTF-8?q?on:=20Add=20length=20and=20pattern=20constraints=20for=20name?= =?UTF-8?q?=20and=20description=20fields=20in=20OpenAPI=20schema;=20update?= =?UTF-8?q?=20schemas=20to=20use=20safe=20string=20types?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/simcore_service_webserver/api/v0/openapi.yaml | 10 ++++++++++ .../src/simcore_service_webserver/tags/schemas.py | 9 +++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 7bb5fd6d6e44..c990cfaeb4b9 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -18098,10 +18098,15 @@ components: properties: name: type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ title: Name description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description color: @@ -18190,11 +18195,16 @@ components: name: anyOf: - type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ - type: 'null' title: Name description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description color: diff --git a/services/web/server/src/simcore_service_webserver/tags/schemas.py b/services/web/server/src/simcore_service_webserver/tags/schemas.py index 7ff06e1ae358..9d7693c45c4d 100644 --- a/services/web/server/src/simcore_service_webserver/tags/schemas.py +++ b/services/web/server/src/simcore_service_webserver/tags/schemas.py @@ -5,6 +5,7 @@ from models_library.api_schemas_webserver._base import InputSchema, OutputSchema from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters +from models_library.string_types import DescriptionSafeStr, NameSafeStr from models_library.users import UserID from pydantic import Field, PositiveInt, StringConstraints from servicelib.aiohttp.request_keys import RQT_USERID_KEY @@ -25,15 +26,15 @@ class TagPathParams(StrictRequestParameters): class TagUpdate(InputSchema): - name: str | None = None - description: str | None = None + name: NameSafeStr | None = None + description: DescriptionSafeStr | None = None color: ColorStr | None = None priority: int | None = None class TagCreate(InputSchema): - name: str - description: str | None = None + name: NameSafeStr + description: DescriptionSafeStr | None = None color: ColorStr priority: int | None = None From 3c57f7b3cfedc6d27d8b943e1d253c6fd079eadf Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:25:35 +0200 Subject: [PATCH 06/27] =?UTF-8?q?=F0=9F=8E=A8=20Add=20ColorStr=20type:=20I?= =?UTF-8?q?ntroduce=20ColorStr=20for=20hex=20color=20validation=20in=20str?= =?UTF-8?q?ing=5Ftypes.py=20and=20update=20schemas.py=20to=20use=20it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 7 +++++++ .../src/simcore_service_webserver/tags/schemas.py | 12 +++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 5308886db680..dadd3a4f8cc3 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -109,3 +109,10 @@ def validate_input_safety(value: str) -> str: """ ), ] + +# --- tag color string (hex format) --- + +ColorStr = Annotated[ + str, + StringConstraints(pattern=re.compile(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$")), +] diff --git a/services/web/server/src/simcore_service_webserver/tags/schemas.py b/services/web/server/src/simcore_service_webserver/tags/schemas.py index 9d7693c45c4d..890ffcd697bf 100644 --- a/services/web/server/src/simcore_service_webserver/tags/schemas.py +++ b/services/web/server/src/simcore_service_webserver/tags/schemas.py @@ -1,13 +1,12 @@ -import re -from typing import Annotated, Self +from typing import Self from common_library.groups_dicts import AccessRightsDict from models_library.api_schemas_webserver._base import InputSchema, OutputSchema from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters -from models_library.string_types import DescriptionSafeStr, NameSafeStr +from models_library.string_types import ColorStr, DescriptionSafeStr, NameSafeStr from models_library.users import UserID -from pydantic import Field, PositiveInt, StringConstraints +from pydantic import Field, PositiveInt from servicelib.aiohttp.request_keys import RQT_USERID_KEY from simcore_postgres_database.utils_tags import TagAccessRightsDict, TagDict @@ -20,11 +19,6 @@ class TagPathParams(StrictRequestParameters): tag_id: PositiveInt -ColorStr = Annotated[ - str, StringConstraints(pattern=re.compile(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$")) -] - - class TagUpdate(InputSchema): name: NameSafeStr | None = None description: DescriptionSafeStr | None = None From 1250b442dd984b3554e28811d9efaf32d9e3f2c6 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:32:48 +0200 Subject: [PATCH 07/27] =?UTF-8?q?=F0=9F=8E=A8=20Update=20schemas=20to=20us?= =?UTF-8?q?e=20safe=20string=20types:=20Replace=20IDStr=20with=20NameSafeS?= =?UTF-8?q?tr=20in=20folder=20and=20group=20schemas;=20enhance=20OpenAPI?= =?UTF-8?q?=20schema=20with=20validation=20patterns=20for=20name=20and=20d?= =?UTF-8?q?escription=20fields.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_schemas_webserver/folders_v2.py | 6 +++--- .../models_library/api_schemas_webserver/groups.py | 13 +++++++------ packages/models-library/src/models_library/users.py | 12 +++++++++++- .../simcore_service_webserver/api/v0/openapi.yaml | 12 ++++++++++++ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py index 88333f0b0d98..4be151f2d87a 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py @@ -1,10 +1,10 @@ from datetime import datetime from typing import Annotated, Self +from models_library.string_types import NameSafeStr from pydantic import ConfigDict, Field, field_validator from ..access_rights import AccessRights -from ..basic_types import IDStr from ..folders import FolderDB, FolderID from ..groups import GroupID from ..utils.common_validators import null_or_none_str_to_none_validator @@ -53,7 +53,7 @@ def from_domain_model( class FolderCreateBodyParams(InputSchema): - name: IDStr + name: NameSafeStr parent_folder_id: FolderID | None = None workspace_id: WorkspaceID | None = None model_config = ConfigDict(extra="forbid") @@ -68,7 +68,7 @@ class FolderCreateBodyParams(InputSchema): class FolderReplaceBodyParams(InputSchema): - name: IDStr + name: NameSafeStr parent_folder_id: FolderID | None = None model_config = ConfigDict(extra="forbid") diff --git a/packages/models-library/src/models_library/api_schemas_webserver/groups.py b/packages/models-library/src/models_library/api_schemas_webserver/groups.py index 5f56fbc9790e..8dc0166aaec9 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/groups.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/groups.py @@ -3,6 +3,7 @@ from common_library.basic_types import DEFAULT_FACTORY from common_library.dict_tools import remap_keys +from models_library.string_types import DescriptionSafeStr, NameSafeStr from pydantic import ( AnyHttpUrl, AnyUrl, @@ -27,7 +28,7 @@ StandardGroupCreate, StandardGroupUpdate, ) -from ..users import UserID, UserNameID +from ..users import UserID, UserNameID, UserNameSafeID from ..utils.common_validators import create__check_only_one_is_set__root_validator from ._base import InputSchema, OutputSchema, OutputSchemaWithoutCamelCase @@ -155,8 +156,8 @@ def _update_json_schema_extra(schema: JsonDict) -> None: class GroupCreate(InputSchema): - label: str - description: str + label: NameSafeStr + description: DescriptionSafeStr thumbnail: AnyUrl | None = None def to_domain_model(self) -> StandardGroupCreate: @@ -173,8 +174,8 @@ def to_domain_model(self) -> StandardGroupCreate: class GroupUpdate(InputSchema): - label: str | None = None - description: str | None = None + label: NameSafeStr | None = None + description: DescriptionSafeStr | None = None thumbnail: AnyUrl | None = None def to_domain_model(self) -> StandardGroupUpdate: @@ -373,7 +374,7 @@ class GroupUserAdd(InputSchema): """ uid: UserID | None = None - user_name: Annotated[UserNameID | None, Field(alias="userName")] = None + user_name: Annotated[UserNameSafeID | None, Field(alias="userName")] = None email: Annotated[ LowerCaseEmailStr | None, Field( diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index eba810e7df9d..6b7b3f2ece0b 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -2,7 +2,15 @@ from typing import Annotated, TypeAlias from common_library.users_enums import UserRole -from pydantic import BaseModel, ConfigDict, Field, PositiveInt, StringConstraints +from models_library.string_types import validate_input_safety +from pydantic import ( + AfterValidator, + BaseModel, + ConfigDict, + Field, + PositiveInt, + StringConstraints, +) from pydantic.config import JsonDict from typing_extensions import ( # https://docs.pydantic.dev/latest/api/standard_library_types/#typeddict TypedDict, @@ -14,6 +22,8 @@ UserNameID: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True, min_length=1, max_length=100) ] +UserNameSafeID: TypeAlias = Annotated[UserNameID, AfterValidator(validate_input_safety)] + FirstNameStr: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True, max_length=255) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index c990cfaeb4b9..7ca4d54f50b6 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -12828,6 +12828,7 @@ components: type: string maxLength: 100 minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ title: Name parentFolderId: anyOf: @@ -12919,6 +12920,7 @@ components: type: string maxLength: 100 minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ title: Name parentFolderId: anyOf: @@ -13038,9 +13040,14 @@ components: properties: label: type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ title: Label description: type: string + maxLength: 5000 + minLength: 3 title: Description thumbnail: anyOf: @@ -13126,11 +13133,16 @@ components: label: anyOf: - type: string + maxLength: 100 + minLength: 1 + pattern: ^[A-Za-z0-9 ._\-]+$ - type: 'null' title: Label description: anyOf: - type: string + maxLength: 5000 + minLength: 3 - type: 'null' title: Description thumbnail: From ba713ef6cd75aca767b86ac5dca190fba8037228 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:45:16 +0200 Subject: [PATCH 08/27] =?UTF-8?q?=F0=9F=8E=A8=20Improve=20documentation=20?= =?UTF-8?q?clarity:=20Update=20comments=20in=20ShortTruncatedStr=20and=20L?= =?UTF-8?q?ongTruncatedStr=20to=20emphasize=20silent=20handling=20of=20lar?= =?UTF-8?q?ge=20inputs;=20refactor=20import=20statement=20in=20test=5Fproj?= =?UTF-8?q?ects.py=20for=20consistency.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/models-library/src/models_library/string_types.py | 4 ++-- packages/models-library/tests/test_projects.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index dadd3a4f8cc3..9a576e91d6e0 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -87,7 +87,7 @@ def validate_input_safety(value: str) -> str: """ A truncated string used to input e.g. titles or display names. Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs SILENTLY, i.e. without raising errors. """ # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 @@ -104,7 +104,7 @@ def validate_input_safety(value: str) -> str: """ A truncated string used to input e.g. descriptions or summaries. Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, + Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs SILENTLY, i.e. without raising errors. """ ), diff --git a/packages/models-library/tests/test_projects.py b/packages/models-library/tests/test_projects.py index 86514df2da22..e7f4e347023c 100644 --- a/packages/models-library/tests/test_projects.py +++ b/packages/models-library/tests/test_projects.py @@ -8,8 +8,8 @@ import pytest from faker import Faker from models_library.api_schemas_webserver.projects import ProjectPatch -from models_library.basic_types import _LONG_TRUNCATED_STR_MAX_LENGTH from models_library.projects import Project +from models_library.string_types import _LONG_TRUNCATED_STR_MAX_LENGTH @pytest.fixture() From bf6cb5ee4c3dce676622c73a65ccabebdc4538c1 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:52:17 +0200 Subject: [PATCH 09/27] =?UTF-8?q?=F0=9F=8E=A8=20Enhance=20input=20validati?= =?UTF-8?q?on:=20Update=20SQL=20and=20JS=20injection=20patterns=20for=20im?= =?UTF-8?q?proved=20security;=20add=20SearchPatternSafeStr=20type=20for=20?= =?UTF-8?q?flexible=20pattern=20matching.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 9a576e91d6e0..ae681d6ce250 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -19,14 +19,26 @@ _LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions _SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC)\b|--|;|'|\")", + r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/)", re.IGNORECASE, ) _JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(<\s*script.*?>||on\w+\s*=|javascript:|data:text/html)", - re.IGNORECASE, + r"""( + <\s*script.*?>|| + <\s*iframe.*?>|| + <\s*object.*?>|| + <\s*embed.*?>|| + <\s*link[^>]*href\s*=\s*["']?\s*javascript:.*?>| + vbscript:| + javascript:| + data:text/html| + javascript:|javascript:| # encoded 'javascript:' + <\s*img[^>]*onerror\s*=| + <\s*svg[^>]*onload\s*=| + on[a-z]+\s*= # any event handler + )""", + re.IGNORECASE | re.VERBOSE, ) - STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" @@ -77,6 +89,19 @@ def validate_input_safety(value: str) -> str: ] +SearchPatternSafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=200, + # Allow most printable unicode characters except percent (for LIKE), still block injection via validator + pattern=r"^[^\%]+$", + ), + AfterValidator(validate_input_safety), +] + + # --- truncating string types --- ShortTruncatedStr: TypeAlias = Annotated[ str, From eac19dd18128489c49c23a0d019aa84940db63fe Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:57:58 +0200 Subject: [PATCH 10/27] fixes username str --- .../models_library/api_schemas_webserver/users.py | 5 ++--- packages/models-library/src/models_library/users.py | 2 +- .../simcore_service_webserver/api/v0/openapi.yaml | 13 ++++++------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index 82b724f6997d..93848d19bbc7 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -34,6 +34,7 @@ MyProfile, UserID, UserNameID, + UserNameSafeID, UserPermission, UserThirdPartyToken, ) @@ -216,9 +217,7 @@ def from_domain_model( class MyProfileRestPatch(InputSchemaWithoutCamelCase): first_name: FirstNameSafeStr | None = None last_name: LastNameSafeStr | None = None - user_name: Annotated[NameSafeStr | None, Field(alias="userName", min_length=4)] = ( - None - ) + user_name: Annotated[UserNameSafeID | None, Field(alias="userName")] = None # NOTE: phone is updated via a dedicated endpoint! privacy: MyProfilePrivacyPatch | None = None diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index 6b7b3f2ece0b..1b4b2aa76d30 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -20,7 +20,7 @@ UserID: TypeAlias = PositiveInt UserNameID: TypeAlias = Annotated[ - str, StringConstraints(strip_whitespace=True, min_length=1, max_length=100) + str, StringConstraints(strip_whitespace=True, min_length=4, max_length=100) ] UserNameSafeID: TypeAlias = Annotated[UserNameID, AfterValidator(validate_input_safety)] diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 7ca4d54f50b6..f961919271e6 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -13167,7 +13167,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username email: @@ -13195,7 +13195,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username description: None if private @@ -14088,7 +14088,6 @@ components: - type: string maxLength: 100 minLength: 4 - pattern: ^[A-Za-z0-9 ._\-]+$ - type: 'null' title: Username privacy: @@ -18646,7 +18645,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Invitedby accountRequestStatus: @@ -18657,7 +18656,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Accountrequestreviewedby accountRequestReviewedAt: @@ -18694,7 +18693,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username description: Username of the user if an account was created @@ -18806,7 +18805,7 @@ components: anyOf: - type: string maxLength: 100 - minLength: 1 + minLength: 4 - type: 'null' title: Username firstName: From 3a30b7c0a395089c3c35701a94960e7b61d894f6 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:00:44 +0200 Subject: [PATCH 11/27] search --- .../src/models_library/api_schemas_webserver/users.py | 8 ++++++-- .../models-library/src/models_library/string_types.py | 10 +++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index 93848d19bbc7..af0183ddd58f 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -27,7 +27,11 @@ from ..groups import AccessRightsDict, Group, GroupID, GroupsByTypeTuple, PrimaryGroupID from ..products import ProductName from ..rest_base import RequestParameters -from ..string_types import GlobPatternSafeStr, NameSafeStr, validate_input_safety +from ..string_types import ( + GlobPatternSafeStr, + SearchPatternSafeStr, + validate_input_safety, +) from ..users import ( FirstNameStr, LastNameStr, @@ -275,7 +279,7 @@ class UsersGetParams(RequestParameters): class UsersSearch(InputSchema): match_: Annotated[ - NameSafeStr, + SearchPatternSafeStr, Field( description="Search string to match with usernames and public profiles (e.g. emails, first/last name)", alias="match", diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index ae681d6ce250..0a308f6fb798 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -95,10 +95,18 @@ def validate_input_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=200, - # Allow most printable unicode characters except percent (for LIKE), still block injection via validator pattern=r"^[^\%]+$", ), AfterValidator(validate_input_safety), + annotated_types.doc( + """ + A safe string used for search patterns. + Strips whitespaces and enforces a length between 1 and 200 characters. + Ensures that the input does not contain percent signs (%) to prevent wildcard searches. + Additionally, it validates the input to ensure it does not contain potentially unsafe content such as SQL + or JavaScript injection patterns. + """ + ), ] From c386a5fb168eb05852be49857d3980b7ddab49e1 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:01:08 +0200 Subject: [PATCH 12/27] =?UTF-8?q?packages/models-library=20version:=200.2.?= =?UTF-8?q?0=20=E2=86=92=200.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/models-library/VERSION | 2 +- packages/models-library/setup.cfg | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/models-library/VERSION b/packages/models-library/VERSION index 0ea3a944b399..0d91a54c7d43 100644 --- a/packages/models-library/VERSION +++ b/packages/models-library/VERSION @@ -1 +1 @@ -0.2.0 +0.3.0 diff --git a/packages/models-library/setup.cfg b/packages/models-library/setup.cfg index b483a024d04c..2eec3789ac03 100644 --- a/packages/models-library/setup.cfg +++ b/packages/models-library/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.0 +current_version = 0.3.0 commit = True message = packages/models-library version: {current_version} → {new_version} tag = False @@ -16,10 +16,10 @@ test = pytest [tool:pytest] asyncio_mode = auto asyncio_default_fixture_loop_scope = function -markers = +markers = diagnostics: "can be used to run diagnostics against deployed data (e.g. database, registry etc)" testit: "marks test to run during development" [mypy] -plugins = +plugins = pydantic.mypy From b455092210e7319e0f84c325fd9e38aaa51b7055 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:02:04 +0200 Subject: [PATCH 13/27] oas --- .../server/src/simcore_service_webserver/api/v0/openapi.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index f961919271e6..33c5def378f4 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -18963,9 +18963,9 @@ components: properties: match: type: string - maxLength: 100 + maxLength: 200 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[^\%]+$ title: Match description: Search string to match with usernames and public profiles (e.g. emails, first/last name) From 1704aed0c8ac222f7bd5d1fd9c5ddae2112f15ba Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:07:01 +0200 Subject: [PATCH 14/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Upda?= =?UTF-8?q?te=20SQL=20and=20JS=20injection=20patterns;=20add=20tests=20for?= =?UTF-8?q?=20additional=20injection=20scenarios.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 19 ++-------- .../models-library/tests/test_string_types.py | 38 +++++++++++++++++++ 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 0a308f6fb798..00899752c881 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -19,25 +19,12 @@ _LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions _SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/)", + r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|')", re.IGNORECASE, ) _JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"""( - <\s*script.*?>|| - <\s*iframe.*?>|| - <\s*object.*?>|| - <\s*embed.*?>|| - <\s*link[^>]*href\s*=\s*["']?\s*javascript:.*?>| - vbscript:| - javascript:| - data:text/html| - javascript:|javascript:| # encoded 'javascript:' - <\s*img[^>]*onerror\s*=| - <\s*svg[^>]*onload\s*=| - on[a-z]+\s*= # any event handler - )""", - re.IGNORECASE | re.VERBOSE, + r"(<\s*script.*?>||<\s*iframe.*?>||<\s*object.*?>||<\s*embed.*?>||<\s*link[^>]*href\s*=\s*[\"']?\s*javascript:|vbscript:|javascript:|data:text/html|javascript:|javascript:|<\s*img[^>]*onerror\s*=|<\s*svg[^>]*onload\s*=|on[a-z]+\s*=)", + re.IGNORECASE, ) STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index 48dd86054a2f..ef731d06ede4 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -74,6 +74,44 @@ class InputRequestModel(BaseModel): ), pytest.param("SafeName", " ", False, id="invalid-desc-whitespace"), pytest.param("SafeName", "a" * 6000, False, id="invalid-desc-too-long"), + # ❌ additional SQL injection patterns that should be caught + pytest.param( + "SafeName", + "/* comment */ SELECT * FROM users", + False, + id="invalid-desc-sql-comment", + ), + pytest.param( + "SafeName", "TRUNCATE TABLE logs", False, id="invalid-desc-sql-truncate" + ), + pytest.param( + "SafeName", "DECLARE @var INT", False, id="invalid-desc-sql-declare" + ), + # ❌ additional JS injection patterns that should be caught + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-iframe", + ), + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-img-onerror", + ), + pytest.param( + "SafeName", + "", + False, + id="invalid-desc-svg-onload", + ), + pytest.param( + "SafeName", "vbscript:msgbox(1)", False, id="invalid-desc-vbscript" + ), + pytest.param( + "SafeName", "javascript:alert(1)", False, id="invalid-desc-encoded-js" + ), ], ) def test_safe_string_types(name: str, description: str, should_pass: bool): From b0f33cdaf7765c50a2e88db8b1e74dd670a9770b Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:11:07 +0200 Subject: [PATCH 15/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Refa?= =?UTF-8?q?ctor=20SQL=20and=20JS=20injection=20patterns;=20add=20ReDoS=20t?= =?UTF-8?q?est=20cases=20for=20input=20validation.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 6 ++---- packages/models-library/tests/test_string_types.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 00899752c881..3871498c9fa7 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -19,12 +19,10 @@ _LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions _SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|')", - re.IGNORECASE, + r"(?i)\b(?:SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|'", ) _JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(<\s*script.*?>||<\s*iframe.*?>||<\s*object.*?>||<\s*embed.*?>||<\s*link[^>]*href\s*=\s*[\"']?\s*javascript:|vbscript:|javascript:|data:text/html|javascript:|javascript:|<\s*img[^>]*onerror\s*=|<\s*svg[^>]*onload\s*=|on[a-z]+\s*=)", - re.IGNORECASE, + r"(?i)<(?:script|iframe|object|embed)\b[^>]*>||]*href\s*=\s*[\"']?\s*javascript:|(?:vb|java)script:|data:text/html|&#(?:x6A|106);avascript:|<(?:img|svg)\b[^>]*on\w+\s*=|on[a-z]+\s*=", ) STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index ef731d06ede4..10c58e0102a6 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -112,6 +112,19 @@ class InputRequestModel(BaseModel): pytest.param( "SafeName", "javascript:alert(1)", False, id="invalid-desc-encoded-js" ), + # ❌ ReDoS (Regular expression Denial of Service) test patterns + pytest.param( + "SafeName", + "" * 1000 + "alert(1)", + False, + id="redos-nested-tags", + ), + pytest.param( + "SafeName", + "SELECT " + "a" * 10000 + " FROM users", + False, + id="redos-long-sql-keyword", + ), ], ) def test_safe_string_types(name: str, description: str, should_pass: bool): From 6f3b22b7d435d51a386a38bc9bf2b05ad30dde86 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:23:29 +0200 Subject: [PATCH 16/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Refi?= =?UTF-8?q?ne=20SQL=20and=20JS=20injection=20patterns;=20improve=20ReDoS?= =?UTF-8?q?=20test=20cases=20for=20input=20validation.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 4 ++-- .../models-library/tests/test_string_types.py | 20 +++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 3871498c9fa7..cc5d766ce8cb 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -19,10 +19,10 @@ _LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions _SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(?i)\b(?:SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|'", + r"(?i)(?:\b(?:SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|')", ) _JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(?i)<(?:script|iframe|object|embed)\b[^>]*>||]*href\s*=\s*[\"']?\s*javascript:|(?:vb|java)script:|data:text/html|&#(?:x6A|106);avascript:|<(?:img|svg)\b[^>]*on\w+\s*=|on[a-z]+\s*=", + r"(?i)<(?:script|iframe|object|embed)(?:\s[^>]{0,100})?>||]{0,200})?href\s*=\s*[\"']?\s*javascript:|(?:vb|java)script:|data:text/html|&#(?:x6A|106);avascript:|<(?:img|svg)(?:\s[^>]{0,200})?on\w+\s*=|on[a-z]+\s*=", ) STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index 10c58e0102a6..8580bc78ab7e 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -115,15 +115,27 @@ class InputRequestModel(BaseModel): # ❌ ReDoS (Regular expression Denial of Service) test patterns pytest.param( "SafeName", - "" * 1000 + "alert(1)", + "alert(1)", False, - id="redos-nested-tags", + id="redos-script-spaces", ), pytest.param( "SafeName", - "SELECT " + "a" * 10000 + " FROM users", + "", False, - id="redos-long-sql-keyword", + id="redos-img-attributes", + ), + pytest.param( + "SafeName", + "SELECT" + " " * 1000 + "* FROM users", + False, + id="redos-sql-spaces", + ), + pytest.param( + "SafeName", + "/*" + "*" * 500 + "*/ SELECT data", + False, + id="redos-sql-nested-comments", ), ], ) From a206f71467346b1e84237957f90021e01481b73a Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Thu, 16 Oct 2025 20:30:50 +0200 Subject: [PATCH 17/27] =?UTF-8?q?=F0=9F=94=92=20Fix=20validation=20error?= =?UTF-8?q?=20type=20in=20username=20length=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models-library/tests/test_api_schemas_webserver_users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/tests/test_api_schemas_webserver_users.py b/packages/models-library/tests/test_api_schemas_webserver_users.py index 43375a67e208..98562c93173f 100644 --- a/packages/models-library/tests/test_api_schemas_webserver_users.py +++ b/packages/models-library/tests/test_api_schemas_webserver_users.py @@ -32,7 +32,7 @@ def test_my_profile_patch_username_min_len(): MyProfileRestPatch.model_validate({"userName": "abc"}) assert err_info.value.error_count() == 1 - assert err_info.value.errors()[0]["type"] == "too_short" + assert err_info.value.errors()[0]["type"] == "string_too_short" MyProfileRestPatch.model_validate({"userName": "abcd"}) # OK From 7f2e907b1477a6322c3951bcf3f17257c26b7788 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Fri, 17 Oct 2025 15:45:29 +0200 Subject: [PATCH 18/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Add?= =?UTF-8?q?=20XSS=20detection=20pattern=20and=20update=20validation=20test?= =?UTF-8?q?s=20to=20remove=20SQL=20injection=20cases.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 50 ++++++++++++++++--- .../models-library/tests/test_string_types.py | 31 ------------ 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index cc5d766ce8cb..99857975b333 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -18,17 +18,55 @@ _SHORT_TRUNCATED_STR_MAX_LENGTH: Final[int] = 600 _LONG_TRUNCATED_STR_MAX_LENGTH: Final[int] = 65536 # same as github descriptions -_SQL_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(?i)(?:\b(?:SELECT|INSERT|UPDATE|DELETE|DROP|UNION|ALTER|CREATE|EXEC|TRUNCATE|MERGE|GRANT|REVOKE|COMMIT|ROLLBACK|DECLARE|CAST|CONVERT)\b|--|;|/\*|\*/|')", -) -_JS_INJECTION_PATTERN: Final[re.Pattern] = re.compile( - r"(?i)<(?:script|iframe|object|embed)(?:\s[^>]{0,100})?>||]{0,200})?href\s*=\s*[\"']?\s*javascript:|(?:vb|java)script:|data:text/html|&#(?:x6A|106);avascript:|<(?:img|svg)(?:\s[^>]{0,200})?on\w+\s*=|on[a-z]+\s*=", + +# Detect potentially malicious HTML or JavaScript code — specifically cross-site scripting (XSS) vectors +_XSS_PATTERN: Final = re.compile( + r""" + (?ix) # i: case-insensitive, x: verbose (allow comments/whitespace) + + # --- Dangerous tags (open or close) --- + < + (?:script|iframe|object|embed) # tag names + (?:\s[^>]{0,100})?> # optional attributes before '>' + | + # closing tags + + # --- tags with javascript: href --- + | + ]{0,200})? # optional attributes + href\s*=\s*["']?\s*javascript: + + # --- Scripting protocols --- + | + (?:vb|java)script: + + # --- Data URI containing HTML --- + | + data:text/html + + # --- Obfuscated 'javascript:' using HTML entities --- + | + &#(?:x6A|106);avascript: + + # --- or tags with event handlers --- + | + <(?:img|svg) + (?:\s[^>]{0,200})? + on\w+\s*= + + # --- Any inline event handler (e.g., onload=, onclick=) --- + | + on[a-z]+\s*= +""", + re.VERBOSE | re.IGNORECASE, ) STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" def validate_input_safety(value: str) -> str: - if _SQL_INJECTION_PATTERN.search(value) or _JS_INJECTION_PATTERN.search(value): + # NOTE: Don't sanitize against SL injects since underlying repository layer does it + if _XSS_PATTERN.search(value): msg_template = "This input contains potentially unsafe content." raise PydanticCustomError(STRING_UNSAFE_CONTENT_ERROR_CODE, msg_template, {}) return value diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index 8580bc78ab7e..e4416558ce3b 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -60,33 +60,14 @@ class InputRequestModel(BaseModel): ), # ❌ unsafe / invalid names pytest.param(" or tags with event handlers --- - | - <(?:img|svg) - (?:\s[^>]{0,200})? - on\w+\s*= - - # --- Any inline event handler (e.g., onload=, onclick=) --- - | - on[a-z]+\s*= -""", - re.VERBOSE | re.IGNORECASE, -) STRING_UNSAFE_CONTENT_ERROR_CODE: Final[str] = "string_unsafe_content" +class XSSPattern(NamedTuple): + pattern: re.Pattern + message: str + + +_SAFE_XSS_PATTERNS: Final[list[XSSPattern]] = [ + # === Lightweight, non-backtracking safe checks (bounded / literal / simple alternations) === + XSSPattern( + re.compile(r"(?i)<\s*(?:script|iframe|object|embed|link|meta|base)\b"), + "Contains potentially dangerous HTML tags", + ), + XSSPattern( + re.compile(r"(?i)"), + "Contains potentially dangerous HTML closing tags", + ), + XSSPattern( + re.compile( + r"(?i)\b(?:src|href|xlink:href|srcdoc)\s*=\s*['\"]?\s*(?:javascript:|vbscript:|data:)", + re.IGNORECASE, + ), + "Contains unsafe URL protocols in attributes", + ), + XSSPattern( + re.compile(r"(?i)javascript%3a|vbscript%3a|data%3a"), + "Contains encoded malicious protocols", + ), + XSSPattern( + re.compile( + r"(?ix)&#\s*(?:x[0-9a-f]{1,6}|[0-9]{1,6})\s*;\s*(?:javascript:|vbscript:|data:)", + re.IGNORECASE, + ), + "Contains encoded characters followed by unsafe protocols", + ), + XSSPattern( + re.compile(r"(?i)\bon[a-z]{1,20}\s*="), + "Contains inline event handlers", + ), + XSSPattern( + re.compile( + r"(?ix)style\s*=\s*['\"][^'\"]{0,500}\b(?:expression\(|url\s*\()", + re.IGNORECASE, + ), + "Contains potentially dangerous CSS expressions", + ), + XSSPattern( + re.compile( + r"(?ix)<\s*(?:img|svg)\b[^>]{0,500}\b(?:src|xlink:href)\s*=\s*['\"]?(?:javascript:|data:)", + re.IGNORECASE, + ), + "Contains unsafe protocols in image or SVG tags", + ), + XSSPattern( + re.compile( + r"(?ix)<\s*meta\b[^>]{0,200}\bhttp-equiv\s*=\s*['\"]?refresh['\"]?", + re.IGNORECASE, + ), + "Contains meta refresh directives", + ), + XSSPattern( + re.compile(r"(?i)\bsrcdoc\s*=\s*['\"]"), + "Contains srcdoc attribute which may execute arbitrary HTML", + ), + XSSPattern( + re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]"), + "Contains control or invisible characters", + ), + XSSPattern( + re.compile( + r"(?i)(\$\{[^}]{0,200}\}|\#\{[^}]{0,200}\}|<%[^%]{0,200}%>|{{[^}]{0,200}})" + ), + "Contains template injection patterns", + ), + XSSPattern( + re.compile(r"(?i)\bvbscript\s*:"), + "Contains VBScript protocol", + ), +] + + +def _contains_percent_or_entity_obfuscation(value_lower: str) -> bool: + # simple substring checks — no heavy regex backtracking + if ( + "javascript%3a" in value_lower + or "vbscript%3a" in value_lower + or "data%3a" in value_lower + ): + return True + return "data:text/html" in value_lower + + +def _contains_obfuscated_protocol_by_normalization(value: str) -> bool: + # remove common separators/control chars and check for plain protocols in the normalized stream + # this avoids complex interleaved-regexes that cause backtracking + norm = re.sub(r"[\s\x00-\x1f\x7f\W]+", "", value).lower() + return ( + norm.startswith("javascript:") + or "javascript:" in norm + or "vbscript:" in norm + or "data:text/html" in norm + or "data:" in norm + ) + + def validate_input_safety(value: str) -> str: - # NOTE: Don't sanitize against SL injects since underlying repository layer does it - if _XSS_PATTERN.search(value): - msg_template = "This input contains potentially unsafe content." - raise PydanticCustomError(STRING_UNSAFE_CONTENT_ERROR_CODE, msg_template, {}) + # Run fast, simple regex checks first (fail-fast). + for xss_pattern in _SAFE_XSS_PATTERNS: + if xss_pattern.pattern.search(value): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "{details}", + {"details": xss_pattern.message}, + ) + + # Lowercase once for substring checks + vlow = value.lower() + + # Fast substring / percent-encoding checks (no backtracking risk) + if _contains_percent_or_entity_obfuscation(vlow): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "Contains encoded malicious content", + {}, + ) + + # Normalization-based obfuscation detection (de-duplicates heavy regex) + if _contains_obfuscated_protocol_by_normalization(value): + raise PydanticCustomError( + STRING_UNSAFE_CONTENT_ERROR_CODE, + "Contains obfuscated unsafe protocols", + {}, + ) + return value @@ -83,7 +168,7 @@ def validate_input_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=MAX_NAME_LENGTH, - pattern=r"^[A-Za-z0-9 ._\-]+$", # strict whitelist + pattern=r"^[A-Za-z0-9 ._-]+$", # string that ONLY contains alphanumeric characters, spaces, dots, underscores, or hyphens ), AfterValidator(validate_input_safety), ] @@ -106,7 +191,7 @@ def validate_input_safety(value: str) -> str: min_length=3, max_length=200, strip_whitespace=True, - pattern=r"^[^%]*$", + pattern=r"^[A-Za-z0-9 ._\*-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks ), AfterValidator(validate_input_safety), ] @@ -118,7 +203,7 @@ def validate_input_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=200, - pattern=r"^[^\%]+$", + pattern=r"^[A-Za-z0-9 ._\-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks ), AfterValidator(validate_input_safety), annotated_types.doc( From df455a218e49cc6fd660887621c9c50d5db3c8f7 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:16:52 +0200 Subject: [PATCH 20/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Impr?= =?UTF-8?q?ove=20obfuscated=20protocol=20detection=20in=20input=20validati?= =?UTF-8?q?on=20and=20add=20corresponding=20test=20cases.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/models_library/string_types.py | 29 +++++++++---------- .../models-library/tests/test_string_types.py | 25 ++++++++++++++++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 8d816be864b9..1aa5d346f92c 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -112,16 +112,15 @@ def _contains_percent_or_entity_obfuscation(value_lower: str) -> bool: return "data:text/html" in value_lower -def _contains_obfuscated_protocol_by_normalization(value: str) -> bool: - # remove common separators/control chars and check for plain protocols in the normalized stream - # this avoids complex interleaved-regexes that cause backtracking - norm = re.sub(r"[\s\x00-\x1f\x7f\W]+", "", value).lower() +def _contains_obfuscated_protocol_by_normalization(value_lower: str) -> bool: + # remove ALL non-alphanumeric chars for maximum normalization + # this catches heavily spaced out patterns like "j a v a s c r i p t:" + norm = re.sub(r"[^a-z0-9]", "", value_lower) return ( - norm.startswith("javascript:") - or "javascript:" in norm - or "vbscript:" in norm - or "data:text/html" in norm - or "data:" in norm + "javascript" in norm + or "vbscript" in norm + or "datatext" in norm + or "data:" in value_lower # keep original check for data: protocol ) @@ -131,15 +130,13 @@ def validate_input_safety(value: str) -> str: if xss_pattern.pattern.search(value): raise PydanticCustomError( STRING_UNSAFE_CONTENT_ERROR_CODE, - "{details}", - {"details": xss_pattern.message}, + "{msg}", + {"msg": xss_pattern.message}, ) - # Lowercase once for substring checks - vlow = value.lower() - + value_lower = value.lower() # Fast substring / percent-encoding checks (no backtracking risk) - if _contains_percent_or_entity_obfuscation(vlow): + if _contains_percent_or_entity_obfuscation(value_lower): raise PydanticCustomError( STRING_UNSAFE_CONTENT_ERROR_CODE, "Contains encoded malicious content", @@ -147,7 +144,7 @@ def validate_input_safety(value: str) -> str: ) # Normalization-based obfuscation detection (de-duplicates heavy regex) - if _contains_obfuscated_protocol_by_normalization(value): + if _contains_obfuscated_protocol_by_normalization(value_lower): raise PydanticCustomError( STRING_UNSAFE_CONTENT_ERROR_CODE, "Contains obfuscated unsafe protocols", diff --git a/packages/models-library/tests/test_string_types.py b/packages/models-library/tests/test_string_types.py index e4416558ce3b..dd8ca7c018e5 100644 --- a/packages/models-library/tests/test_string_types.py +++ b/packages/models-library/tests/test_string_types.py @@ -106,6 +106,31 @@ class InputRequestModel(BaseModel): False, id="redos-img-attributes", ), + # ❌ Obfuscated protocol tests + pytest.param( + "SafeName", + "j a v a s c r i p t:alert(1)", + False, + id="invalid-desc-spaced-js", + ), + pytest.param( + "SafeName", + "java\nscript\t:alert(1)", + False, + id="invalid-desc-newline-js", + ), + pytest.param( + "SafeName", + "d\ta\tt\ta:text/html,", + False, + id="invalid-desc-obfuscated-data", + ), + pytest.param( + "SafeName", + "v b\ts c r i p t:MsgBox(1)", + False, + id="invalid-desc-spaced-vbs", + ), ], ) def test_safe_string_types(name: str, description: str, should_pass: bool): From 5cbb20bf3799e091bbd8f849f945478625d27139 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:24:53 +0200 Subject: [PATCH 21/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Rena?= =?UTF-8?q?me=20input=20validation=20function=20to=20improve=20XSS=20safet?= =?UTF-8?q?y=20checks=20and=20update=20related=20validators.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_schemas_webserver/users.py | 6 +++--- .../src/models_library/string_types.py | 16 ++++++++-------- .../models-library/src/models_library/users.py | 6 ++++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/users.py b/packages/models-library/src/models_library/api_schemas_webserver/users.py index af0183ddd58f..a0f91828aafb 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/users.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/users.py @@ -30,7 +30,7 @@ from ..string_types import ( GlobPatternSafeStr, SearchPatternSafeStr, - validate_input_safety, + validate_input_xss_safety, ) from ..users import ( FirstNameStr, @@ -209,12 +209,12 @@ def from_domain_model( FirstNameSafeStr = Annotated[ FirstNameStr, - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), ] LastNameSafeStr = Annotated[ LastNameStr, - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), ] diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 1aa5d346f92c..7ef7bbdc19de 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -124,7 +124,7 @@ def _contains_obfuscated_protocol_by_normalization(value_lower: str) -> bool: ) -def validate_input_safety(value: str) -> str: +def validate_input_xss_safety(value: str) -> str: # Run fast, simple regex checks first (fail-fast). for xss_pattern in _SAFE_XSS_PATTERNS: if xss_pattern.pattern.search(value): @@ -167,7 +167,7 @@ def validate_input_safety(value: str) -> str: max_length=MAX_NAME_LENGTH, pattern=r"^[A-Za-z0-9 ._-]+$", # string that ONLY contains alphanumeric characters, spaces, dots, underscores, or hyphens ), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), ] @@ -178,7 +178,7 @@ def validate_input_safety(value: str) -> str: min_length=MIN_DESCRIPTION_LENGTH, max_length=MAX_DESCRIPTION_LENGTH, ), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), ] @@ -190,7 +190,7 @@ def validate_input_safety(value: str) -> str: strip_whitespace=True, pattern=r"^[A-Za-z0-9 ._\*-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks ), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), ] @@ -200,9 +200,9 @@ def validate_input_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=200, - pattern=r"^[A-Za-z0-9 ._\-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks + pattern=r"^[A-Za-z0-9 ._-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks ), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), annotated_types.doc( """ A safe string used for search patterns. @@ -220,7 +220,7 @@ def validate_input_safety(value: str) -> str: str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_SHORT_TRUNCATED_STR_MAX_LENGTH), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), annotated_types.doc( """ A truncated string used to input e.g. titles or display names. @@ -237,7 +237,7 @@ def validate_input_safety(value: str) -> str: str, StringConstraints(strip_whitespace=True), trim_string_before(max_length=_LONG_TRUNCATED_STR_MAX_LENGTH), - AfterValidator(validate_input_safety), + AfterValidator(validate_input_xss_safety), annotated_types.doc( """ A truncated string used to input e.g. descriptions or summaries. diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index 1b4b2aa76d30..aeb4e5c94580 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -2,7 +2,7 @@ from typing import Annotated, TypeAlias from common_library.users_enums import UserRole -from models_library.string_types import validate_input_safety +from models_library.string_types import validate_input_xss_safety from pydantic import ( AfterValidator, BaseModel, @@ -22,7 +22,9 @@ UserNameID: TypeAlias = Annotated[ str, StringConstraints(strip_whitespace=True, min_length=4, max_length=100) ] -UserNameSafeID: TypeAlias = Annotated[UserNameID, AfterValidator(validate_input_safety)] +UserNameSafeID: TypeAlias = Annotated[ + UserNameID, AfterValidator(validate_input_xss_safety) +] FirstNameStr: TypeAlias = Annotated[ From 8f2ddcfc0b9706835299a4451881a343e91d8f6b Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:25:31 +0200 Subject: [PATCH 22/27] oas --- .../api/v0/openapi.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 33c5def378f4..ecf9891f2273 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -1804,7 +1804,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[^%]*$ + pattern: ^[A-Za-z0-9 ._\*-]*$ - type: 'null' title: Email - name: primary_group_id @@ -1825,7 +1825,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[^%]*$ + pattern: ^[A-Za-z0-9 ._\*-]*$ - type: 'null' title: User Name responses: @@ -12828,7 +12828,7 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ title: Name parentFolderId: anyOf: @@ -12920,7 +12920,7 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ title: Name parentFolderId: anyOf: @@ -13042,7 +13042,7 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ title: Label description: type: string @@ -13135,7 +13135,7 @@ components: - type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ - type: 'null' title: Label description: @@ -18111,7 +18111,7 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ title: Name description: anyOf: @@ -18208,7 +18208,7 @@ components: - type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._\-]+$ + pattern: ^[A-Za-z0-9 ._-]+$ - type: 'null' title: Name description: @@ -18965,7 +18965,7 @@ components: type: string maxLength: 200 minLength: 1 - pattern: ^[^\%]+$ + pattern: ^[A-Za-z0-9 ._-]*$ title: Match description: Search string to match with usernames and public profiles (e.g. emails, first/last name) From 1261930456a2330003479e53eb2f5e114db0db6f Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:36:57 +0200 Subject: [PATCH 23/27] =?UTF-8?q?=F0=9F=94=92=20Enhance=20security:=20Upda?= =?UTF-8?q?te=20XSS=20pattern=20to=20improve=20detection=20of=20unsafe=20U?= =?UTF-8?q?RL=20protocols=20in=20attributes.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/models-library/src/models_library/string_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 7ef7bbdc19de..1a820b303e01 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -39,7 +39,7 @@ class XSSPattern(NamedTuple): ), XSSPattern( re.compile( - r"(?i)\b(?:src|href|xlink:href|srcdoc)\s*=\s*['\"]?\s*(?:javascript:|vbscript:|data:)", + r"(?i)\b(?:src|href|xlink:href|srcdoc)\s*=\s*(?:['\"]\s*)?(?:javascript:|vbscript:|data:)", re.IGNORECASE, ), "Contains unsafe URL protocols in attributes", From 9bd3e93e82707be7e9e29aa2111addca22ae1261 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:40:34 +0200 Subject: [PATCH 24/27] updates --- .../api_schemas_webserver/folders_v2.py | 6 +++--- .../src/models_library/string_types.py | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py index 4be151f2d87a..da97f1d7adae 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import Annotated, Self -from models_library.string_types import NameSafeStr +from models_library.string_types import DisplaySafeStr from pydantic import ConfigDict, Field, field_validator from ..access_rights import AccessRights @@ -53,7 +53,7 @@ def from_domain_model( class FolderCreateBodyParams(InputSchema): - name: NameSafeStr + name: DisplaySafeStr parent_folder_id: FolderID | None = None workspace_id: WorkspaceID | None = None model_config = ConfigDict(extra="forbid") @@ -68,7 +68,7 @@ class FolderCreateBodyParams(InputSchema): class FolderReplaceBodyParams(InputSchema): - name: NameSafeStr + name: DisplaySafeStr parent_folder_id: FolderID | None = None model_config = ConfigDict(extra="forbid") diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index 1a820b303e01..ccbaa2a0a5c4 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -165,11 +165,25 @@ def validate_input_xss_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=MAX_NAME_LENGTH, - pattern=r"^[A-Za-z0-9 ._-]+$", # string that ONLY contains alphanumeric characters, spaces, dots, underscores, or hyphens + pattern=r"^[A-Za-z0-9 ._-]+$", + # CAREFUL: string that ONLY contains alphanumeric characters, spaces, dots, underscores, or hyphens ), AfterValidator(validate_input_xss_safety), + annotated_types.doc( + """ A safe string used in **name identifiers**, It might be very restrictive for display names (e.g. titles or labels) """ + ), ] +DisplaySafeStr: TypeAlias = Annotated[ + str, + StringConstraints( + strip_whitespace=True, + min_length=1, + max_length=MAX_NAME_LENGTH, + ), + AfterValidator(validate_input_xss_safety), + annotated_types.doc(""" Like `NameSafeStr` but more suited for display names"""), +] DescriptionSafeStr: TypeAlias = Annotated[ str, @@ -200,7 +214,7 @@ def validate_input_xss_safety(value: str) -> str: strip_whitespace=True, min_length=1, max_length=200, - pattern=r"^[A-Za-z0-9 ._-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks + pattern=r"^[A-Za-z0-9 ._@-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and at signs ), AfterValidator(validate_input_xss_safety), annotated_types.doc( From e56435b315a11d8872f3efe120674ef6cb80cb21 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:44:57 +0200 Subject: [PATCH 25/27] udpates oas --- .../server/src/simcore_service_webserver/api/v0/openapi.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index ecf9891f2273..1256c23fc769 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -12828,7 +12828,6 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._-]+$ title: Name parentFolderId: anyOf: @@ -12920,7 +12919,6 @@ components: type: string maxLength: 100 minLength: 1 - pattern: ^[A-Za-z0-9 ._-]+$ title: Name parentFolderId: anyOf: @@ -18965,7 +18963,7 @@ components: type: string maxLength: 200 minLength: 1 - pattern: ^[A-Za-z0-9 ._-]*$ + pattern: ^[A-Za-z0-9 ._@-]*$ title: Match description: Search string to match with usernames and public profiles (e.g. emails, first/last name) From ace2d316ca46d6d9ff35b1d0575d1039fa3744c2 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Tue, 21 Oct 2025 14:48:34 +0200 Subject: [PATCH 26/27] include @ for emails --- packages/models-library/src/models_library/string_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/string_types.py b/packages/models-library/src/models_library/string_types.py index ccbaa2a0a5c4..cc7a2b4dcc5d 100644 --- a/packages/models-library/src/models_library/string_types.py +++ b/packages/models-library/src/models_library/string_types.py @@ -202,7 +202,7 @@ def validate_input_xss_safety(value: str) -> str: min_length=3, max_length=200, strip_whitespace=True, - pattern=r"^[A-Za-z0-9 ._\*-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, and asterisks + pattern=r"^[A-Za-z0-9 ._\*@-]*$", # Allow alphanumeric, spaces, dots, underscores, hyphens, asterisks and at signs ), AfterValidator(validate_input_xss_safety), ] From dd3ca3811781f1fc224eef706ee070efa32edc48 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Tue, 21 Oct 2025 16:38:02 +0200 Subject: [PATCH 27/27] updates OAS --- .../server/src/simcore_service_webserver/api/v0/openapi.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 1256c23fc769..a876db33a0df 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -1804,7 +1804,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[A-Za-z0-9 ._\*-]*$ + pattern: ^[A-Za-z0-9 ._\*@-]*$ - type: 'null' title: Email - name: primary_group_id @@ -1825,7 +1825,7 @@ paths: - type: string minLength: 3 maxLength: 200 - pattern: ^[A-Za-z0-9 ._\*-]*$ + pattern: ^[A-Za-z0-9 ._\*@-]*$ - type: 'null' title: User Name responses: