Skip to content

Commit 832c098

Browse files
authored
Input validation testing closes #552 (#553)
* Added input validation test suite Signed-off-by: Mihai Criveti <[email protected]> * Added input validation test suite - making it easier to spot failing tests Signed-off-by: Mihai Criveti <[email protected]> * Added input validation test suite - making it easier to spot failing tests. Removed - and . from tool name validation Signed-off-by: Mihai Criveti <[email protected]> * Added input validation test suite - making it easier to spot failing tests. Better output Signed-off-by: Mihai Criveti <[email protected]> * Improve test_tool_create_url_validation validation Signed-off-by: Mihai Criveti <[email protected]> * Improve DANGEROUS_HTML_PATTERN validation Signed-off-by: Mihai Criveti <[email protected]> * Update the sanitize_display_text method to catch polyglot payloads Signed-off-by: Mihai Criveti <[email protected]> * Allow safe Jinja2 but prevent SSTI in validate_template Signed-off-by: Mihai Criveti <[email protected]> * Add additional validation to ToolInvocation and ResourceSubscription to validate parameters Signed-off-by: Mihai Criveti <[email protected]> * Update doctest and validation for json depth Signed-off-by: Mihai Criveti <[email protected]> * Update doctest warning on backslash escape Signed-off-by: Mihai Criveti <[email protected]> --------- Signed-off-by: Mihai Criveti <[email protected]>
1 parent 7d42d50 commit 832c098

File tree

4 files changed

+2259
-24
lines changed

4 files changed

+2259
-24
lines changed

mcpgateway/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,10 @@ def validate_database(self) -> None:
489489
db_dir.mkdir(parents=True)
490490

491491
# Validation patterns for safe display (configurable)
492-
validation_dangerous_html_pattern: str = r"<(script|iframe|object|embed|link|meta|base|form)\b|</*(script|iframe|object|embed|link|meta|base|form)>"
492+
validation_dangerous_html_pattern: str = (
493+
r"<(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)\b|</*(script|iframe|object|embed|link|meta|base|form|img|svg|video|audio|source|track|area|map|canvas|applet|frame|frameset|html|head|body|style)>"
494+
)
495+
493496
validation_dangerous_js_pattern: str = r"javascript:|vbscript:|on\w+\s*=|data:.*script"
494497
validation_allowed_url_schemes: List[str] = ["http://", "https://", "ws://", "wss://"]
495498

mcpgateway/schemas.py

Lines changed: 285 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from typing import Any, Dict, List, Literal, Optional, Self, Union
2929

3030
# Third-Party
31-
from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field, field_serializer, field_validator, model_validator, ValidationInfo
31+
from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field, field_serializer, field_validator, model_validator, ValidationInfo, ValidationError
3232

3333
# First-Party
3434
from mcpgateway.config import settings
@@ -737,14 +737,145 @@ class ToolRead(BaseModelWithConfigDict):
737737
class ToolInvocation(BaseModelWithConfigDict):
738738
"""Schema for tool invocation requests.
739739
740+
This schema validates tool invocation requests to ensure they follow MCP
741+
(Model Context Protocol) naming conventions and prevent security vulnerabilities
742+
such as XSS attacks or deeply nested payloads that could cause DoS.
743+
740744
Captures:
741-
- Tool name to invoke
742-
- Arguments matching tool's input schema
745+
- Tool name to invoke (validated for safety and MCP compliance)
746+
- Arguments matching tool's input schema (validated for depth limits)
747+
748+
Validation Rules:
749+
- Tool names must start with a letter and contain only letters, numbers,
750+
underscores, and hyphens
751+
- Tool names cannot contain HTML special characters (<, >, ", ', /)
752+
- Arguments are validated to prevent excessively deep nesting (default max: 10 levels)
753+
754+
Attributes:
755+
name (str): Name of the tool to invoke. Must follow MCP naming conventions.
756+
arguments (Dict[str, Any]): Arguments to pass to the tool. Must match the
757+
tool's input schema and not exceed depth limits.
758+
759+
Examples:
760+
>>> # Valid tool invocation
761+
>>> tool_inv = ToolInvocation(name="get_weather", arguments={"city": "London"})
762+
>>> tool_inv.name
763+
'get_weather'
764+
>>> tool_inv.arguments
765+
{'city': 'London'}
766+
767+
>>> # Valid tool name with underscores and numbers
768+
>>> tool_inv = ToolInvocation(name="tool_v2_beta", arguments={})
769+
>>> tool_inv.name
770+
'tool_v2_beta'
771+
772+
>>> # Invalid: Tool name with special characters
773+
>>> try:
774+
... ToolInvocation(name="tool-name!", arguments={})
775+
... except ValidationError as e:
776+
... print("Validation failed: Special characters not allowed")
777+
Validation failed: Special characters not allowed
778+
779+
>>> # Invalid: XSS attempt in tool name
780+
>>> try:
781+
... ToolInvocation(name="<script>alert('XSS')</script>", arguments={})
782+
... except ValidationError as e:
783+
... print("Validation failed: HTML tags not allowed")
784+
Validation failed: HTML tags not allowed
785+
786+
>>> # Invalid: Tool name starting with number
787+
>>> try:
788+
... ToolInvocation(name="123_tool", arguments={})
789+
... except ValidationError as e:
790+
... print("Validation failed: Must start with letter")
791+
Validation failed: Must start with letter
792+
793+
>>> # Valid: Complex but not too deep arguments
794+
>>> args = {"level1": {"level2": {"level3": {"data": "value"}}}}
795+
>>> tool_inv = ToolInvocation(name="process_data", arguments=args)
796+
>>> tool_inv.arguments["level1"]["level2"]["level3"]["data"]
797+
'value'
798+
799+
>>> # Invalid: Arguments too deeply nested (>10 levels)
800+
>>> deep_args = {"a": {"b": {"c": {"d": {"e": {"f": {"g": {"h": {"i": {"j": {"k": "too deep"}}}}}}}}}}}
801+
>>> try:
802+
... ToolInvocation(name="process_data", arguments=deep_args)
803+
... except ValidationError as e:
804+
... print("Validation failed: Exceeds maximum depth")
805+
Validation failed: Exceeds maximum depth
806+
807+
>>> # Edge case: Empty tool name
808+
>>> try:
809+
... ToolInvocation(name="", arguments={})
810+
... except ValidationError as e:
811+
... print("Validation failed: Name cannot be empty")
812+
Validation failed: Name cannot be empty
813+
814+
>>> # Valid: Tool name using underscores as word separators
815+
>>> tool_inv = ToolInvocation(name="get_user_info", arguments={"id": 123})
816+
>>> tool_inv.name
817+
'get_user_info'
818+
819+
>>> # Arguments with various types
820+
>>> args = {
821+
... "string": "value",
822+
... "number": 42,
823+
... "boolean": True,
824+
... "array": [1, 2, 3],
825+
... "nested": {"key": "value"}
826+
... }
827+
>>> tool_inv = ToolInvocation(name="complex_tool", arguments=args)
828+
>>> tool_inv.arguments["number"]
829+
42
743830
"""
744831

745832
name: str = Field(..., description="Name of tool to invoke")
746833
arguments: Dict[str, Any] = Field(default_factory=dict, description="Arguments matching tool's input schema")
747834

835+
@field_validator("name")
836+
@classmethod
837+
def validate_name(cls, v: str) -> str:
838+
"""Ensure tool names follow MCP naming conventions.
839+
840+
Validates that the tool name:
841+
- Is not empty
842+
- Starts with a letter (not a number or special character)
843+
- Contains only letters, numbers, underscores, and hyphens
844+
- Does not contain HTML special characters that could cause XSS
845+
- Does not exceed maximum length (255 characters)
846+
847+
Args:
848+
v (str): Tool name to validate
849+
850+
Returns:
851+
str: The validated tool name if it passes all checks
852+
853+
Raises:
854+
ValueError: If the tool name violates any validation rules
855+
"""
856+
return SecurityValidator.validate_tool_name(v)
857+
858+
@field_validator("arguments")
859+
@classmethod
860+
def validate_arguments(cls, v: Dict[str, Any]) -> Dict[str, Any]:
861+
"""Validate arguments structure depth to prevent DoS attacks.
862+
863+
Ensures that the arguments dictionary doesn't have excessive nesting
864+
that could cause performance issues or stack overflow. The default
865+
maximum depth is 10 levels.
866+
867+
Args:
868+
v (dict): Arguments dictionary to validate
869+
870+
Returns:
871+
dict: The validated arguments if within depth limits
872+
873+
Raises:
874+
ValueError: If the arguments exceed the maximum allowed depth
875+
"""
876+
SecurityValidator.validate_json_depth(v)
877+
return v
878+
748879

749880
class ToolResult(BaseModelWithConfigDict):
750881
"""Schema for tool invocation results.
@@ -1000,14 +1131,163 @@ class ResourceRead(BaseModelWithConfigDict):
10001131
class ResourceSubscription(BaseModelWithConfigDict):
10011132
"""Schema for resource subscriptions.
10021133
1134+
This schema validates resource subscription requests to ensure URIs are safe
1135+
and subscriber IDs follow proper formatting rules. It prevents various
1136+
injection attacks and ensures data consistency.
1137+
10031138
Tracks:
1004-
- Resource URI being subscribed to
1005-
- Unique subscriber identifier
1139+
- Resource URI being subscribed to (validated for safety)
1140+
- Unique subscriber identifier (validated for proper format)
1141+
1142+
Validation Rules:
1143+
- URIs cannot contain HTML special characters (<, >, ", ', backslash)
1144+
- URIs cannot contain directory traversal sequences (..)
1145+
- URIs must contain only safe characters (alphanumeric, _, -, :, /, ?, =, &, %)
1146+
- Subscriber IDs must contain only alphanumeric characters, underscores, hyphens, and dots
1147+
- Both fields have maximum length limits (255 characters)
1148+
1149+
Attributes:
1150+
uri (str): URI of the resource to subscribe to. Must be a safe, valid URI.
1151+
subscriber_id (str): Unique identifier for the subscriber. Must follow
1152+
identifier naming conventions.
1153+
1154+
Examples:
1155+
>>> # Valid subscription
1156+
>>> sub = ResourceSubscription(uri="/api/v1/users/123", subscriber_id="client_001")
1157+
>>> sub.uri
1158+
'/api/v1/users/123'
1159+
>>> sub.subscriber_id
1160+
'client_001'
1161+
1162+
>>> # Valid URI with query parameters
1163+
>>> sub = ResourceSubscription(uri="/data?type=json&limit=10", subscriber_id="app.service.1")
1164+
>>> sub.uri
1165+
'/data?type=json&limit=10'
1166+
1167+
>>> # Valid subscriber ID with dots (common for service names)
1168+
>>> sub = ResourceSubscription(uri="/events", subscriber_id="com.example.service")
1169+
>>> sub.subscriber_id
1170+
'com.example.service'
1171+
1172+
>>> # Invalid: XSS attempt in URI
1173+
>>> try:
1174+
... ResourceSubscription(uri="<script>alert('XSS')</script>", subscriber_id="sub1")
1175+
... except ValidationError as e:
1176+
... print("Validation failed: HTML characters not allowed")
1177+
Validation failed: HTML characters not allowed
1178+
1179+
>>> # Invalid: Directory traversal in URI
1180+
>>> try:
1181+
... ResourceSubscription(uri="/api/../../../etc/passwd", subscriber_id="sub1")
1182+
... except ValidationError as e:
1183+
... print("Validation failed: Directory traversal detected")
1184+
Validation failed: Directory traversal detected
1185+
1186+
>>> # Invalid: SQL injection attempt in URI
1187+
>>> try:
1188+
... ResourceSubscription(uri="/users'; DROP TABLE users;--", subscriber_id="sub1")
1189+
... except ValidationError as e:
1190+
... print("Validation failed: Invalid characters in URI")
1191+
Validation failed: Invalid characters in URI
1192+
1193+
>>> # Invalid: Special characters in subscriber ID
1194+
>>> try:
1195+
... ResourceSubscription(uri="/api/data", subscriber_id="sub@123!")
1196+
... except ValidationError as e:
1197+
... print("Validation failed: Invalid subscriber ID format")
1198+
Validation failed: Invalid subscriber ID format
1199+
1200+
>>> # Invalid: Empty URI
1201+
>>> try:
1202+
... ResourceSubscription(uri="", subscriber_id="sub1")
1203+
... except ValidationError as e:
1204+
... print("Validation failed: URI cannot be empty")
1205+
Validation failed: URI cannot be empty
1206+
1207+
>>> # Invalid: Empty subscriber ID
1208+
>>> try:
1209+
... ResourceSubscription(uri="/api/data", subscriber_id="")
1210+
... except ValidationError as e:
1211+
... print("Validation failed: Subscriber ID cannot be empty")
1212+
Validation failed: Subscriber ID cannot be empty
1213+
1214+
>>> # Valid: Complex but safe URI
1215+
>>> sub = ResourceSubscription(
1216+
... uri="/api/v2/resources/category:items/filter?status=active&limit=50",
1217+
... subscriber_id="monitor-service-01"
1218+
... )
1219+
>>> sub.uri
1220+
'/api/v2/resources/category:items/filter?status=active&limit=50'
1221+
1222+
>>> # Edge case: Maximum length validation (simulated)
1223+
>>> long_uri = "/" + "a" * 254 # 255 characters total, i.e. exactly at the documented 255-character limit
1224+
>>> sub = ResourceSubscription(uri=long_uri, subscriber_id="sub1")
1225+
>>> len(sub.uri)
1226+
255
1227+
1228+
>>> # Invalid: Quotes in URI (could break out of attributes)
1229+
>>> try:
1230+
... ResourceSubscription(uri='/api/data"onclick="alert(1)', subscriber_id="sub1")
1231+
... except ValidationError as e:
1232+
... print("Validation failed: Quotes not allowed in URI")
1233+
Validation failed: Quotes not allowed in URI
10061234
"""
10071235

10081236
uri: str = Field(..., description="URI of resource to subscribe to")
10091237
subscriber_id: str = Field(..., description="Unique subscriber identifier")
10101238

1239+
@field_validator("uri")
1240+
@classmethod
1241+
def validate_uri(cls, v: str) -> str:
1242+
"""Validate URI format for safety and correctness.
1243+
1244+
Ensures the URI:
1245+
- Is not empty
1246+
- Does not contain HTML special characters that could cause XSS
1247+
- Does not contain directory traversal sequences (..)
1248+
- Contains only allowed characters for URIs
1249+
- Does not exceed maximum length (255 characters)
1250+
1251+
This prevents various injection attacks including XSS, path traversal,
1252+
and other URI-based vulnerabilities.
1253+
1254+
Args:
1255+
v (str): URI to validate
1256+
1257+
Returns:
1258+
str: The validated URI if it passes all security checks
1259+
1260+
Raises:
1261+
ValueError: If the URI contains dangerous patterns or invalid characters
1262+
"""
1263+
return SecurityValidator.validate_uri(v, "Resource URI")
1264+
1265+
@field_validator("subscriber_id")
1266+
@classmethod
1267+
def validate_subscriber_id(cls, v: str) -> str:
1268+
"""Validate subscriber ID format.
1269+
1270+
Ensures the subscriber ID:
1271+
- Is not empty
1272+
- Contains only alphanumeric characters, underscores, hyphens, and dots
1273+
- Does not contain HTML special characters
1274+
- Follows standard identifier naming conventions
1275+
- Does not exceed maximum length (255 characters)
1276+
1277+
This ensures consistency and prevents injection attacks through
1278+
subscriber identifiers.
1279+
1280+
Args:
1281+
v (str): Subscriber ID to validate
1282+
1283+
Returns:
1284+
str: The validated subscriber ID if it passes all checks
1285+
1286+
Raises:
1287+
ValueError: If the subscriber ID violates naming conventions
1288+
"""
1289+
return SecurityValidator.validate_identifier(v, "Subscriber ID")
1290+
10111291

10121292
class ResourceNotification(BaseModelWithConfigDict):
10131293
"""Schema for resource update notifications.

0 commit comments

Comments
 (0)