feat(event-handler): add clean File parameter support for multipart uploads

oyiz-michael · oyiz-michael · commit a09f1fab1125 · 2025-08-06T22:24:08.000+01:00
- Add public File parameter class extending _File
- Support multipart/form-data parsing with WebKit boundary compatibility
- Generate OpenAPI schema with format: binary for file uploads
- Enhance dependant logic to handle File + Form parameter combinations
- Clean implementation based on upstream develop branch

Changes:
- params.py: Add File(_File) public class with proper documentation
- dependant.py: Add File parameter support in body field info logic
- openapi_validation.py: Add multipart parsing with boundary detection
- test_file_form_validation.py: Basic test coverage for File parameters

This gives customers File parameter support that follows the same
Annotated-type pattern as the Query, Path, and Header parameters.
diff --git a/aws_lambda_powertools/event_handler/middlewares/openapi_validation.py b/aws_lambda_powertools/event_handler/middlewares/openapi_validation.py
@@ -35,6 +35,7 @@
 CONTENT_DISPOSITION_NAME_PARAM = "name="
 APPLICATION_JSON_CONTENT_TYPE = "application/json"
 APPLICATION_FORM_CONTENT_TYPE = "application/x-www-form-urlencoded"
+MULTIPART_FORM_CONTENT_TYPE = "multipart/form-data"  # matched as a prefix of the request content type
 
 
 class OpenAPIRequestValidationMiddleware(BaseMiddlewareHandler):
@@ -125,8 +126,12 @@ def _get_body(self, app: EventHandlerInstance) -> dict[str, Any]:
         elif content_type.startswith(APPLICATION_FORM_CONTENT_TYPE):
             return self._parse_form_data(app)
 
+        # Handle multipart form data
+        elif content_type.startswith(MULTIPART_FORM_CONTENT_TYPE):
+            return self._parse_multipart_data(app, content_type)
+
         else:
-            raise NotImplementedError("Only JSON body or Form() are supported")
+            raise NotImplementedError(f"Content type '{content_type}' is not supported")
 
     def _parse_json_data(self, app: EventHandlerInstance) -> dict[str, Any]:
         """Parse JSON data from the request body."""
@@ -169,6 +174,91 @@ def _parse_form_data(self, app: EventHandlerInstance) -> dict[str, Any]:
                 ],
             ) from e
 
+    def _parse_multipart_data(self, app: EventHandlerInstance, content_type: str) -> dict[str, Any]:
+        """
+        Parse a multipart/form-data body into a mapping of field name to value.
+
+        Parts with a ``filename`` parameter are kept as raw bytes exactly as sent; other parts are
+        decoded as UTF-8 text, or kept as bytes when undecodable. Failures raise RequestValidationError.
+        """
+        import base64
+        import re
+
+        try:
+            body = app.current_event.body or ""
+            # Binary payloads reach Lambda base64 encoded; fall back to the raw body if decoding fails
+            if app.current_event.is_base64_encoded:
+                try:
+                    decoded_bytes = base64.b64decode(body)
+                except Exception:
+                    decoded_bytes = body.encode("utf-8") if isinstance(body, str) else body
+            else:
+                decoded_bytes = body.encode("utf-8") if isinstance(body, str) else body
+
+            boundary_match = re.search(r"boundary=([^;,\s]+)", content_type)
+            if boundary_match:
+                boundary = boundary_match.group(1).strip('"')
+            else:
+                # Last resort: a bare WebKit boundary token without the "boundary=" parameter
+                webkit_match = re.search(r"WebKitFormBoundary([a-zA-Z0-9]+)", content_type)
+                if not webkit_match:
+                    raise ValueError("No boundary found in multipart content-type")
+                boundary = "WebKitFormBoundary" + webkit_match.group(1)
+            boundary_bytes = ("--" + boundary).encode("utf-8")
+
+            parsed_data: dict[str, Any] = {}
+            # Skip the preamble before the first delimiter and the closing marker after the last one
+            for section in decoded_bytes.split(boundary_bytes)[1:-1]:
+                if not section.strip():
+                    continue
+
+                # Headers end at the first blank line; accept CRLF and bare LF line endings
+                header_end = section.find(b"\r\n\r\n")
+                separator_length = 4
+                if header_end == -1:
+                    header_end = section.find(b"\n\n")
+                    separator_length = 2
+                    if header_end == -1:
+                        continue
+
+                # Remove only the delimiter's line break; strip() would corrupt payloads edged by whitespace
+                content = section[header_end + separator_length :]
+                if content.endswith(b"\r\n"):
+                    content = content[:-2]
+                elif content.endswith(b"\n"):
+                    content = content[:-1]
+
+                # Require a separator before "name=" so it cannot match inside filename="..."
+                headers_part = section[:header_end].decode("utf-8", errors="ignore")
+                name_match = re.search(r'(?:^|[;\s])name="([^"]+)"', headers_part)
+                if not name_match:
+                    continue
+                field_name = name_match.group(1)
+
+                if "filename=" in headers_part:
+                    parsed_data[field_name] = content  # file parts stay as raw bytes
+                else:
+                    # Regular form field: decode as text, keeping bytes when it is not valid UTF-8
+                    try:
+                        parsed_data[field_name] = content.decode("utf-8")
+                    except UnicodeDecodeError:
+                        parsed_data[field_name] = content
+
+            return parsed_data
+
+        except Exception as e:
+            raise RequestValidationError(
+                [
+                    {
+                        "type": "multipart_invalid",
+                        "loc": ("body",),
+                        "msg": "Invalid multipart form data",
+                        "input": {},
+                        "ctx": {"error": str(e)},
+                    },
+                ]
+            ) from e
+
 
 class OpenAPIResponseValidationMiddleware(BaseMiddlewareHandler):
     """
diff --git a/aws_lambda_powertools/event_handler/openapi/dependant.py b/aws_lambda_powertools/event_handler/openapi/dependant.py
@@ -14,6 +14,7 @@
 from aws_lambda_powertools.event_handler.openapi.params import (
     Body,
     Dependant,
+    File,
     Form,
     Header,
     Param,
@@ -367,13 +368,23 @@ def get_body_field_info(
     if not required:
         body_field_info_kwargs["default"] = None
 
-    if any(isinstance(f.field_info, _File) for f in flat_dependant.body_params):
-        # MAINTENANCE: body_field_info: type[Body] = _File
-        raise NotImplementedError("_File fields are not supported in request bodies")
-    elif any(isinstance(f.field_info, Form) for f in flat_dependant.body_params):
+    # Check for File parameters
+    has_file_params = any(isinstance(f.field_info, File) for f in flat_dependant.body_params)
+    # Check for Form parameters
+    has_form_params = any(isinstance(f.field_info, Form) for f in flat_dependant.body_params)
+
+    if has_file_params:
+        # File parameters use multipart/form-data
+        body_field_info = Body
+        body_field_info_kwargs["media_type"] = "multipart/form-data"
+        body_field_info_kwargs["embed"] = True
+    elif has_form_params:
+        # Form parameters use application/x-www-form-urlencoded
         body_field_info = Body
         body_field_info_kwargs["media_type"] = "application/x-www-form-urlencoded"
+        body_field_info_kwargs["embed"] = True
     else:
+        # Regular JSON body parameters
         body_field_info = Body
 
         body_param_media_types = [
diff --git a/aws_lambda_powertools/event_handler/openapi/params.py b/aws_lambda_powertools/event_handler/openapi/params.py
@@ -29,6 +29,8 @@
 This turns the low-level function signature into typed, validated Pydantic models for consumption.
 """
 
+__all__ = ["Path", "Query", "Header", "Body", "Form", "File"]  # names exported by a wildcard import
+
 
 class ParamTypes(Enum):
     query = "query"
@@ -888,6 +890,29 @@ def __init__(
         )
 
 
+class File(_File):
+    """
+    Defines a file parameter that should be extracted from multipart form data.
+
+    Declaring a File parameter makes the route's request body use the
+    multipart/form-data media type in the generated OpenAPI schema.
+
+    Example
+    -------
+    ```python
+    from typing import Annotated
+    from aws_lambda_powertools.event_handler import APIGatewayRestResolver
+    from aws_lambda_powertools.event_handler.openapi.params import File
+
+    app = APIGatewayRestResolver(enable_validation=True)
+
+    @app.post("/upload")
+    def upload_file(file: Annotated[bytes, File(description="File to upload")]):
+        return {"file_size": len(file)}
+    ```
+    """
+
+
 def get_flat_dependant(
     dependant: Dependant,
     visited: list[CacheKey] | None = None,
diff --git a/tests/functional/event_handler/_pydantic/test_file_form_validation.py b/tests/functional/event_handler/_pydantic/test_file_form_validation.py