Skip to content

Commit bd19bee

Browse files
committed
feat(event-handler): Add UploadFile class for file metadata access
- Add FastAPI-inspired UploadFile class with filename, content_type, size, and headers properties
- Enhance multipart parser to extract and preserve file metadata from Content-Disposition headers
- Implement automatic type resolution for backward compatibility with existing bytes-based File parameters
- Add comprehensive Pydantic schema validation for UploadFile class
- Include 6 comprehensive test cases covering metadata access, backward compatibility, and file reconstruction scenarios
- Update official example to showcase both new UploadFile and legacy bytes approaches
- Maintain 100% backward compatibility - existing bytes code works unchanged
- Address @leandrodamascena feedback about file reconstruction capabilities in Lambda environments

Fixes: File parameter enhancement for metadata access in AWS Lambda file uploads
1 parent f78af9a commit bd19bee

File tree

4 files changed

+638
-32
lines changed

4 files changed

+638
-32
lines changed

aws_lambda_powertools/event_handler/middlewares/openapi_validation.py

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import re
77
from copy import deepcopy
8-
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Sequence
8+
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Sequence, Union
99
from urllib.parse import parse_qs
1010

1111
from pydantic import BaseModel
@@ -20,7 +20,7 @@
2020
from aws_lambda_powertools.event_handler.openapi.dependant import is_scalar_field
2121
from aws_lambda_powertools.event_handler.openapi.encoders import jsonable_encoder
2222
from aws_lambda_powertools.event_handler.openapi.exceptions import RequestValidationError, ResponseValidationError
23-
from aws_lambda_powertools.event_handler.openapi.params import Param
23+
from aws_lambda_powertools.event_handler.openapi.params import Param, UploadFile
2424

2525
if TYPE_CHECKING:
2626
from aws_lambda_powertools.event_handler import Response
@@ -245,7 +245,7 @@ def _parse_multipart_sections(self, decoded_bytes: bytes, boundary_bytes: bytes)
245245

246246
return parsed_data
247247

248-
def _parse_multipart_section(self, section: bytes) -> tuple[str | None, bytes | str]:
248+
def _parse_multipart_section(self, section: bytes) -> tuple[str | None, bytes | str | UploadFile]:
249249
"""Parse a single multipart section to extract field name and content."""
250250
headers_part, content = self._split_section_headers_and_content(section)
251251

@@ -261,8 +261,30 @@ def _parse_multipart_section(self, section: bytes) -> tuple[str | None, bytes |
261261

262262
# Check if it's a file field and process accordingly
263263
if "filename=" in headers_part:
264-
# It's a file - store as bytes
265-
return field_name, content
264+
# It's a file - extract metadata and create UploadFile
265+
filename_match = re.search(r'filename="([^"]*)"', headers_part)
266+
filename = filename_match.group(1) if filename_match else None
267+
268+
# Extract Content-Type if present
269+
content_type_match = re.search(r"Content-Type:\s*([^\r\n]+)", headers_part, re.IGNORECASE)
270+
content_type = content_type_match.group(1).strip() if content_type_match else None
271+
272+
# Parse all headers from the section
273+
headers = {}
274+
for line_raw in headers_part.split("\n"):
275+
line = line_raw.strip()
276+
if ":" in line and not line.startswith("Content-Disposition"):
277+
key, value = line.split(":", 1)
278+
headers[key.strip()] = value.strip()
279+
280+
# Create UploadFile instance with metadata
281+
upload_file = UploadFile(
282+
file=content,
283+
filename=filename,
284+
content_type=content_type,
285+
headers=headers,
286+
)
287+
return field_name, upload_file
266288
else:
267289
# It's a regular form field - decode as string
268290
return field_name, self._decode_form_field_content(content)
@@ -509,6 +531,27 @@ def _request_body_to_args(
509531
continue
510532

511533
# MAINTENANCE: Handle byte and file fields
534+
# Check if we have an UploadFile but the field expects bytes
535+
from typing import get_args, get_origin
536+
537+
field_type = field.type_
538+
539+
# Handle Union types (e.g., Union[bytes, None] for optional parameters)
540+
if get_origin(field_type) is Union:
541+
# Get the non-None types from the Union
542+
union_args = get_args(field_type)
543+
non_none_types = [arg for arg in union_args if arg is not type(None)]
544+
if non_none_types:
545+
field_type = non_none_types[0] # Use the first non-None type
546+
547+
if isinstance(value, UploadFile) and field_type is bytes:
548+
# Convert UploadFile to bytes for backward compatibility
549+
value = value.file
550+
elif isinstance(value, bytes) and field_type == UploadFile:
551+
# Convert bytes to UploadFile if that's what's expected
552+
# This shouldn't normally happen in our current implementation,
553+
# but provides a fallback path
554+
value = UploadFile(file=value)
512555

513556
# Finally, validate the value
514557
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)

aws_lambda_powertools/event_handler/openapi/params.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,105 @@
2929
This turns the low-level function signature into typed, validated Pydantic models for consumption.
3030
"""
3131

32-
__all__ = ["Path", "Query", "Header", "Body", "Form", "File"]
32+
__all__ = ["Path", "Query", "Header", "Body", "Form", "File", "UploadFile"]
33+
34+
35+
class UploadFile:
36+
"""
37+
A file uploaded as part of a multipart/form-data request.
38+
39+
Similar to FastAPI's UploadFile, this class provides access to both file content
40+
and metadata such as filename, content type, and headers.
41+
42+
Example:
43+
```python
44+
@app.post("/upload")
45+
def upload_file(file: Annotated[UploadFile, File()]):
46+
return {
47+
"filename": file.filename,
48+
"content_type": file.content_type,
49+
"size": file.size,
50+
"content": file.file.decode() if file.size < 1000 else "File too large to display"
51+
}
52+
```
53+
"""
54+
55+
def __init__(
56+
self,
57+
file: bytes,
58+
filename: str | None = None,
59+
content_type: str | None = None,
60+
headers: dict[str, str] | None = None,
61+
):
62+
"""
63+
Initialize an UploadFile instance.
64+
65+
Parameters
66+
----------
67+
file : bytes
68+
The file content as bytes
69+
filename : str | None
70+
The original filename from the Content-Disposition header
71+
content_type : str | None
72+
The content type from the Content-Type header
73+
headers : dict[str, str] | None
74+
All headers from the multipart section
75+
"""
76+
self.file = file
77+
self.filename = filename
78+
self.content_type = content_type
79+
self.headers = headers or {}
80+
81+
@property
82+
def size(self) -> int:
83+
"""Return the size of the file in bytes."""
84+
return len(self.file)
85+
86+
def read(self, size: int = -1) -> bytes:
87+
"""
88+
Read and return up to size bytes from the file.
89+
90+
Parameters
91+
----------
92+
size : int
93+
Number of bytes to read. If -1 (default), read the entire file.
94+
95+
Returns
96+
-------
97+
bytes
98+
The file content
99+
"""
100+
if size == -1:
101+
return self.file
102+
return self.file[:size]
103+
104+
def __repr__(self) -> str:
105+
"""Return a string representation of the UploadFile."""
106+
return f"UploadFile(filename={self.filename!r}, size={self.size}, content_type={self.content_type!r})"
107+
108+
@classmethod
109+
def __get_pydantic_core_schema__(
110+
cls,
111+
_source_type: Any,
112+
_handler: Any,
113+
) -> Any:
114+
"""Return Pydantic core schema for UploadFile."""
115+
from pydantic_core import core_schema
116+
117+
# Define the schema for UploadFile validation
118+
return core_schema.no_info_plain_validator_function(
119+
cls._validate,
120+
serialization=core_schema.to_string_ser_schema(),
121+
)
122+
123+
@classmethod
124+
def _validate(cls, value: Any) -> UploadFile:
125+
"""Validate and convert value to UploadFile."""
126+
if isinstance(value, cls):
127+
return value
128+
if isinstance(value, bytes):
129+
return cls(file=value)
130+
raise ValueError(f"Expected UploadFile or bytes, got {type(value)}")
33131

34132

35133
class ParamTypes(Enum):
Lines changed: 90 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,77 @@
11
"""
22
Example demonstrating File parameter usage for handling file uploads.
3+
This showcases both the new UploadFile class for metadata access and
4+
backward-compatible bytes approach.
35
"""
46

57
from __future__ import annotations
68

79
from typing import Annotated, Union
810

911
from aws_lambda_powertools.event_handler import APIGatewayRestResolver
10-
from aws_lambda_powertools.event_handler.openapi.params import File, Form
12+
from aws_lambda_powertools.event_handler.openapi.params import File, Form, UploadFile
1113

1214
# Initialize resolver with OpenAPI validation enabled
1315
app = APIGatewayRestResolver(enable_validation=True)
1416

1517

18+
# ========================================
19+
# NEW: UploadFile with Metadata Access
20+
# ========================================
21+
22+
23+
@app.post("/upload-with-metadata")
def upload_file_with_metadata(file: Annotated[UploadFile, File(description="File with metadata access")]):
    """Accept one uploaded file and echo back its metadata with a short text preview."""
    # Preview only the first 100 bytes; bytes that are not valid UTF-8 are dropped.
    preview = file.read(100).decode("utf-8", errors="ignore")

    response = {
        "status": "uploaded",
        "filename": file.filename,
        "content_type": file.content_type,
        "file_size": file.size,
        "headers": file.headers,
        "content_preview": preview,
        "can_reconstruct_file": True,
        "message": "File uploaded with metadata access",
    }
    return response
36+
37+
38+
@app.post("/upload-mixed-form")
def upload_file_with_form_data(
    file: Annotated[UploadFile, File(description="File with metadata")],
    description: Annotated[str, Form(description="File description")],
    # Union[...] rather than `str | None`: the annotation is evaluated at runtime to build
    # the validation model, and PEP 604 unions fail there on Python < 3.10. This matches
    # the other optional form fields in this example.
    category: Annotated[Union[str, None], Form(description="File category")] = None,  # noqa: UP007
):
    """
    Upload file with UploadFile metadata + form data.

    Returns the file's name, content type and size together with the submitted
    form fields, plus any multipart section headers whose name starts with "X-".
    """
    # Only headers with the literal "X-" prefix are echoed back (case-sensitive match).
    custom_headers = {k: v for k, v in file.headers.items() if k.startswith("X-")}

    return {
        "status": "uploaded",
        "filename": file.filename,
        "content_type": file.content_type,
        "file_size": file.size,
        "description": description,
        "category": category,
        "custom_headers": custom_headers,
        "message": "File and form data uploaded with metadata",
    }
55+
56+
57+
# ========================================
58+
# BACKWARD COMPATIBLE: Bytes Approach
59+
# ========================================
60+
61+
1662
@app.post("/upload")
def upload_single_file(file: Annotated[bytes, File(description="File to upload")]):
    """Receive a single file as raw bytes (legacy approach) and report its size."""
    byte_count = len(file)
    return {"status": "uploaded", "file_size": byte_count, "message": "File uploaded successfully"}
2066

2167

22-
@app.post("/upload-with-metadata")
23-
def upload_file_with_metadata(
68+
@app.post("/upload-legacy-metadata")
69+
def upload_file_legacy_with_metadata(
2470
file: Annotated[bytes, File(description="File to upload")],
2571
description: Annotated[str, Form(description="File description")],
2672
tags: Annotated[Union[str, None], Form(description="Optional tags")] = None, # noqa: UP007
2773
):
28-
"""Upload a file with additional form metadata."""
74+
"""Upload a file with additional form metadata - LEGACY bytes approach."""
2975
return {
3076
"status": "uploaded",
3177
"file_size": len(file),
@@ -37,22 +83,24 @@ def upload_file_with_metadata(
3783

3884
@app.post("/upload-multiple")
def upload_multiple_files(
    primary_file: Annotated[UploadFile, File(alias="primary", description="Primary file with metadata")],
    secondary_file: Annotated[bytes, File(alias="secondary", description="Secondary file as bytes")],
):
    """Receive two files in one request: one as UploadFile, the other as plain bytes."""
    # The UploadFile exposes its length via .size; the bytes payload needs len().
    primary_size = primary_file.size
    secondary_size = len(secondary_file)

    result = {
        "status": "uploaded",
        "primary_filename": primary_file.filename,
        "primary_content_type": primary_file.content_type,
        "primary_size": primary_size,
        "secondary_size": secondary_size,
        "total_size": primary_size + secondary_size,
        "message": "Multiple files uploaded with mixed approaches",
    }
    return result
5199

52100

53101
@app.post("/upload-with-constraints")
54102
def upload_small_file(file: Annotated[bytes, File(description="Small file only", max_length=1024)]):
55-
"""Upload a file with size constraints (max 1KB)."""
103+
"""Upload a file with size constraints (max 1KB) - bytes approach."""
56104
return {
57105
"status": "uploaded",
58106
"file_size": len(file),
@@ -63,14 +111,16 @@ def upload_small_file(file: Annotated[bytes, File(description="Small file only",
63111
@app.post("/upload-optional")
def upload_optional_file(
    message: Annotated[str, Form(description="Required message")],
    # Union[...] rather than `UploadFile | None`: the annotation is evaluated at runtime,
    # and PEP 604 unions fail there on Python < 3.10. This matches the other optional
    # parameters in this example.
    file: Annotated[Union[UploadFile, None], File(description="Optional file with metadata")] = None,  # noqa: UP007
):
    """
    Upload with an optional UploadFile parameter - NEW approach!

    When no file part is sent, ``file`` is None and the metadata fields in the
    response fall back to None / 0.
    """
    # Test presence explicitly instead of relying on the object's truthiness.
    has_file = file is not None

    return {
        "status": "processed",
        "message": message,
        "has_file": has_file,
        "filename": file.filename if has_file else None,
        "content_type": file.content_type if has_file else None,
        "file_size": file.size if has_file else 0,
    }
75125

76126

@@ -80,13 +130,28 @@ def lambda_handler(event, context):
80130
return app.resolve(event, context)
81131

82132

83-
# The File parameter provides:
84-
# 1. Automatic multipart/form-data parsing
85-
# 2. OpenAPI schema generation with proper file upload documentation
86-
# 3. Request validation with meaningful error messages
87-
# 4. Support for file constraints (max_length, etc.)
88-
# 5. Compatibility with WebKit and other browser boundary formats
89-
# 6. Base64-encoded request handling (common in AWS Lambda)
90-
# 7. Mixed file and form data support
91-
# 8. Multiple file upload support
92-
# 9. Optional file parameters
133+
# The File parameter now provides TWO approaches:
134+
#
135+
# 1. NEW UploadFile Class (Recommended):
136+
# - filename property (e.g., "document.pdf")
137+
# - content_type property (e.g., "application/pdf")
138+
# - size property (file size in bytes)
139+
# - headers property (dict of all multipart headers)
140+
# - read() method (flexible content access)
141+
# - Perfect for file reconstruction in Lambda/S3 scenarios
142+
#
143+
# 2. LEGACY bytes approach (Backward Compatible):
144+
# - Direct bytes content access
145+
# - Existing code continues to work unchanged
146+
# - Automatic conversion from UploadFile to bytes when needed
147+
#
148+
# Both approaches provide:
149+
# - Automatic multipart/form-data parsing
150+
# - OpenAPI schema generation with proper file upload documentation
151+
# - Request validation with meaningful error messages
152+
# - Support for file constraints (max_length, etc.)
153+
# - Compatibility with WebKit and other browser boundary formats
154+
# - Base64-encoded request handling (common in AWS Lambda)
155+
# - Mixed file and form data support
156+
# - Multiple file upload support
157+
# - Optional file parameters

0 commit comments

Comments
 (0)