Skip to content

Commit c8264c8

Browse files
committed
fix: Fixing CWE-409
1 parent 57ae7ec commit c8264c8

File tree

3 files changed

+20
-70
lines changed

3 files changed

+20
-70
lines changed

scripts/dynamodb-local-bulk-loader/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ uv run pytest --cov=src --cov-report=html
142142

143143
## Troubleshooting
144144

145+
**Security Note:** This project uses boto3/aioboto3, which depend on urllib3. Ensure you are using urllib3 >= 2.6.3 to avoid CWE-409 (improper handling of highly compressed data, i.e. a decompression-bomb vulnerability). The project's dependencies will pull in the correct version automatically.
146+
145147
**Throttling errors:**
146148
- Increase DynamoDB capacity
147149
- Reduce `--workers` to slow down writes

scripts/dynamodb-local-bulk-loader/tests/integration/test_async_loader_e2e.py

Lines changed: 15 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
#
2-
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3-
#
4-
# This file is licensed under the Apache License, Version 2.0 (the "License").
5-
# You may not use this file except in compliance with the License. A copy of
6-
# the License is located at
7-
#
8-
# http://aws.amazon.com/apache2.0/
9-
#
10-
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11-
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
12-
# specific language governing permissions and limitations under the License.
13-
#
141
"""End-to-end integration test for async loader.
152
163
This test validates the complete workflow:
@@ -103,9 +90,7 @@ async def test_async_loader_end_to_end(dynamodb_table: str, temp_csv_file: Path)
10390
"""
10491
with mock_aws():
10592
# Create the DynamoDB table within the mocked context
106-
async with aioboto3.Session().resource(
107-
"dynamodb", region_name="us-east-1"
108-
) as dynamodb:
93+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
10994
table = await dynamodb.create_table(
11095
TableName=dynamodb_table,
11196
KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
@@ -153,9 +138,7 @@ async def test_async_loader_end_to_end(dynamodb_table: str, temp_csv_file: Path)
153138

154139
# Step 4: Verify DynamoDB contents
155140
# Read all items from DynamoDB and verify they match the CSV
156-
async with aioboto3.Session().resource(
157-
"dynamodb", region_name="us-east-1"
158-
) as dynamodb:
141+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
159142
table = await dynamodb.Table(dynamodb_table)
160143

161144
# Scan table to get all items
@@ -164,25 +147,21 @@ async def test_async_loader_end_to_end(dynamodb_table: str, temp_csv_file: Path)
164147

165148
# Handle pagination if needed (shouldn't be necessary for 100 records)
166149
while "LastEvaluatedKey" in response:
167-
response = await table.scan(
168-
ExclusiveStartKey=response["LastEvaluatedKey"]
169-
)
150+
response = await table.scan(ExclusiveStartKey=response["LastEvaluatedKey"])
170151
actual_items.extend(response.get("Items", []))
171152

172153
# Verify count matches
173-
assert (
174-
len(actual_items) == num_records
175-
), f"DynamoDB should contain {num_records} items, found {len(actual_items)}"
154+
assert len(actual_items) == num_records, (
155+
f"DynamoDB should contain {num_records} items, found {len(actual_items)}"
156+
)
176157

177158
# Create lookup dictionary by id for easy comparison
178159
actual_by_id = {item["id"]: item for item in actual_items}
179160

180161
# Verify each expected record exists in DynamoDB with correct values
181162
for expected in expected_records:
182163
record_id = expected["id"]
183-
assert (
184-
record_id in actual_by_id
185-
), f"Record {record_id} should exist in DynamoDB"
164+
assert record_id in actual_by_id, f"Record {record_id} should exist in DynamoDB"
186165

187166
actual = actual_by_id[record_id]
188167

@@ -194,9 +173,7 @@ async def test_async_loader_end_to_end(dynamodb_table: str, temp_csv_file: Path)
194173
assert actual["email"] == expected["email"], "Email should match"
195174
assert actual["amount"] == expected["amount"], "Amount should match"
196175
assert actual["status"] == expected["status"], "Status should match"
197-
assert (
198-
actual["description"] == expected["description"]
199-
), "Description should match"
176+
assert actual["description"] == expected["description"], "Description should match"
200177

201178

202179
@pytest.mark.integration
@@ -212,9 +189,7 @@ async def test_async_loader_empty_csv(dynamodb_table: str, temp_csv_file: Path)
212189
"""
213190
with mock_aws():
214191
# Create the DynamoDB table within the mocked context
215-
async with aioboto3.Session().resource(
216-
"dynamodb", region_name="us-east-1"
217-
) as dynamodb:
192+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
218193
table = await dynamodb.create_table(
219194
TableName=dynamodb_table,
220195
KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
@@ -261,9 +236,7 @@ async def test_async_loader_empty_csv(dynamodb_table: str, temp_csv_file: Path)
261236

262237
@pytest.mark.integration
263238
@pytest.mark.asyncio
264-
async def test_async_loader_shuffle_verification(
265-
dynamodb_table: str, temp_csv_file: Path
266-
) -> None:
239+
async def test_async_loader_shuffle_verification(dynamodb_table: str, temp_csv_file: Path) -> None:
267240
"""Test that data shuffling prevents sequential writes.
268241
269242
This test validates Requirement 1.2: Data shuffling to prevent hot partitions.
@@ -279,9 +252,7 @@ async def test_async_loader_shuffle_verification(
279252
"""
280253
with mock_aws():
281254
# Create the DynamoDB table within the mocked context
282-
async with aioboto3.Session().resource(
283-
"dynamodb", region_name="us-east-1"
284-
) as dynamodb:
255+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
285256
table = await dynamodb.create_table(
286257
TableName=dynamodb_table,
287258
KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
@@ -317,9 +288,7 @@ async def test_async_loader_shuffle_verification(
317288
assert result.successful_writes == num_records, "All records should be written"
318289

319290
# Verify all IDs from CSV exist in DynamoDB
320-
async with aioboto3.Session().resource(
321-
"dynamodb", region_name="us-east-1"
322-
) as dynamodb:
291+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
323292
table = await dynamodb.Table(dynamodb_table)
324293
response = await table.scan()
325294
actual_items = response.get("Items", [])
@@ -333,9 +302,7 @@ async def test_async_loader_shuffle_verification(
333302

334303
@pytest.mark.integration
335304
@pytest.mark.asyncio
336-
async def test_async_loader_batch_processing(
337-
dynamodb_table: str, temp_csv_file: Path
338-
) -> None:
305+
async def test_async_loader_batch_processing(dynamodb_table: str, temp_csv_file: Path) -> None:
339306
"""Test batch processing with different batch sizes.
340307
341308
Validates that the loader correctly handles batching logic.
@@ -346,9 +313,7 @@ async def test_async_loader_batch_processing(
346313
"""
347314
with mock_aws():
348315
# Create the DynamoDB table within the mocked context
349-
async with aioboto3.Session().resource(
350-
"dynamodb", region_name="us-east-1"
351-
) as dynamodb:
316+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
352317
table = await dynamodb.create_table(
353318
TableName=dynamodb_table,
354319
KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
@@ -379,9 +344,7 @@ async def test_async_loader_batch_processing(
379344
assert result.failed_writes == 0, "No records should fail"
380345

381346
# Verify DynamoDB contains all records
382-
async with aioboto3.Session().resource(
383-
"dynamodb", region_name="us-east-1"
384-
) as dynamodb:
347+
async with aioboto3.Session().resource("dynamodb", region_name="us-east-1") as dynamodb:
385348
table = await dynamodb.Table(dynamodb_table)
386349
response = await table.scan()
387350
actual_items = response.get("Items", [])

scripts/dynamodb-local-bulk-loader/tests/property/test_input_validation.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
#
2-
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3-
#
4-
# This file is licensed under the Apache License, Version 2.0 (the "License").
5-
# You may not use this file except in compliance with the License. A copy of
6-
# the License is located at
7-
#
8-
# http://aws.amazon.com/apache2.0/
9-
#
10-
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
11-
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
12-
# specific language governing permissions and limitations under the License.
13-
#
141
"""Property-based tests for input validation across all loaders.
152
163
Feature: dynamodb-csv-bulk-loader
@@ -19,13 +6,13 @@
196
"""
207

218
import pytest
22-
from hypothesis import given, strategies as st
9+
from hypothesis import given
10+
from hypothesis import strategies as st
2311

2412
from src.async_loader import AsyncDynamoDBLoader
2513
from src.models import LoaderConfig
2614
from src.threaded_loader import ThreadedDynamoDBLoader
2715

28-
2916
# Valid AWS regions for testing
3017
VALID_REGIONS = [
3118
"us-east-1",
@@ -120,9 +107,7 @@ def test_property_empty_table_name_rejected(
120107
batch_size=st.integers(min_value=1, max_value=100),
121108
max_workers=st.integers(min_value=1, max_value=100),
122109
)
123-
def test_property_invalid_region_rejected(
124-
region: str, batch_size: int, max_workers: int
125-
) -> None:
110+
def test_property_invalid_region_rejected(region: str, batch_size: int, max_workers: int) -> None:
126111
"""Property: Invalid AWS region should be rejected with clear error.
127112
128113
For any region that is not a valid AWS region, the system should

0 commit comments

Comments
 (0)