Skip to content

Commit ee0039c

Browse files
committed
VED-470: Tidy up. Add test. Add type hints.
1 parent b38a2a8 commit ee0039c

File tree

2 files changed

+80
-65
lines changed

2 files changed

+80
-65
lines changed

mesh_processor/src/converter.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import logging
22
import os
3+
from typing import BinaryIO
34

45
import boto3
56
from smart_open import open
@@ -12,7 +13,7 @@
1213
s3_client = boto3.client('s3')
1314

1415

15-
def parse_headers(headers_str: str):
16+
def parse_headers(headers_str: str) -> dict[str, str]:
1617
headers = dict(
1718
header_str.split(":", 1)
1819
for header_str in headers_str.split("\r\n")
@@ -21,7 +22,7 @@ def parse_headers(headers_str: str):
2122
return {k.strip(): v.strip() for k, v in headers.items()}
2223

2324

24-
def parse_header_value(header_value: str):
25+
def parse_header_value(header_value: str) -> tuple[str, dict[str, str]]:
2526
main_value, *params = header_value.split(";")
2627
parsed_params = dict(
2728
param.strip().split("=", 1)
@@ -31,15 +32,15 @@ def parse_header_value(header_value: str):
3132
return main_value, parsed_params
3233

3334

34-
def read_until_part_start(input_file, boundary):
35+
def read_until_part_start(input_file: BinaryIO, boundary: bytes) -> None:
    """Advance ``input_file`` to just past the next part-opening delimiter.

    Lines are read and discarded until one is exactly ``--<boundary>``
    followed by CRLF. On return the stream is positioned at the first byte
    after that delimiter line.

    Args:
        input_file: Binary stream to consume.
        boundary: Multipart boundary, without the leading ``--``.

    Raises:
        ValueError: If the stream ends before a delimiter line is seen.
    """
    # Loop-invariant, so build the expected delimiter line once.
    delimiter = b"--" + boundary + b"\r\n"
    while line := input_file.readline():
        if line == delimiter:
            return
    # readline() returned b"": the stream ended without a delimiter line.
    raise ValueError("Unexpected EOF")
4041

4142

42-
def read_headers_bytes(input_file):
43+
def read_headers_bytes(input_file: BinaryIO) -> bytes:
4344
headers_bytes = b''
4445
while line := input_file.readline():
4546
if line == b"\r\n":
@@ -49,13 +50,13 @@ def read_headers_bytes(input_file):
4950
raise ValueError("Unexpected EOF")
5051

5152

52-
def read_part_headers(input_file):
53+
def read_part_headers(input_file: BinaryIO) -> dict[str, str]:
    """Read one part's header block from ``input_file`` and parse it.

    The raw bytes returned by ``read_headers_bytes`` are decoded as UTF-8
    and passed to ``parse_headers``, whose result is returned unchanged.
    """
    raw_headers = read_headers_bytes(input_file)
    return parse_headers(raw_headers.decode("utf-8"))
5657

5758

58-
def stream_part_body(input_file, boundary, output_file):
59+
def stream_part_body(input_file: BinaryIO, boundary: bytes, output_file: BinaryIO) -> None:
5960
previous_line = None
6061
found_part_end = False
6162
while not found_part_end:
@@ -78,7 +79,7 @@ def stream_part_body(input_file, boundary, output_file):
7879
previous_line = line
7980

8081

81-
def transfer_multipart_content(bucket_name, file_key, boundary, filename):
82+
def transfer_multipart_content(bucket_name: str, file_key: str, boundary: bytes, filename: str) -> None:
8283
with open(
8384
f"s3://{bucket_name}/{file_key}",
8485
"rb",
@@ -100,7 +101,7 @@ def transfer_multipart_content(bucket_name, file_key, boundary, filename):
100101
stream_part_body(input_file, boundary, output_file)
101102

102103

103-
def process_record(record):
104+
def process_record(record: dict) -> None:
104105
bucket_name = record["s3"]["bucket"]["name"]
105106
file_key = record["s3"]["object"]["key"]
106107
logger.info(f"Processing {file_key}")
@@ -125,7 +126,7 @@ def process_record(record):
125126
logger.info(f"Transfer complete for {file_key}")
126127

127128

128-
def lambda_handler(event, _):
129+
def lambda_handler(event: dict, _context: dict) -> dict:
129130
success = True
130131

131132
for record in event["Records"]:

mesh_processor/tests/test_converter.py

Lines changed: 70 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,27 @@
66
from moto import mock_aws
77

88

9+
def invoke_lambda(file_key: str):
    """Call the converter's ``lambda_handler`` for one object in ``source-bucket``.

    Builds an event holding a single S3 record for ``file_key`` and passes
    an empty dict as the context argument. The handler's result is discarded.
    """
    # Local import so that globals can be mocked
    from converter import lambda_handler

    s3_record = {
        "s3": {
            "bucket": {"name": "source-bucket"},
            "object": {"key": file_key},
        }
    }
    event = {"Records": [s3_record]}
    lambda_handler(event, {})
25+
26+
927
@mock_aws
1028
@patch.dict(os.environ, {"DESTINATION_BUCKET_NAME": "destination-bucket"})
11-
class NewTests(TestCase):
29+
class TestLambdaHandler(TestCase):
1230
def setUp(self):
1331
s3 = boto3.client("s3", region_name="eu-west-2")
1432
s3.create_bucket(Bucket="source-bucket", CreateBucketConfiguration={"LocationConstraint": "eu-west-2"})
@@ -26,17 +44,7 @@ def test_non_multipart_content_type(self):
2644
}
2745
)
2846

29-
from converter import lambda_handler
30-
lambda_handler({
31-
"Records": [
32-
{
33-
"s3": {
34-
"bucket": {"name": "source-bucket"},
35-
"object": {"key": "test-csv-file.csv"}
36-
}
37-
}
38-
]
39-
}, {})
47+
invoke_lambda("test-csv-file.csv")
4048

4149
response = s3.get_object(Bucket="destination-bucket", Key="overridden-filename.csv")
4250
body = response["Body"].read().decode("utf-8")
@@ -51,17 +59,7 @@ def test_non_multipart_content_type_no_mesh_metadata(self):
5159
ContentType="text/csv",
5260
)
5361

54-
from converter import lambda_handler
55-
lambda_handler({
56-
"Records": [
57-
{
58-
"s3": {
59-
"bucket": {"name": "source-bucket"},
60-
"object": {"key": "test-csv-file.csv"}
61-
}
62-
}
63-
]
64-
}, {})
62+
invoke_lambda("test-csv-file.csv")
6563

6664
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
6765
body = response["Body"].read().decode("utf-8")
@@ -85,17 +83,7 @@ def test_multipart_content_type(self):
8583
ContentType="multipart/form-data; boundary=12345678",
8684
)
8785

88-
from converter import lambda_handler
89-
lambda_handler({
90-
"Records": [
91-
{
92-
"s3": {
93-
"bucket": {"name": "source-bucket"},
94-
"object": {"key": "test-dat-file.dat"}
95-
}
96-
}
97-
]
98-
}, {})
86+
invoke_lambda("test-dat-file.dat")
9987

10088
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
10189
body = response["Body"].read().decode("utf-8")
@@ -124,17 +112,7 @@ def test_multipart_content_type_multiple_parts(self):
124112
ContentType="multipart/form-data; boundary=12345678",
125113
)
126114

127-
from converter import lambda_handler
128-
lambda_handler({
129-
"Records": [
130-
{
131-
"s3": {
132-
"bucket": {"name": "source-bucket"},
133-
"object": {"key": "test-dat-file.dat"}
134-
}
135-
}
136-
]
137-
}, {})
115+
invoke_lambda("test-dat-file.dat")
138116

139117
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
140118
body = response["Body"].read().decode("utf-8")
@@ -158,18 +136,54 @@ def test_multipart_content_type_without_filename(self):
158136
ContentType="multipart/form-data; boundary=12345678",
159137
)
160138

161-
from converter import lambda_handler
162-
lambda_handler({
163-
"Records": [
164-
{
165-
"s3": {
166-
"bucket": {"name": "source-bucket"},
167-
"object": {"key": "test-dat-file.dat"}
168-
}
169-
}
170-
]
171-
}, {})
139+
invoke_lambda("test-dat-file.dat")
140+
141+
response = s3.get_object(Bucket="destination-bucket", Key="test-dat-file.dat")
142+
body = response["Body"].read().decode("utf-8")
143+
assert body == "some CSV content"
144+
145+
def test_multipart_content_type_without_headers(self):
    """A multipart part with no headers is written under the source object's key."""
    multipart_lines = [
        "--12345678",
        "",
        "some CSV content",
        "--12345678--",
        "",
    ]
    payload = "\r\n".join(multipart_lines).encode("utf-8")

    client = boto3.client("s3", region_name="eu-west-2")
    client.put_object(
        Bucket="source-bucket",
        Key="test-dat-file.dat",
        Body=payload,
        ContentType="multipart/form-data; boundary=12345678",
    )

    invoke_lambda("test-dat-file.dat")

    stored = client.get_object(Bucket="destination-bucket", Key="test-dat-file.dat")
    content = stored["Body"].read().decode("utf-8")
    assert content == "some CSV content"
166+
167+
def test_multipart_content_type_with_unix_line_endings(self):
    """Bare ``\\n`` line breaks inside a part body reach the destination object unchanged."""
    multipart_lines = [
        "--12345678",
        'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
        "Content-Type: text/csv",
        "",
        "some CSV content\nsplit across\nmultiple lines",
        "--12345678--",
        "",
    ]
    payload = "\r\n".join(multipart_lines).encode("utf-8")

    client = boto3.client("s3", region_name="eu-west-2")
    client.put_object(
        Bucket="source-bucket",
        Key="test-dat-file.dat",
        Body=payload,
        ContentType="multipart/form-data; boundary=12345678",
    )

    invoke_lambda("test-dat-file.dat")

    stored = client.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
    content = stored["Body"].read().decode("utf-8")
    assert content == "some CSV content\nsplit across\nmultiple lines"

0 commit comments

Comments
 (0)