Skip to content

Commit 583a50e

Browse files
committed
VED-470: Handle some extra cases. Fix EOF check. Upgrade to Python 3.11.
1 parent 5207702 commit 583a50e

File tree

6 files changed: +62 -10 lines changed

6 files changed: +62 -10 lines changed

.github/workflows/sonarcloud.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ jobs:
107107
id: meshprocessor
108108
continue-on-error: true
109109
run: |
110-
poetry env use 3.10
110+
poetry env use 3.11
111111
poetry install
112112
poetry run coverage run -m unittest discover || echo "mesh_processor tests failed" >> ../failed_tests.txt
113113
poetry run coverage xml -o ../mesh_processor-coverage.xml

mesh_processor/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM public.ecr.aws/lambda/python:3.10 AS base
1+
FROM public.ecr.aws/lambda/python:3.11 AS base
22

33
# Create a non-root user with a specific UID and GID
44
RUN mkdir -p /home/appuser && \

mesh_processor/poetry.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

mesh_processor/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ readme = "README.md"
77
packages = [{include = "src"}]
88

99
[tool.poetry.dependencies]
10-
python = "~3.10"
10+
python = "~3.11"
1111
boto3 = "~1.38.42"
1212
mypy-boto3-dynamodb = "^1.38.4"
1313
moto = {extras = ["s3"], version = "^5.1.8"}

mesh_processor/src/converter.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,24 +57,23 @@ def read_part_headers(input_file: BinaryIO) -> dict[str, str]:
5757
def stream_part_body(input_file: BinaryIO, boundary: bytes, output_file: BinaryIO) -> None:
5858
previous_line = None
5959
found_part_end = False
60-
while not found_part_end:
61-
if (line := input_file.readline()) is None:
62-
raise ValueError("Unexpected EOF")
63-
60+
while line := input_file.readline():
6461
if line == b"--" + boundary + b"\r\n":
6562
logger.warning("Found additional part which will not be processed")
6663
found_part_end = True
67-
if line == b"--" + boundary + b"--\r\n":
64+
if line.startswith(b"--" + boundary + b"--"):
6865
found_part_end = True
6966

7067
if previous_line is not None:
7168
if found_part_end:
7269
# The final \r\n is part of the encapsulation boundary, so should not be included
7370
output_file.write(previous_line.rstrip(b'\r\n'))
71+
return
7472
else:
7573
output_file.write(previous_line)
7674

7775
previous_line = line
76+
raise ValueError("Unexpected EOF")
7877

7978

8079
def transfer_multipart_content(bucket_name: str, file_key: str, boundary: bytes, filename: str) -> None:

mesh_processor/tests/test_converter.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def test_non_multipart_content_type_no_mesh_metadata(self):
6767

6868
def test_multipart_content_type(self):
6969
body = "\r\n".join([
70+
"",
7071
"--12345678",
7172
'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
7273
"Content-Type: text/csv",
@@ -91,6 +92,7 @@ def test_multipart_content_type(self):
9192

9293
def test_multipart_content_type_multiple_parts(self):
9394
body = "\r\n".join([
95+
"",
9496
"--12345678",
9597
'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
9698
"Content-Type: text/csv",
@@ -120,6 +122,7 @@ def test_multipart_content_type_multiple_parts(self):
120122

121123
def test_multipart_content_type_without_filename(self):
122124
body = "\r\n".join([
125+
"",
123126
"--12345678",
124127
'Content-Disposition: form-data',
125128
"Content-Type: text/csv",
@@ -144,6 +147,7 @@ def test_multipart_content_type_without_filename(self):
144147

145148
def test_multipart_content_type_without_headers(self):
146149
body = "\r\n".join([
150+
"",
147151
"--12345678",
148152
"",
149153
"some CSV content",
@@ -166,6 +170,7 @@ def test_multipart_content_type_without_headers(self):
166170

167171
def test_multipart_content_type_with_unix_line_endings(self):
168172
body = "\r\n".join([
173+
"",
169174
"--12345678",
170175
'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
171176
"Content-Type: text/csv",
@@ -187,3 +192,51 @@ def test_multipart_content_type_with_unix_line_endings(self):
187192
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
188193
body = response["Body"].read().decode("utf-8")
189194
assert body == "some CSV content\nsplit across\nmultiple lines"
195+
196+
def test_multipart_content_type_missing_first_newline(self):
197+
body = "\r\n".join([
198+
"--12345678",
199+
'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
200+
"Content-Type: text/csv",
201+
"",
202+
"some CSV content",
203+
"--12345678--",
204+
""
205+
])
206+
s3 = boto3.client("s3", region_name="eu-west-2")
207+
s3.put_object(
208+
Bucket="source-bucket",
209+
Key="test-dat-file.dat",
210+
Body=body.encode("utf-8"),
211+
ContentType="multipart/form-data; boundary=12345678",
212+
)
213+
214+
invoke_lambda("test-dat-file.dat")
215+
216+
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
217+
body = response["Body"].read().decode("utf-8")
218+
assert body == "some CSV content"
219+
220+
def test_multipart_content_type_missing_final_newline(self):
221+
body = "\r\n".join([
222+
"",
223+
"--12345678",
224+
'Content-Disposition: form-data; name="File"; filename="test-csv-file.csv"',
225+
"Content-Type: text/csv",
226+
"",
227+
"some CSV content",
228+
"--12345678--",
229+
])
230+
s3 = boto3.client("s3", region_name="eu-west-2")
231+
s3.put_object(
232+
Bucket="source-bucket",
233+
Key="test-dat-file.dat",
234+
Body=body.encode("utf-8"),
235+
ContentType="multipart/form-data; boundary=12345678",
236+
)
237+
238+
invoke_lambda("test-dat-file.dat")
239+
240+
response = s3.get_object(Bucket="destination-bucket", Key="test-csv-file.csv")
241+
body = response["Body"].read().decode("utf-8")
242+
assert body == "some CSV content"

0 commit comments

Comments
 (0)