Skip to content

Commit 72dd6a2

Browse files
committed
VED-470: Increase MESH processor CPU & memory for faster processing. Only process files from the inbound prefix. Set the content type from headers. Override newline handling as recommended in the documentation.
1 parent 383e928 commit 72dd6a2

File tree

3 files changed

+12
-2
lines changed

3 files changed

+12
-2
lines changed

mesh_processor/src/converter.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,19 @@ def transfer_multipart_content(bucket_name: str, file_key: str, boundary: bytes,
9898
if content_disposition:
9999
_, content_disposition_params = parse_header_value(content_disposition)
100100
filename = content_disposition_params.get("filename") or filename
101+
content_type = headers.get("Content-Type") or "application/octet-stream"
101102

102103
with open(
103104
f"s3://{DESTINATION_BUCKET_NAME}/streaming/{filename}",
104105
"wb",
105-
transport_params={"client": s3_client}
106+
transport_params={
107+
"client": s3_client,
108+
"client_kwargs": {
109+
"S3.Client.create_multipart_upload": {
110+
"ContentType": content_type
111+
}
112+
}
113+
}
106114
) as output_file:
107115
stream_part_body(input_file, boundary, output_file)
108116

recordprocessor/src/utils_for_recordprocessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def get_csv_content_dict_reader(file_key: str) -> DictReader:
1919
"""Returns the requested file contents from the source bucket in the form of a DictReader"""
2020
response = s3_client.get_object(Bucket=os.getenv("SOURCE_BUCKET_NAME"), Key=file_key)
2121
binary_io = response["Body"]
22-
text_io = TextIOWrapper(binary_io, encoding="utf-8")
22+
text_io = TextIOWrapper(binary_io, encoding="utf-8", newline="")
2323
return DictReader(text_io, delimiter="|")
2424

2525

terraform/mesh_processor.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ resource "aws_lambda_function" "mesh_file_converter_lambda" {
214214
image_uri = module.mesh_processor_docker_image[0].image_uri
215215
architectures = ["x86_64"]
216216
timeout = 900
217+
memory_size = 1024
217218

218219
environment {
219220
variables = {
@@ -241,6 +242,7 @@ resource "aws_s3_bucket_notification" "mesh_datasources_lambda_notification" {
241242
lambda_function {
242243
lambda_function_arn = aws_lambda_function.mesh_file_converter_lambda[0].arn
243244
events = ["s3:ObjectCreated:*"]
245+
filter_prefix = "inbound/"
244246
}
245247
}
246248

0 commit comments

Comments
 (0)