Skip to content

Commit 304c3e3

Browse files
committed
fixed lint issue along with test case
1 parent 7857d02 commit 304c3e3

File tree

5 files changed

+95
-24
lines changed

5 files changed

+95
-24
lines changed

filenameprocessor/src/file_key_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def validate_file_key(file_key: str) -> tuple[str, str]:
5454
and version in Constants.VALID_VERSIONS
5555
and supplier # Note that if supplier could be identified, this also implies that ODS code is valid
5656
and is_valid_datetime(timestamp)
57-
and ((extension == "CSV") or (extension == "DAT")) # The DAT extension has been added for MESH file processing
57+
and ((extension == "CSV") or (extension == "DAT")) # The DAT extension has been added for MESH file processing
5858
):
5959
raise InvalidFileKeyError("Initial file validation failed: invalid file key")
6060

meshfileprocessor/src/converter.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import boto3
2-
import pandas as pd
3-
import io
42
import os
5-
import re
63

7-
def lambda_handler(event, context):
4+
5+
def lambda_handler(event, context):
86
s3 = boto3.client('s3')
9-
10-
# Destination bucket name
7+
8+
# Destination bucket name
119
destination_bucket = os.getenv("Destination_BUCKET_NAME")
1210

1311
for record in event["Records"]:
@@ -16,16 +14,16 @@ def lambda_handler(event, context):
1614
copy_source = {
1715
'Bucket': record["s3"]["bucket"]["name"],
1816
'Key': record["s3"]["object"]["key"]
19-
}
20-
17+
}
18+
2119
# Read the .dat file from S3
2220
dat_obj = s3.get_object(Bucket=bucket_name, Key=file_key)
23-
21+
2422
# Update the filename from Metadata
2523
file_name = ensure_dat_extension(dat_obj['Metadata'].get('mex-filename', None))
2624

2725
s3.copy_object(CopySource=copy_source, Bucket=destination_bucket, Key=file_name)
28-
26+
2927
return {
3028
'statusCode': 200,
3129
'body': 'Files converted and uploaded successfully!'
@@ -36,11 +34,11 @@ def ensure_dat_extension(file_name):
3634
if '.' in file_name:
3735
# Split the filename and extension
3836
base_name, extension = file_name.rsplit('.', 1)
39-
37+
4038
# Check if the extension is not 'dat'
4139
if extension != 'dat':
4240
file_name = f"{base_name}.dat"
43-
else:
41+
else:
4442
file_name += '.dat'
45-
43+
4644
return file_name
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from unittest import TestCase
2+
from unittest.mock import patch, MagicMock
3+
import os
4+
from src.converter import lambda_handler, ensure_dat_extension
5+
6+
7+
class TestLambdaHandler(TestCase):
    """Tests for the converter lambda entry point and its filename helper."""

    def test_lambda_handler_success(self):
        """A single S3 record is read from the source bucket and copied under the .dat name."""
        source_bucket = "source-bucket"
        source_key = "20250320121710483244_2DB240.dat"

        # Stand-in S3 client: the object metadata carries a .txt filename,
        # which the handler is expected to turn into a .dat destination key.
        s3_stub = MagicMock()
        s3_stub.get_object.return_value = {
            'Metadata': {'mex-filename': '20250320121710483244_2DB240.txt'}
        }

        s3_event = {
            "Records": [
                {
                    "s3": {
                        "bucket": {"name": source_bucket},
                        "object": {"key": source_key}
                    }
                }
            ]
        }

        # Patch the environment lookup and the boto3 client factory for the call.
        with patch('os.getenv', return_value="destination-bucket"), \
                patch('boto3.client', return_value=s3_stub):
            response = lambda_handler(s3_event, {})

        s3_stub.get_object.assert_called_with(Bucket=source_bucket, Key=source_key)
        s3_stub.copy_object.assert_called_with(
            CopySource={'Bucket': source_bucket, 'Key': source_key},
            Bucket="destination-bucket",
            Key="20250320121710483244_2DB240.dat"
        )
        self.assertEqual(response['statusCode'], 200)
        self.assertEqual(response['body'], 'Files converted and uploaded successfully!')

    def test_ensure_dat_extension_with_other_extension(self):
        """A non-'dat' extension is replaced by 'dat'."""
        given = "COVID19_Vaccinations_v5_YGM41_20240927T13005921.txt"
        wanted = "COVID19_Vaccinations_v5_YGM41_20240927T13005921.dat"
        self.assertEqual(ensure_dat_extension(given), wanted)

    def test_ensure_dat_extension_with_dat_extension(self):
        """A name that already ends in '.dat' is returned unchanged."""
        given = "COVID19_Vaccinations_v5_YGM41_20240927T13005921.dat"
        self.assertEqual(ensure_dat_extension(given), given)

    def test_ensure_dat_extension_without_extension(self):
        """A name without any extension gets '.dat' appended."""
        given = "COVID19_Vaccinations_v5_YGM41_20240927T13005921"
        wanted = "COVID19_Vaccinations_v5_YGM41_20240927T13005921.dat"
        self.assertEqual(ensure_dat_extension(given), wanted)

recordprocessor/src/utils_for_recordprocessor.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ def get_environment() -> str:
1717

1818

1919
def get_csv_content_dict_reader(file_key: str) -> DictReader:
20-
"""Returns the requested file contents from the source bucket in the form of a DictReader"""
20+
"""Returns the requested file contents from the source bucket in the form of a DictReader"""
2121
response = s3_client.get_object(Bucket=os.getenv("SOURCE_BUCKET_NAME"), Key=file_key)
2222
csv_data = response["Body"].read().decode("utf-8")
23-
23+
2424
# Verify and process the DAT file content coming from MESH
2525
if '.dat' in file_key:
2626
csv_data = extract_content(csv_data)
@@ -47,19 +47,20 @@ def invoke_filename_lambda(file_key: str, message_id: str) -> None:
4747
logger.error("Error invoking filename lambda: %s", error)
4848
raise
4949

50+
5051
def extract_content(dat_file_content):
    """Return the file payload embedded in multipart-style DAT text.

    The text is split on boundary lines (a fixed run of hyphens followed by
    digits). The first part that mentions both ``Content-Disposition`` and
    ``Content-Type`` is treated as the file part, and everything after the
    blank line that terminates its header block is returned with surrounding
    whitespace stripped.

    The header block is located by its terminating blank line rather than by
    a fixed character offset, so the result does not depend on the exact
    ``Content-Type`` value, on the order of the headers, or on whether lines
    end in LF or CRLF.

    Args:
        dat_file_content: Decoded text content of the DAT file.

    Returns:
        The payload text, or ``None`` when no part carries both headers.
    """
    boundary_pattern = re.compile(r'----------------------------\d+')
    # A blank line separates the part headers from the part body.
    header_end_pattern = re.compile(r'\r?\n\r?\n')

    for part in boundary_pattern.split(dat_file_content):
        if 'Content-Disposition' not in part or 'Content-Type' not in part:
            continue

        # Drop the line break left behind by the boundary line so that it is
        # not mistaken for (half of) the header/body separator.
        sections = header_end_pattern.split(part.lstrip('\r\n'), maxsplit=1)
        if len(sections) == 2:
            return sections[1].strip()

        # No blank line after the headers: fall back to everything that
        # follows the Content-Type header line.
        _, _, remainder = part[part.index('Content-Type'):].partition('\n')
        return remainder.strip()

    return None

recordprocessor/tests/test_utils_for_recordprocessor.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from tests.utils_for_recordprocessor_tests.mock_environment_variables import MOCK_ENVIRONMENT_DICT, BucketNames
1616

1717
with patch("os.environ", MOCK_ENVIRONMENT_DICT):
18-
from utils_for_recordprocessor import get_environment, get_csv_content_dict_reader, create_diagnostics_dictionary
18+
from utils_for_recordprocessor import get_environment, get_csv_content_dict_reader, create_diagnostics_dictionary, extract_content
1919

2020

2121
s3_client = boto3.client("s3", region_name=REGION_NAME)
@@ -76,6 +76,17 @@ def test_create_diagnostics_dictionary(self):
7676
},
7777
)
7878

79+
def test_extract_content_valid_input(self):
    """extract_content returns only the payload of a single multipart-style file part."""
    boundary_line = "----------------------------1234567890\n"
    header_row = "NHS_NUMBER|PERSON_FORENAME|PERSON_SURNAME|PERSON_DOB|PERSON_GENDER_CODE|PERSON_POSTCODE"

    # Boundary, part headers, blank line, one payload line, closing boundary.
    multipart_text = "".join(
        [
            boundary_line,
            "Content-Disposition: form-data; name=\"file\"; filename=\"COVID19_Vaccinations_v5_YGM41_20250312T113455981.csv\"\n",
            "Content-Type: text/csv\n\n",
            header_row + "\n",
            boundary_line,
        ]
    )

    self.assertEqual(extract_content(multipart_text), header_row)
89+
7990

8091
if __name__ == "__main__":
8192
unittest.main()

0 commit comments

Comments
 (0)