Commit 6fc5e32

VED-804 Move record processor to Python Lambdas directory (#935)
1 parent be12e80 commit 6fc5e32


55 files changed: +252 -538 lines

.github/dependabot.yml

Lines changed: 5 additions & 4 deletions
@@ -7,12 +7,13 @@ version: 2
 updates:
   - package-ecosystem: "docker"
     directories:
-      - "/lambdas/ack_backend"
-      - "/lambdas/delta_backend"
+      - "/batch_processor_filter"
       - "/filenameprocessor"
       - "/infrastructure/grafana/non-prod/docker"
+      - "/lambdas/ack_backend"
+      - "/lambdas/delta_backend"
+      - "/lambdas/recordprocessor"
       - "/mesh_processor"
-      - "/recordprocessor"
       - "/sandbox"
     schedule:
       interval: "daily"
@@ -54,7 +55,7 @@ updates:
       - "/tests/e2e_batch"
      - "/filenameprocessor"
       - "/mesh_processor"
-      - "/recordprocessor"
+      - "/lambdas/recordprocessor"
       - "/lambdas/ack_backend"
       - "/lambdas/delta_backend"
       - "/lambdas/redis_sync"

.github/workflows/quality-checks.yml

Lines changed: 4 additions & 4 deletions
@@ -118,15 +118,15 @@ jobs:
           poetry run coverage xml -o ../batchprocessorfilter-coverage.xml

       - name: Run unittest with recordprocessor-coverage
-        working-directory: recordprocessor
+        working-directory: lambdas/recordprocessor
         id: recordprocessor
         env:
-          PYTHONPATH: ${{ github.workspace }}/recordprocessor/src:${{ github.workspace }}/recordprocessor/tests
+          PYTHONPATH: ${{ env.LAMBDA_PATH }}/recordprocessor/src:${{ env.LAMBDA_PATH }}/recordprocessor/tests:${{ env.SHARED_PATH }}/src
         continue-on-error: true
         run: |
           poetry install
-          poetry run coverage run -m unittest discover || echo "recordprocessor tests failed" >> ../failed_tests.txt
-          poetry run coverage xml -o ../recordprocessor-coverage.xml
+          poetry run coverage run --source=src -m unittest discover || echo "recordprocessor tests failed" >> ../../failed_tests.txt
+          poetry run coverage xml -o ../../recordprocessor-coverage.xml

       # This step is redundant - all of these tests will be run in the backend step below
       - name: Run unittest with recordforwarder-coverage
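
For orientation, the effect of the new PYTHONPATH is that the recordprocessor unit tests resolve imports from the Lambda's own src/ and tests/ directories as well as from the shared library under lambdas/shared/src. A minimal, purely illustrative test of that wiring might look like the sketch below; the test class and method names are assumptions, not part of this commit.

import unittest


class TestImportWiring(unittest.TestCase):
    def test_shared_package_is_importable(self):
        # Resolves because ${SHARED_PATH}/src (lambdas/shared/src) is now on PYTHONPATH in CI.
        import common  # noqa: F401

    def test_processor_code_is_importable(self):
        # Resolves because ${LAMBDA_PATH}/recordprocessor/src is on PYTHONPATH; assumes
        # batch_processor guards its entry point behind `if __name__ == "__main__"`.
        import batch_processor  # noqa: F401


if __name__ == "__main__":
    unittest.main()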

Makefile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 SHELL=/usr/bin/env bash -euo pipefail

-PYTHON_PROJECT_DIRS_WITH_UNIT_TESTS = backend batch_processor_filter filenameprocessor mesh_processor recordprocessor lambdas/ack_backend lambdas/delta_backend lambdas/redis_sync lambdas/id_sync lambdas/mns_subscription lambdas/shared
+PYTHON_PROJECT_DIRS_WITH_UNIT_TESTS = backend batch_processor_filter filenameprocessor mesh_processor lambdas/recordprocessor lambdas/ack_backend lambdas/delta_backend lambdas/redis_sync lambdas/id_sync lambdas/mns_subscription lambdas/shared
 PYTHON_PROJECT_DIRS = tests/e2e tests/e2e_batch quality_checks $(PYTHON_PROJECT_DIRS_WITH_UNIT_TESTS)

 .PHONY: install lint format format-check clean publish build-proxy release initialise-all-python-venvs update-all-python-dependencies run-all-python-unit-tests build-all-docker-images

immunisation-fhir-api.code-workspace

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
       "path": "filenameprocessor",
     },
     {
-      "path": "recordprocessor",
+      "path": "lambdas/recordprocessor",
     },
     {
       "path": "mesh_processor",

infrastructure/instance/ecs_batch_processor_config.tf

Lines changed: 7 additions & 5 deletions
@@ -5,7 +5,7 @@ resource "aws_ecs_cluster" "ecs_cluster" {

 # Locals for Lambda processing paths and hash
 locals {
-  processing_lambda_dir = abspath("${path.root}/../../recordprocessor")
+  processing_lambda_dir = abspath("${path.root}/../../lambdas/recordprocessor")
   processing_path_include = ["**"]
   processing_path_exclude = ["**/__pycache__/**"]
   processing_files_include = setunion([for f in local.processing_path_include : fileset(local.processing_lambda_dir, f)]...)
@@ -29,8 +29,9 @@ module "processing_docker_image" {
   source  = "terraform-aws-modules/lambda/aws//modules/docker-build"
   version = "8.1.0"

-  create_ecr_repo = false
-  ecr_repo        = aws_ecr_repository.processing_repository.name
+  create_ecr_repo  = false
+  docker_file_path = "./recordprocessor/Dockerfile"
+  ecr_repo         = aws_ecr_repository.processing_repository.name
   ecr_repo_lifecycle_policy = jsonencode({
     "rules" : [
       {
@@ -50,9 +51,10 @@ module "processing_docker_image" {

   platform      = "linux/amd64"
   use_image_tag = false
-  source_path = local.processing_lambda_dir
+  source_path   = abspath("${path.root}/../../lambdas")
   triggers = {
-    dir_sha = local.processing_lambda_dir_sha
+    dir_sha        = local.processing_lambda_dir_sha
+    shared_dir_sha = local.shared_dir_sha
   }
 }

lambdas/recordprocessor/Dockerfile

Lines changed: 13 additions & 11 deletions
@@ -6,23 +6,25 @@ RUN mkdir -p /home/appuser && \
     echo 'appuser:x:1001:' >> /etc/group && \
     chown -R 1001:1001 /home/appuser && pip install "poetry~=2.1.4"

-# Install Poetry as root
-COPY poetry.lock pyproject.toml README.md ./
-RUN poetry config virtualenvs.create false && poetry install --no-interaction --no-ansi --no-root --only main
+# Copy recordprocessor Poetry files
+COPY ./recordprocessor/poetry.lock ./recordprocessor/pyproject.toml ./

-# -----------------------------
-FROM base AS test
-COPY src src
-COPY tests tests
-RUN poetry install --no-interaction --no-ansi --no-root && \
-    pytest --disable-warnings tests
+# Install dependencies
+WORKDIR /var/task
+RUN poetry config virtualenvs.create false && poetry install --no-interaction --no-ansi --no-root --only main

 # -----------------------------
 FROM base AS build

-COPY src .
+# Copy shared source code
+COPY ./shared/src/common ./common
+
+# Copy recordprocessor source code
+COPY ./recordprocessor/src .
+
+# Set correct permissions
 RUN chmod 644 $(find . -type f) && chmod 755 $(find . -type d)
+
 # Switch to the non-root user for running the container
 USER 1001:1001
-CMD ["python", "batch_processor.py"]
 ENTRYPOINT ["python", "batch_processor.py"]
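
The build stage now places the shared `common` package next to the recordprocessor sources in /var/task, so runtime imports of shared code work without any path manipulation. One hedged way to sanity-check that layout (not part of this commit) is a small probe script run with the image's Python interpreter:

# probe.py - hypothetical layout check, not part of this commit; run it e.g. via
#   docker run --rm -v "$PWD/probe.py:/tmp/probe.py" --entrypoint python <image> /tmp/probe.py
import importlib.util
import sys

sys.path.insert(0, "/var/task")  # the image's WORKDIR, where the build stage copies the code

# batch_processor comes from ./recordprocessor/src, common from ./shared/src/common.
for name in ("batch_processor", "common"):
    spec = importlib.util.find_spec(name)
    print(name, "->", spec.origin if spec else "NOT FOUND")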

lambdas/recordprocessor/Makefile

@@ -1,3 +1,5 @@
+TEST_ENV := @PYTHONPATH=src:tests:../shared/src
+
 build:
 	docker build -t processor-lambda-build .

@@ -6,15 +8,15 @@ package:build
 	docker run --rm -v $(shell pwd)/build:/build processor-lambda-build

 test:
-	@PYTHONPATH=src:tests python -m unittest
+	$(TEST_ENV) python -m unittest

 coverage-run:
-	@PYTHONPATH=src:tests coverage run -m unittest discover
+	$(TEST_ENV) coverage run -m unittest discover

 coverage-report:
-	coverage report -m
+	$(TEST_ENV) coverage report -m

 coverage-html:
-	coverage html
+	$(TEST_ENV) coverage html

 .PHONY: build package

lambdas/recordprocessor/README.md

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+# record-processor project
+
+## Overview
+
+The Record Processor component is a core part of the batch processing system. The functionality is as follows:
+
+- receives a batch job event from EventBridge Pipes. The job contains the filename, so that the batch .csv/.dat file
+  can be retrieved, along with other metadata such as the supplier name and their permissions.
+- performs high-level validation, e.g. marking a job as processed if the file is empty, or as failed if the supplier
+  does not have permission to interact with the requested vaccination type.
+- reads the batch source file and processes the content row by row.
+- for each row, validates whether the supplier can perform the given operation (i.e. CREATE, UPDATE or DELETE)
+  and also checks for critical information such as the unique ID.
+- finally, if these checks pass, converts the flat CSV structure to an R4 Immunization FHIR JSON payload. The
+  mapping is performed in `utils_for_fhir_conversion.py`.
+- sends the content to Kinesis for further downstream processing, where the requested operations are performed on
+  the IEDS table.
+
+For more context, refer to the [Architecture Overview](https://nhsd-confluence.digital.nhs.uk/spaces/Vacc/pages/1035417049/Immunisation+FHIR+API+-+Solution+Architecture) in Confluence.
+
+Finally, it is worth noting that this package is **not** deployed as a Lambda function. As the file processing can take
+some time for particularly large extracts, it is run in AWS ECS.
+
+## Set up
+
+Simply follow the instructions from the root README on _Setting up a virtual environment with poetry_.
+As is the case for developing with any of the Python projects, it is easiest to create a `.env` file and a `.envrc` file,
+and then install the poetry dependencies using `poetry install --no-root`.
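
To make the row-level flow described in the README above more concrete, here is a heavily simplified sketch of the validate, convert and publish loop. Everything in it is illustrative: the column names, the helper structure and the FHIR mapping are assumptions rather than the actual recordprocessor code (which performs the mapping in `utils_for_fhir_conversion.py`); only the Kinesis call uses a real boto3 API.

# Illustrative sketch only - not the actual recordprocessor implementation.
import csv
import json

import boto3

ALLOWED_OPERATIONS = {"CREATE", "UPDATE", "DELETE"}
kinesis = boto3.client("kinesis")


def process_rows(rows: csv.DictReader, supplier_permissions: set[str], stream_name: str) -> None:
    for row in rows:
        operation = row.get("ACTION_FLAG", "").upper()  # hypothetical column name
        unique_id = row.get("UNIQUE_ID", "")            # hypothetical column name

        # Row-level validation: allowed operation, supplier permission, presence of the unique ID.
        if operation not in ALLOWED_OPERATIONS or operation not in supplier_permissions:
            continue  # the real processor records this as a row-level failure
        if not unique_id:
            continue

        # Flat CSV row -> R4 Immunization FHIR JSON payload (hypothetical minimal mapping).
        immunization = {
            "resourceType": "Immunization",
            "identifier": [{"value": unique_id}],
            "occurrenceDateTime": row.get("DATE_AND_TIME"),  # hypothetical column name
        }

        # Hand the result to Kinesis for downstream processing against the IEDS table.
        kinesis.put_record(
            StreamName=stream_name,
            Data=json.dumps({"operation": operation, "fhir_json": immunization}),
            PartitionKey=unique_id,
        )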

lambdas/recordprocessor/pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -4,7 +4,10 @@ version = "0.1.0"
 description = ""
 authors = ["Your Name <[email protected]>"]
 readme = "README.md"
-packages = [{include = "src"}]
+packages = [
+    {include = "src"},
+    {include = "common", from = "../shared/src"}
+]

 [tool.poetry.dependencies]
 python = "~3.11"
