Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5213385
[PRMP-541] force runs virus scan
PedroSoaresNHS Nov 12, 2025
160a45e
Merge remote-tracking branch 'origin/main' into PRMP-541
PedroSoaresNHS Nov 12, 2025
97934fa
[PRMP-541] fixed formating issues
PedroSoaresNHS Nov 13, 2025
e3e34c8
[PRMP-541] catch correct exception
PedroSoaresNHS Nov 17, 2025
76eff55
[PRMP-541] added the stub
PedroSoaresNHS Nov 17, 2025
291bbd5
Merge remote-tracking branch 'origin/main' into PRMP-541
PedroSoaresNHS Nov 17, 2025
451c1cb
[PRMP-541] merged with main and fixed tests
PedroSoaresNHS Nov 17, 2025
4c96b31
[PRMP-541] increased test coverage and formated code
PedroSoaresNHS Nov 17, 2025
36c106b
Merge remote-tracking branch 'origin/main' into PRMP-541
PedroSoaresNHS Nov 18, 2025
00b2c40
[PRMP-541] add verification that the file was scanned successfully
PedroSoaresNHS Nov 20, 2025
7258557
Merge remote-tracking branch 'origin/main' into PRMP-541
PedroSoaresNHS Nov 20, 2025
5a706b6
[PRMP-541] merged with main
PedroSoaresNHS Nov 20, 2025
197259a
[PRMP-541] fixed comments
PedroSoaresNHS Nov 21, 2025
3955a02
[PRMP-541] fixed comments
PedroSoaresNHS Nov 21, 2025
3a30112
[PRMP-862] created kill switch
PedroSoaresNHS Nov 25, 2025
ad7d39e
[PRMP-862] testing getting transfer server ID from AWS instead of a v…
PedroSoaresNHS Nov 25, 2025
5f83c68
[PRMP-862] fixed tests
PedroSoaresNHS Nov 25, 2025
6a219fb
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Nov 26, 2025
e08868a
[PRMP-862] merged with main and fixed imports
PedroSoaresNHS Nov 26, 2025
f65299d
[PRMP-862] updated kill switch to trigger every time
PedroSoaresNHS Nov 27, 2025
a7b5664
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Nov 28, 2025
ad1d0b0
[PRMP-862] merged with main
PedroSoaresNHS Nov 28, 2025
19f01af
[PRMP-862] fixed comments
PedroSoaresNHS Dec 1, 2025
b03c7c8
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Dec 1, 2025
ab6557d
[PRMP-862] fixed comments
PedroSoaresNHS Dec 1, 2025
3119543
[PRMP-862] fixed tests
PedroSoaresNHS Dec 1, 2025
62f9309
[PRMP-862] fixed comments
PedroSoaresNHS Dec 1, 2025
6b6279e
[PRMP 866] Alarm & Alerting for AWS Transfer Family kill switch (#913)
PedroSoaresNHS Dec 17, 2025
8a6845e
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Dec 17, 2025
c114f6f
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Dec 18, 2025
2e03948
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Dec 19, 2025
893fa79
Merge remote-tracking branch 'origin/main' into PRMP-862
PedroSoaresNHS Dec 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/base-lambdas-reusable-deploy-all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,20 @@ jobs:
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

# Job: deploys the Transfer Family kill switch lambda by calling the shared
# reusable deploy workflow with this lambda's handler name, AWS name and layer.
deploy_transfer_kill_switch_lambda:
name: Deploy transfer kill switch lambda
uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
with:
environment: ${{ inputs.environment }}
python_version: ${{ inputs.python_version }}
build_branch: ${{ inputs.build_branch }}
sandbox: ${{ inputs.sandbox }}
lambda_handler_name: transfer_family_kill_switch_handler
lambda_aws_name: TransferFamilyKillSwitch
lambda_layer_names: "core_lambda_layer"
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

deploy_search_document_review_lambda:
name: Deploy Search Document Review
uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
Expand Down
10 changes: 10 additions & 0 deletions lambdas/handlers/transfer_family_kill_switch_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from services.expedite_transfer_family_kill_switch_service import ExpediteKillSwitchService
from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions
from utils.decorators.set_audit_arg import set_request_context_for_logging


@handle_lambda_exceptions
@set_request_context_for_logging
def lambda_handler(event, context):
    """Lambda entry point: hand the incoming event to the kill switch service.

    Args:
        event: The Lambda invocation event, passed through unchanged to
            ``ExpediteKillSwitchService.handle_sns_event``.
        context: The Lambda context object; not used directly in this body.

    Returns:
        Whatever ``handle_sns_event`` returns.
    """
    return ExpediteKillSwitchService().handle_sns_event(event)
233 changes: 233 additions & 0 deletions lambdas/services/expedite_transfer_family_kill_switch_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
import json
import os

import boto3

from utils.audit_logging_setup import LoggingService

# Module-level logger used by every function and method in this file.
logger = LoggingService(__name__)

# Scan result values that is_relevant_scan_result() accepts; any other value
# makes handle_scan_message() return early without taking action.
EXPECTED_SCAN_RESULTS = {"Infected", "Error", "Unscannable", "Suspicious"}


def response(message: str):
    """Build the lambda's standard reply: HTTP 200 with a JSON ``message`` body.

    Args:
        message: Human-readable outcome text to embed in the body.

    Returns:
        A dict with ``statusCode`` 200 and ``body`` set to the JSON encoding
        of ``{"message": message}``.
    """
    payload = {"message": message}
    return dict(statusCode=200, body=json.dumps(payload))

class ExpediteKillSwitchService:
    """Stops the AWS Transfer Family server in response to a virus scan notification.

    The service resolves the Transfer Family server ID from AWS, stops that
    server, and publishes a CloudWatch metric recording that the kill switch
    was activated.
    """

    def __init__(self):
        """Create the AWS clients and read configuration from the environment."""
        self.transfer_client = boto3.client("transfer")
        self.cloudwatch = boto3.client("cloudwatch")

        # Both default to "" when unset; an empty staging bucket makes
        # is_quarantine_expedite() always return False.
        self.staging_bucket = os.environ.get("STAGING_STORE_BUCKET_NAME", "")
        self.workspace = os.environ.get("WORKSPACE", "")

    def handle_sns_event(self, event: dict):
        """Stop the Transfer Family server for any incoming notification event.

        The event is only logged; its payload is not inspected, so the kill
        switch fires on every invocation for which a server ID can be resolved.

        Args:
            event: The notification event received by the lambda.

        Returns:
            A ``response(...)`` dict (statusCode 200) describing the outcome.
        """
        logger.info("Received SNS virus scan notification event", {"event": event})

        server_id = self.get_transfer_server_id()
        if not server_id:
            logger.warning(
                "No Transfer Family server ID resolved from AWS – kill switch disabled."
            )
            # Use the shared helper rather than hand-building the same dict.
            return response(
                "Transfer family kill switch disabled – no Transfer server ID discovered"
            )

        logger.warning(
            "Initiating Transfer Family shutdown.",
            {
                "server_id": server_id,
                "workspace": self.workspace,
            },
        )

        return self.stop_transfer_family_server(server_id)

    def handle_scan_message(self, server_id: str, message: dict):
        """Decide from a decoded scan message whether to stop the server.

        The server is stopped only when the scan result is ``"Infected"`` and
        the bucket/key identify a quarantined expedite file. Note that
        ``handle_sns_event`` does not call this method as written.

        Args:
            server_id: Transfer Family server to stop if the message qualifies.
            message: Decoded scan message; the keys ``scanResult``, ``bucket``
                and ``key`` are read.

        Returns:
            A ``response(...)`` dict describing the action taken (or not taken).
        """
        scan_result = message.get("scanResult")
        bucket = message.get("bucket")
        key = message.get("key")

        if not self.is_relevant_scan_result(scan_result):
            logger.info(
                f"Ignoring scan result '{scan_result}' – not one of {EXPECTED_SCAN_RESULTS}"
            )
            return response("Scan result not relevant, no action taken")

        if not self.has_required_fields(bucket, key):
            logger.error("SNS payload missing required 'bucket' or 'key' fields")
            return response("Invalid payload (missing bucket/key)")

        if not self.is_quarantine_expedite(bucket, key):
            logger.info(
                "Scan notification is not for an expedite file – no kill switch action",
                {
                    "bucket": bucket,
                    "key": key,
                    "staging_bucket": self.staging_bucket,
                    "workspace": self.workspace,
                },
            )
            return response("Not an expedite file, no action taken")

        # Other relevant-but-not-infected results are logged only.
        if scan_result != "Infected":
            logger.warning(
                "Non-clean scan result for expedite file, but not 'Infected' – no kill switch action",
                {
                    "scanResult": scan_result,
                    "bucket": bucket,
                    "key": key,
                    "workspace": self.workspace,
                },
            )
            return response(
                "Non-infected result for expedite file, no kill switch action"
            )

        logger.warning(
            "Initiating Transfer Family shutdown.",
            {
                "server_id": server_id,
                "bucket": bucket,
                "key": key,
                "scanResult": scan_result,
                "workspace": self.workspace,
            },
        )

        return self.stop_transfer_family_server(server_id)

    def is_relevant_scan_result(self, scan_result: str) -> bool:
        """Return True when ``scan_result`` is one of ``EXPECTED_SCAN_RESULTS``."""
        return scan_result in EXPECTED_SCAN_RESULTS

    def has_required_fields(self, bucket: str, key: str) -> bool:
        """Return True when both ``bucket`` and ``key`` are non-empty."""
        return bool(bucket and key)

    def is_quarantine_expedite(self, bucket: str, key: str) -> bool:
        """Return True when bucket/key point at a quarantined expedite file.

        Example quarantine:
            bucket = cloudstoragesecquarantine-...
            key    = "pre-prod-staging-bulk-store/expedite/..."
        Where key starts with "<staging bucket name>/expedite/".

        Returns False (rather than raising) when the staging bucket is not
        configured or when ``bucket`` / ``key`` is missing.
        """
        if not self.staging_bucket or not bucket or not key:
            return False

        quarantine_prefix = f"{self.staging_bucket}/expedite/"
        return (
            bucket.startswith("cloudstoragesecquarantine-")
            and key.startswith(quarantine_prefix)
        )

    def get_transfer_server_id(self) -> str:
        """Discover the Transfer Family server ID in this account/region.

        Only the first server returned by ``list_servers(MaxResults=1)`` is
        considered.

        Returns:
            The stripped ``ServerId`` of that server, or ``""`` if none exist
            or the lookup raises.
        """
        try:
            resp = self.transfer_client.list_servers(MaxResults=1)
            servers = resp.get("Servers", [])
            if not servers:
                logger.warning(
                    "No AWS Transfer Family servers found in account/region "
                    "– kill switch disabled."
                )
                return ""

            server_id = servers[0]["ServerId"].strip()
            logger.info(
                "Resolved Transfer server ID via list_servers",
                {"server_id": server_id},
            )
            return server_id

        except Exception as exc:
            # Best-effort lookup: callers treat "" as "kill switch disabled".
            logger.error(f"Failed to list Transfer Family servers: {exc}")
            return ""

    def extract_sns_message(self, event):
        """Decode the JSON message carried by the first record of an SNS event.

        Args:
            event: Event dict; ``Records[0]["Sns"]["Message"]`` is read.

        Returns:
            The parsed JSON value, or ``None`` when any of those parts is
            missing/empty or the message cannot be parsed.
        """
        try:
            records = event.get("Records")
            if not records:
                return None

            sns_record = records[0].get("Sns")
            if not sns_record:
                return None

            raw_message = sns_record.get("Message")
            if not raw_message:
                return None

            return json.loads(raw_message)

        except Exception as exc:
            logger.error(f"Failed to parse SNS message: {exc}")
            return None

    def stop_transfer_family_server(self, server_id: str):
        """Stop the given Transfer Family server and report the activation.

        Args:
            server_id: ID of the server to describe and stop.

        Returns:
            A ``response(...)`` dict whose message is one of: the server was
            stopped; it was stopped but the alert metric could not be
            published; the server was not found; or stopping failed.
        """
        try:
            desc = self.transfer_client.describe_server(ServerId=server_id)
            logger.info(
                "Transfer Family server found",
                {"server_id": server_id, "state": desc["Server"]["State"]},
            )

            self.transfer_client.stop_server(ServerId=server_id)
            logger.warning(
                f"Transfer Family server {server_id} STOPPED due to virus scan trigger"
            )
            try:
                published = self.report_kill_switch_activated(server_id=server_id)
            except Exception as metric_exc:
                logger.error(
                    f"Failed to publish kill switch metric: {metric_exc},"
                    f" leading to failing to inform that kill switch has been activated"
                )
                published = False

            # The reporter handles its own errors and signals failure with an
            # explicit False. Anything else (including a legacy None) counts
            # as success so existing overrides keep working.
            if published is False:
                return response(
                    f"Server {server_id} stopped, but failed to alert the team"
                )
            return response(f"Server {server_id} stopped")

        except self.transfer_client.exceptions.ResourceNotFoundException:
            logger.error(f"Transfer Family server '{server_id}' not found")
            return response("Server not found")

        except Exception as exc:
            logger.error(f"Failed to stop Transfer Family server: {exc}")
            return response("Failed to stop server")

    def report_kill_switch_activated(self, server_id: str) -> bool:
        """Publish the ``ServerStopped`` metric for this workspace.

        Failures to publish are logged and never raised, so a metric problem
        cannot mask the fact that the server was stopped.

        Args:
            server_id: ID of the stopped server (used in the log line).

        Returns:
            True when the metric was published, False when publishing failed.
        """
        published = True
        try:
            self.cloudwatch.put_metric_data(
                Namespace="Custom/TransferFamilyKillSwitch",
                MetricData=[
                    {
                        "MetricName": "ServerStopped",
                        "Dimensions": [
                            {"Name": "Workspace", "Value": self.workspace or "unknown"},
                        ],
                        "Value": 1.0,
                        "Unit": "Count",
                    }
                ],
            )
        except Exception as metric_exc:
            logger.error(
                f"Failed to publish kill switch metric: {metric_exc},"
                f" leading to failing to inform that kill switch has been activated"
            )
            published = False

        logger.warning(
            f"Transfer Family server {server_id} STOPPED due to infected expedite upload"
        )
        return published
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pytest
from handlers.transfer_family_kill_switch_handler import lambda_handler


@pytest.fixture
def mock_service(mocker):
    """Patch the handler's ExpediteKillSwitchService so it yields a mock instance.

    Returns the mock instance that the patched class will hand back when the
    handler constructs the service.
    """
    fake_service = mocker.Mock()
    patch_target = (
        "handlers.transfer_family_kill_switch_handler.ExpediteKillSwitchService"
    )
    mocker.patch(patch_target, return_value=fake_service)
    return fake_service


@pytest.fixture
def context(mocker):
    """Provide a mock Lambda context carrying a fixed ``aws_request_id``."""
    lambda_context = mocker.Mock()
    lambda_context.aws_request_id = "test-request-id"
    return lambda_context


def test_lambda_handler_delegates_to_service_handle_sns_event(mock_service, context):
    """The handler passes the event to the service and returns its result unchanged."""
    sns_event = {"Records": []}
    canned_reply = {"statusCode": 200, "body": '{"message": "ok"}'}
    mock_service.handle_sns_event.return_value = canned_reply

    actual = lambda_handler(sns_event, context)

    assert actual == canned_reply
    mock_service.handle_sns_event.assert_called_once_with(sns_event)
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

import pytest
from botocore.exceptions import ClientError
from freezegun import freeze_time

from enums.upload_status import UploadStatus
from enums.virus_scan_result import VirusScanResult
from freezegun import freeze_time
from models.staging_metadata import (
METADATA_FILENAME,
BulkUploadQueueMetadata,
Expand Down
Loading
Loading