Skip to content

Commit 461ab13

Browse files
committed
Merge branch 'main' into feauture/te-sd-ELI-452-cicd-improvements
2 parents 7484b2f + 5a68678 commit 461ab13

File tree

11 files changed

+495
-40
lines changed

11 files changed

+495
-40
lines changed

.github/workflows/base-deploy.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,13 @@ jobs:
153153
echo "Running: make terraform env=$ENVIRONMENT workspace=$WORKSPACE stack=api-layer tf-command=apply"
154154
make terraform env=$ENVIRONMENT stack=api-layer tf-command=apply workspace=$WORKSPACE
155155
156+
- name: "Validate Feature Toggles"
157+
env:
158+
ENV: ${{ needs.metadata.outputs.environment }}
159+
run: |
160+
pip install boto3
161+
python scripts/feature_toggle/validate_toggles.py
162+
156163
- name: "Tag and Release"
157164
if: ${{ needs.metadata.outputs.environment == 'preprod' || needs.metadata.outputs.environment == 'prod' }}
158165
env:

infrastructure/stacks/api-layer/iam_policies.tf

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ data "aws_iam_policy_document" "s3_audit_kms_key_policy" {
341341
#checkov:skip=CKV_AWS_356: Root user needs full KMS key management
342342
#checkov:skip=CKV_AWS_109: Root user needs full KMS key management
343343

344+
# Allow root user to have full control
344345
statement {
345346
sid = "EnableIamUserPermissions"
346347
effect = "Allow"
@@ -351,12 +352,20 @@ data "aws_iam_policy_document" "s3_audit_kms_key_policy" {
351352
actions = ["kms:*"]
352353
resources = ["*"]
353354
}
355+
356+
# Allow Lambda, Firehose, and external write roles to use the KMS key
354357
statement {
355-
sid = "AllowLambdaFullWrite"
358+
sid = "AllowAuditKeyAccess"
356359
effect = "Allow"
357360
principals {
358361
type = "AWS"
359-
identifiers = [aws_iam_role.eligibility_lambda_role.arn, aws_iam_role.eligibility_audit_firehose_role.arn]
362+
identifiers = concat(
363+
[
364+
aws_iam_role.eligibility_lambda_role.arn,
365+
aws_iam_role.eligibility_audit_firehose_role.arn
366+
],
367+
aws_iam_role.write_access_role[*].arn
368+
)
360369
}
361370
actions = [
362371
"kms:Decrypt",
@@ -459,3 +468,39 @@ resource "aws_kms_key_policy" "sns_encryption_key_policy" {
459468
]
460469
})
461470
}
471+
472+
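# Policy doc for external write role to list, read, write, and tag objects in S3 (NOTE(review): s3:DeleteObject is not granted, so a "move" cannot remove the source object - confirm this is intended)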
473+
data "aws_iam_policy_document" "external_s3_read_move_policy_doc" {
474+
statement {
475+
sid = "ListBucket"
476+
actions = [
477+
"s3:ListBucket",
478+
"s3:ListBucketVersions"
479+
]
480+
resources = [
481+
module.s3_audit_bucket.storage_bucket_arn
482+
]
483+
}
484+
485+
statement {
486+
sid = "ReadMoveTagObjects"
487+
actions = [
488+
"s3:GetObject",
489+
"s3:GetObjectVersion",
490+
"s3:GetObjectTagging",
491+
"s3:PutObjectTagging",
492+
"s3:PutObject"
493+
]
494+
resources = [
495+
"${module.s3_audit_bucket.storage_bucket_arn}/*"
496+
]
497+
}
498+
}
499+
500+
# Attach external S3 read, move & tagging policy to external write role
501+
resource "aws_iam_role_policy" "external_s3_read_move_policy" {
502+
count = length(aws_iam_role.write_access_role)
503+
name = "S3ReadMoveTagAccess"
504+
role = aws_iam_role.write_access_role[count.index].id
505+
policy = data.aws_iam_policy_document.external_s3_read_move_policy_doc.json
506+
}

infrastructure/stacks/api-layer/ssm.tf

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,25 @@ resource "aws_ssm_parameter" "splunk_hec_endpoint" {
9292
ignore_changes = [value]
9393
}
9494
}
95+
96+
# One SSM parameter per entry in scripts/feature_toggle/feature_toggle.json.
# The JSON file is keyed by toggle name; each entry carries metadata
# (purpose, ticket, created) plus `default_state` and an optional
# `env_overrides` map keyed by environment name.
resource "aws_ssm_parameter" "feature_toggles" {
  for_each = jsondecode(file("${path.module}/../../../scripts/feature_toggle/feature_toggle.json"))

  name = "/${var.environment}/feature_toggles/${each.key}"
  #checkov:skip=CKV2_AWS_34: Since this is a feature toggle config, secure string not needed
  type = "String"

  # `env_overrides` and `default_state` are defaulted with try() so that an
  # entry omitting them does not fail the plan; this mirrors the defaults
  # applied by scripts/feature_toggle/validate_toggles.py ({} and false).
  # tostring() makes the bool -> "true"/"false" conversion explicit.
  value = tostring(
    lookup(
      try(each.value.env_overrides, {}),
      var.environment,
      try(each.value.default_state, false)
    )
  )

  tags = {
    Environment = var.environment
    ManagedBy   = "terraform"
    Purpose     = each.value.purpose
    Ticket      = each.value.ticket
    Created     = each.value.created
  }

  # The value is only seeded by Terraform; later changes made directly in
  # SSM are not reverted on the next apply.
  lifecycle {
    ignore_changes = [value]
  }
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
3+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import json
2+
import logging
3+
import os
4+
import sys
5+
6+
import boto3
7+
8+
logging.basicConfig(level=logging.INFO, format='%(message)s')
9+
10+
11+
def _expected_toggle_value(toggle_details, environment):
    """Return the lower-cased string a toggle is expected to hold in SSM.

    An entry for ``environment`` in ``env_overrides`` wins; otherwise
    ``default_state`` is used, falling back to ``False`` when it is absent.
    """
    default_state = toggle_details.get("default_state", False)
    # `or {}` also tolerates an explicit JSON null for env_overrides.
    env_overrides = toggle_details.get("env_overrides") or {}
    return str(env_overrides.get(environment, default_state)).lower()


def validate_feature_toggles():
    """Verify that every toggle in feature_toggle.json exists in SSM with the expected value.

    Reads the target environment from the ``ENV`` environment variable and
    the toggle definitions from ``feature_toggle.json`` next to this script,
    then looks up ``/<ENV>/feature_toggles/<toggle_name>`` in SSM for each
    entry. The comparison against the expected value is case-insensitive.

    Exits the process with status 1 when ``ENV`` is unset, the JSON file is
    missing or invalid, any toggle is missing or mismatched, or any other
    error occurs. Returns normally (``None``) when every toggle matches.

    NOTE(review): ssm.tf declares ``ignore_changes = [value]`` for these
    parameters, so a toggle flipped directly in SSM will be reported here as
    a mismatch on the next pipeline run - confirm that is intended.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    toggles_file_path = os.path.join(script_dir, "feature_toggle.json")
    toggles_file_name = os.path.basename(toggles_file_path)

    environment = os.getenv("ENV")
    if not environment:
        # Logged directly instead of being routed through KeyError, whose
        # str() wraps the message in an extra pair of quotes.
        logging.error("FATAL: The 'ENV' environment variable is not set.")
        sys.exit(1)

    try:
        logging.info(f"Verifying toggles from '{toggles_file_name}' in environment: {environment}")

        with open(toggles_file_path, "r", encoding="utf-8") as f:
            toggles_data = json.load(f)

        if not isinstance(toggles_data, dict):
            logging.error(f"FATAL: '{toggles_file_path}' must contain a JSON object keyed by toggle name.")
            sys.exit(1)

        ssm_client = boto3.client("ssm")
        missing_toggles = []
        mismatched_toggles = []

        for toggle_name, toggle_details in toggles_data.items():
            parameter_name = f"/{environment}/feature_toggles/{toggle_name}"
            expected_state_str = _expected_toggle_value(toggle_details, environment)

            logging.info(f"Checking for: {parameter_name} (expected value: {expected_state_str})")

            try:
                parameter = ssm_client.get_parameter(Name=parameter_name)
            except ssm_client.exceptions.ParameterNotFound:
                logging.error(f"--> MISSING: {parameter_name}")
                missing_toggles.append(parameter_name)
                continue

            actual_state = parameter["Parameter"]["Value"]
            if actual_state.lower() != expected_state_str:
                logging.error(
                    f"--> MISMATCH: {parameter_name} - Expected '{expected_state_str}', but found '{actual_state}'"
                )
                mismatched_toggles.append((parameter_name, expected_state_str, actual_state))

        if missing_toggles:
            logging.error("\nERROR: The following required feature toggles were not found in SSM:")
            for toggle in missing_toggles:
                logging.error(f"- {toggle}")

        if mismatched_toggles:
            logging.error("\nERROR: The following feature toggles have incorrect values in SSM:")
            for name, expected, actual in mismatched_toggles:
                logging.error(f"- {name}: Expected '{expected}', but found '{actual}'")

        if missing_toggles or mismatched_toggles:
            sys.exit(1)

        logging.info("\nSuccess: All required feature toggles are present in SSM with the correct values.")

    except FileNotFoundError:
        logging.error(f"FATAL: '{toggles_file_path}' not found.")
        sys.exit(1)
    except KeyError as e:
        logging.error(f"FATAL: {e}")
        sys.exit(1)
    except json.JSONDecodeError:
        logging.error(f"FATAL: Could not decode JSON from '{toggles_file_path}'. Please check for syntax errors.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a CI script: report and fail the job.
        # sys.exit() above raises SystemExit, which is not caught here.
        logging.error(f"An unexpected error occurred: {e}")
        sys.exit(1)


if __name__ == "__main__":
    validate_feature_toggles()

src/eligibility_signposting_api/feature_toggle/__init__.py

Whitespace-only changes.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Feature Toggles
2+
3+
Feature toggles allow us to deploy code to production in a disabled state, enabling it later without a new deployment.
4+
5+
## How It Works
6+
7+
Our feature toggle system is built on **AWS Systems Manager (SSM) Parameter Store**.
8+
9+
1. **Single Source of Truth**: AWS SSM is the single source of truth for the current state (`true` or `false`) of all feature toggles.
10+
2. **Infrastructure as Code**: Toggles are defined in Terraform, ensuring configuration is version-controlled and repeatable across environments.
11+
3. **CI/CD Validation**: The `feature_toggle.json` file in the repository lists all toggles the application requires. After Terraform has been applied, the CI/CD pipeline checks that every toggle in this file exists in AWS SSM with the expected value for the environment, and fails the deployment if any toggle is missing or mismatched.
12+
4. **Runtime Caching**: The application code uses a cached `is_feature_enabled()` function to check a toggle's state at runtime, minimizing calls to AWS and ensuring high performance.
13+
14+
## Developer Workflow
15+
16+
### Step 1: Define the Toggle (The Single Source of Truth)
17+
18+
Adding a new toggle is a single-step process. You only need to add a new entry to the `feature_toggle.json` file. This file defines the toggle's metadata and its intended state for each environment.
19+
20+
`default_state`: The safe, production-like state (usually `false`).
21+
22+
`env_overrides`: An optional map to set a different state for specific environments (e.g., enabling the feature in `dev` and `test` for QA). If an environment is not listed, it uses the `default_state`.
23+
24+
**File: [feature_toggle.json](../../../scripts/feature_toggle/feature_toggle.json)**
25+
26+
```json
27+
{
28+
"enable_dynamic_status_text": {
29+
"purpose": "Enables dynamic status text based on conditions.",
30+
"ticket": "ELI-427",
31+
"created": "2025-09-02",
32+
"default_state": false,
33+
"env_overrides": {
34+
"dev": true,
35+
"test": true
36+
}
37+
}
38+
}
39+
```
40+
41+
Our Terraform setup automatically reads this file and creates the corresponding SSM parameters. You do not need to write new Terraform code for each toggle.
42+
43+
**File: [ssm.tf](../../../infrastructure/stacks/api-layer/ssm.tf) (For Reference—No edits needed)**
44+
45+
```terraform
46+
resource "aws_ssm_parameter" "feature_toggles" {
47+
for_each = jsondecode(file("${path.module}/../../../scripts/feature_toggle/feature_toggle.json"))
48+
49+
name = "/${var.environment}/feature_toggles/${each.key}"
50+
type = "String"
51+
52+
value = lookup(each.value.env_overrides, var.environment, each.value.default_state)
53+
54+
tags = {
55+
Environment = var.environment
56+
ManagedBy = "terraform"
57+
Purpose = each.value.purpose
58+
Ticket = each.value.ticket
59+
Created = each.value.created
60+
}
61+
62+
lifecycle {
63+
ignore_changes = [value]
64+
}
65+
}
66+
```
67+
68+
### Step 2: Implement and Test the Logic
69+
70+
Import and use the `is_feature_enabled()` function to create a conditional code path.
71+
72+
**File (Example): `eligibility_signposting_api/services/calculators/eligibility_calculator.py`**
73+
74+
```python
75+
from eligibility_signposting_api.feature_toggle.feature_toggle import is_feature_enabled
76+
77+
if is_feature_enabled("enable_dynamic_status_text"):
78+
# New feature logic
79+
status_text = self.get_status_text(active_iteration.status_text, ConditionName(cc.target), status)
80+
else:
81+
# Existing (old) logic
82+
status_text = status.get_default_status_text(ConditionName(cc.target))
83+
```
84+
85+
You must write unit tests that cover both the "on" and "off" states of the toggle. Use `pytest.mark.parametrize` to run the same test with both states and `unittest.mock.patch` to control the toggle's return value.
86+
87+
**Important**: The patch path must point to **where the function is used**, not where it is defined.
88+
89+
**File (Example): `tests/unit/services/calculators/test_eligibility_calculator.py`**
90+
91+
```python
92+
import pytest
93+
from unittest.mock import patch
94+
95+
@pytest.mark.parametrize(
96+
"enable_dynamic_status_text, expected_rsv_text",
97+
[
98+
(True, "You are not eligible to take RSV vaccine"), # Case 1: Toggle is ON
99+
(False, "We do not believe you can have it"), # Case 2: Toggle is OFF
100+
],
101+
)
102+
@patch("eligibility_signposting_api.services.calculators.eligibility_calculator.is_feature_enabled")
103+
def test_status_text_is_conditional_on_toggle(
104+
mock_is_feature_enabled,
105+
enable_dynamic_status_text,
106+
expected_rsv_text,
107+
faker: Faker
108+
):
109+
110+
# This mock controls the toggle for the test run
111+
mock_is_feature_enabled.return_value = enable_dynamic_status_text
112+
113+
# Given, When, Then...
114+
assert actual_text_from_audit == expected_rsv_text
115+
```
116+
117+
### Step 3: Commit and Deploy (The Automation)
118+
119+
1. Terraform Apply: During the infrastructure step of deployment, Terraform executes the ssm.tf configuration. It reads the updated feature_toggle.json file.
120+
2. Creation: Because of the `for_each` loop, Terraform detects the new feature toggle entry and creates an `aws_ssm_parameter` resource for it, with the correct name (e.g., `/dev/feature_toggles/enable_dynamic_status_text`) and the appropriate initial value based on the environment (`true` for `dev` and `test`, `false` for others).
121+
3. Validation: Immediately afterwards, the `validate_toggles.py` script runs. It reads the same JSON file, sees that the feature toggle is required, and queries AWS SSM to confirm that Terraform successfully created it with the expected value.
122+
123+
### Step 4: Cleanup Process
124+
125+
Feature toggles are **technical debt**. Once a feature is fully released and stable, the toggle and all associated conditional logic must be removed.
126+
127+
Follow the **"Two-Ticket" Rule**:
128+
129+
1. When you create a ticket to add a feature toggle, immediately create a second ticket to remove it.
130+
2. Link the two tickets.
131+
3. Once the feature is permanently enabled, schedule the cleanup ticket in an upcoming sprint to remove the toggle from:
132+
- The application code
133+
- All related test code
134+
- The `feature_toggle.json` file
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import logging
2+
import os
3+
4+
import boto3
5+
from botocore.exceptions import ClientError
6+
from cachetools import TTLCache, cached
7+
8+
aws_region = os.getenv("AWS_DEFAULT_REGION")
9+
ssm_client = boto3.client("ssm", region_name=aws_region)
10+
environment = os.getenv("ENV")
11+
feature_toggles_prefix = f"/{environment}/feature_toggles/"
12+
13+
logger = logging.getLogger(__name__)
14+
15+
ssm_cache_in_seconds = TTLCache(maxsize=128, ttl=300)
16+
17+
18+
@cached(ssm_cache_in_seconds)
def get_ssm_parameter(parameter_name: str) -> str:
    """Fetch a parameter value from SSM, memoised through ``ssm_cache_in_seconds``.

    Returns the parameter's value, or the string ``"false"`` when the
    parameter does not exist or the SSM call raises a ``ClientError``.
    Because the function is wrapped by ``cached``, the fallback value is
    stored in the cache just like a successful lookup.
    """
    logger.info("Fetching '%s' from AWS SSM (not from cache).", parameter_name)
    fallback_value = "false"

    try:
        ssm_response = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
    except ssm_client.exceptions.ParameterNotFound:
        logger.warning("Parameter '%s' not found in SSM.", parameter_name)
        return fallback_value
    except ClientError:
        logger.exception("An AWS client error occurred fetching '%s' from SSM.", parameter_name)
        return fallback_value

    return ssm_response["Parameter"]["Value"]
30+
31+
32+
def is_feature_enabled(feature_name: str) -> bool:
    """Return ``True`` only when the toggle's SSM value is ``"true"``.

    The value is looked up under ``feature_toggles_prefix`` and compared
    after lower-casing and stripping surrounding whitespace; any other
    value yields ``False``.
    """
    raw_value = get_ssm_parameter(feature_toggles_prefix + feature_name)
    normalised_value = raw_value.lower().strip()
    return normalised_value == "true"

src/eligibility_signposting_api/services/processors/action_rule_handler.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,14 @@ def _handle(self, person: Person, best_active_iteration: Iteration, rule_type: R
5050
matched_action_rule_priority, matched_action_rule_name = None, None
5151
for _, rule_group in groupby(sorted_rules_by_priority, key=priority_getter):
5252
rule_group_list = list(rule_group)
53-
matcher_matched_list = [
53+
54+
all_rules_matched = all(
5455
RuleCalculator(person=person, rule=rule).evaluate_exclusion()[1].matcher_matched
5556
for rule in rule_group_list
56-
]
57+
)
5758

5859
comms_routing = rule_group_list[0].comms_routing
59-
if comms_routing and all(matcher_matched_list):
60+
if comms_routing and all_rules_matched:
6061
rule_actions = self._get_actions_from_comms(action_mapper, comms_routing)
6162
if rule_actions and len(rule_actions) > 0:
6263
actions = rule_actions

0 commit comments

Comments
 (0)