Skip to content

Commit fefcbf4

Browse files
authored
ELI-446: Adds setup for feature toggles (#352)
* ELI-446: Adds setup for feature toggles * ELI-446: Fixes 'English usage' * ELI-446: Fixes 'Markdown format' * ELI-446: Fixes 'Markdown format' * ELI-446: Skip checkov warning as this is not a secure string * ELI-446: Adds toggle unit tests * ELI-446: Linting fix * ELI-446: Linting fix * ELI-446: Amend readme * ELI-446: Amend readme
1 parent 6c25a8f commit fefcbf4

File tree

8 files changed

+390
-0
lines changed

8 files changed

+390
-0
lines changed

.github/workflows/base-deploy.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ jobs:
150150
git config user.name "github-actions"
151151
git config user.email "[email protected]"
152152
153+
- name: "Validate Feature Toggles"
154+
env:
155+
ENV: ${{ needs.metadata.outputs.environment }}
156+
run: |
157+
pip install boto3
158+
python scripts/feature_toggle/validate_toggles.py
159+
153160
# ---------- Preprod path: create RC tag + pre-release ----------
154161
- name: "Create/Push RC tag for preprod"
155162
if: ${{ needs.metadata.outputs.environment == 'preprod' }}

infrastructure/stacks/api-layer/ssm.tf

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,25 @@ resource "aws_ssm_parameter" "splunk_hec_endpoint" {
9292
ignore_changes = [value]
9393
}
9494
}
95+
96+
# Creates one SSM String parameter per feature toggle declared in feature_toggle.json.
resource "aws_ssm_parameter" "feature_toggles" {
97+
# Iterates over the decoded JSON object: each.key is the toggle name, each.value holds its attributes.
for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json"))
98+
99+
name = "/${var.environment}/feature_toggles/${each.key}"
100+
#checkov:skip=CKV2_AWS_34: Since this is a feature toggle config, secure string not needed
101+
type = "String"
102+
103+
# Uses the override for the current environment when one is listed, otherwise the toggle's default_state.
# NOTE(review): env_overrides is read directly, so a toggle entry without that key would presumably
# fail here - confirm, since the README describes the map as optional.
value = lookup(each.value.env_overrides, var.environment, each.value.default_state)
104+
105+
# Toggle metadata from the JSON entry is surfaced as tags on the parameter.
tags = {
106+
Environment = var.environment
107+
ManagedBy = "terraform"
108+
Purpose = each.value.purpose
109+
Ticket = each.value.ticket
110+
Created = each.value.created
111+
}
112+
113+
lifecycle {
114+
# After creation Terraform no longer reconciles `value`, so a toggle flipped directly in SSM is not
# reverted by the next apply (and later edits to the JSON state are not pushed to existing parameters).
ignore_changes = [value]
115+
}
116+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
3+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import json
2+
import logging
3+
import os
4+
import sys
5+
6+
import boto3
7+
8+
logging.basicConfig(level=logging.INFO, format='%(message)s')
9+
10+
11+
def _expected_toggle_value(toggle_details, environment):
    """Return the value a toggle should hold in ``environment`` as a lowercase string.

    An entry for the environment in ``env_overrides`` wins; otherwise the
    toggle's ``default_state`` is used (``False`` when that key is absent).
    """
    default_state = toggle_details.get('default_state', False)
    env_overrides = toggle_details.get('env_overrides', {})
    return str(env_overrides.get(environment, default_state)).lower()


def _log_validation_failures(missing_toggles, mismatched_toggles):
    """Log a summary of missing and mismatched toggles; return True if there are any."""
    if missing_toggles:
        logging.error("\nERROR: The following required feature toggles were not found in SSM:")
        for toggle in missing_toggles:
            logging.error(f"- {toggle}")

    if mismatched_toggles:
        logging.error("\nERROR: The following feature toggles have incorrect values in SSM:")
        for name, expected, actual in mismatched_toggles:
            logging.error(f"- {name}: Expected '{expected}', but found '{actual}'")

    return bool(missing_toggles or mismatched_toggles)


def validate_feature_toggles():
    """Check that every toggle in ``feature_toggle.json`` exists in SSM with the expected value.

    The JSON file is read from the directory containing this script and the
    target environment from the ``ENV`` environment variable. For each toggle
    the parameter ``/<ENV>/feature_toggles/<name>`` is fetched from SSM and its
    value compared, case-insensitively, with the expected state.

    Returns ``None`` when every toggle is present with the expected value.

    Raises:
        SystemExit: with status 1 when ``ENV`` is unset, the JSON file is
            missing or malformed, any toggle is missing or mismatched in SSM,
            or an unexpected error occurs.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    toggles_file_path = os.path.join(script_dir, "feature_toggle.json")
    toggles_file_name = os.path.basename(toggles_file_path)

    environment = os.getenv("ENV")
    if not environment:
        # Logged directly instead of raising KeyError: str(KeyError(msg)) is
        # repr(msg), which wrapped the message in an extra pair of quotes.
        logging.error("FATAL: The 'ENV' environment variable is not set.")
        sys.exit(1)

    try:
        logging.info(f"Verifying toggles from '{toggles_file_name}' in environment: {environment}")

        if not os.path.exists(toggles_file_path):
            logging.error(f"FATAL: '{toggles_file_path}' not found.")
            sys.exit(1)

        with open(toggles_file_path, "r", encoding="utf-8") as f:
            toggles_data = json.load(f)

        ssm_client = boto3.client("ssm")
        missing_toggles = []
        mismatched_toggles = []

        for toggle_name, toggle_details in toggles_data.items():
            parameter_name = f"/{environment}/feature_toggles/{toggle_name}"
            expected_state_str = _expected_toggle_value(toggle_details, environment)

            logging.info(f"Checking for: {parameter_name} (expected value: {expected_state_str})")

            try:
                parameter = ssm_client.get_parameter(Name=parameter_name)
            except ssm_client.exceptions.ParameterNotFound:
                logging.error(f"--> MISSING: {parameter_name}")
                missing_toggles.append(parameter_name)
                continue

            actual_state = parameter['Parameter']['Value']
            # NOTE(review): a toggle whose stored value was changed directly in
            # SSM after creation is reported as a mismatch here - confirm that
            # failing the deployment in that case is intended.
            if actual_state.lower() != expected_state_str:
                logging.error(f"--> MISMATCH: {parameter_name} - Expected '{expected_state_str}', but found '{actual_state}'")
                mismatched_toggles.append((parameter_name, expected_state_str, actual_state))

        if _log_validation_failures(missing_toggles, mismatched_toggles):
            sys.exit(1)

        logging.info("\nSuccess: All required feature toggles are present in SSM with the correct values.")

    except KeyError as e:
        # Reached only for a genuinely absent key (e.g. an unexpected SSM
        # response shape); str(e) is the quoted key name.
        logging.error(f"FATAL: Missing expected key {e}")
        sys.exit(1)
    except json.JSONDecodeError:
        logging.error(f"FATAL: Could not decode JSON from '{toggles_file_path}'. Please check for syntax errors.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a CI script: report and fail the step.
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through untouched.
        logging.error(f"An unexpected error occurred: {e}")
        sys.exit(1)


if __name__ == "__main__":
    validate_feature_toggles()

src/eligibility_signposting_api/feature_toggle/__init__.py

Whitespace-only changes.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Feature Toggles
2+
3+
Feature toggles allow us to deploy code to production in a disabled state, enabling it later without a new deployment.
4+
5+
## How It Works
6+
7+
Our feature toggle system is built on **AWS Systems Manager (SSM) Parameter Store**.
8+
9+
1. **Single Source of Truth**: AWS SSM is the single source of truth for the current state (`true` or `false`) of all feature toggles.
10+
2. **Infrastructure as Code**: Toggles are defined in Terraform, ensuring configuration is version-controlled and repeatable across environments.
11+
3. **CI/CD Validation**: The `feature_toggle.json` file in the repository lists all toggles the application requires. The CI/CD pipeline checks that every toggle in this file exists in AWS SSM, and that its stored value matches the state expected for the target environment, before a deployment can proceed.
12+
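4. **Runtime Caching**: The application code calls `is_feature_enabled()` to check a toggle's state at runtime. The underlying SSM lookup is cached (entries expire after five minutes), minimizing calls to AWS and ensuring high performance; a change made in SSM can therefore take up to five minutes to be picked up.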
13+
14+
## Developer Workflow
15+
16+
### Step 1: Define the Toggle (The Single Source of Truth)
17+
18+
Adding a new toggle is a single-step process. You only need to add a new entry to the `feature_toggle.json` file. This file defines the toggle's metadata and its intended state for each environment.
19+
20+
`default_state`: The safe, production-like state (usually `false`).
21+
22+
`env_overrides`: An optional map to set a different state for specific environments (e.g., enabling the feature in `dev` and `test` for QA). If an environment is not listed, it uses the `default_state`.
23+
24+
**File: [feature_toggle.json](../../../scripts/feature_toggle/feature_toggle.json)**
25+
26+
```json
27+
{
28+
"enable_dynamic_status_text": {
29+
"purpose": "Enables dynamic status text based on conditions.",
30+
"ticket": "ELI-427",
31+
"created": "2025-09-02",
32+
"default_state": false,
33+
"env_overrides": {
34+
"dev": true,
35+
"test": true
36+
}
37+
}
38+
}
39+
```
40+
41+
Our Terraform setup automatically reads this file and creates the corresponding SSM parameters. You do not need to write new Terraform code for each toggle.
42+
43+
**File: [ssm.tf](../../../infrastructure/stacks/api-layer/ssm.tf) (For Reference—No edits needed)**
44+
45+
```terraform
46+
resource "aws_ssm_parameter" "feature_toggles" {
47+
for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json"))
48+
49+
name = "/${var.environment}/feature_toggles/${each.key}"
50+
type = "String"
51+
52+
value = lookup(each.value.env_overrides, var.environment, each.value.default_state)
53+
54+
tags = {
55+
Environment = var.environment
56+
ManagedBy = "terraform"
57+
Purpose = each.value.purpose
58+
Ticket = each.value.ticket
59+
Created = each.value.created
60+
}
61+
62+
lifecycle {
63+
ignore_changes = [value]
64+
}
65+
}
66+
```
67+
68+
### Step 2: Implement and Test the Logic
69+
70+
Import and use the `is_feature_enabled()` function to create a conditional code path.
71+
72+
**File (Example): `eligibility_signposting_api/services/calculators/eligibility_calculator.py`**
73+
74+
```python
75+
from eligibility_signposting_api.feature_toggle.feature_toggle import is_feature_enabled
76+
77+
if is_feature_enabled("enable_dynamic_status_text"):
78+
# New feature logic
79+
status_text = self.get_status_text(active_iteration.status_text, ConditionName(cc.target), status)
80+
else:
81+
# Existing (old) logic
82+
status_text = status.get_default_status_text(ConditionName(cc.target))
83+
```
84+
85+
You must write unit tests that cover both the "on" and "off" states of the toggle. Use `pytest.mark.parametrize` to run the same test with both states and `unittest.mock.patch` to control the toggle's return value.
86+
87+
**Important**: The patch path must point to **where the function is used**, not where it is defined.
88+
89+
**File (Example): `tests/unit/services/calculators/test_eligibility_calculator.py`**
90+
91+
```python
92+
import pytest
93+
from unittest.mock import patch
94+
95+
@pytest.mark.parametrize(
96+
"enable_dynamic_status_text, expected_rsv_text",
97+
[
98+
(True, "You are not eligible to take RSV vaccine"), # Case 1: Toggle is ON
99+
(False, "We do not believe you can have it"), # Case 2: Toggle is OFF
100+
],
101+
)
102+
@patch("eligibility_signposting_api.services.calculators.eligibility_calculator.is_feature_enabled")
103+
def test_status_text_is_conditional_on_toggle(
104+
mock_is_feature_enabled,
105+
enable_dynamic_status_text,
106+
expected_rsv_text,
107+
faker: Faker
108+
):
109+
110+
# This mock controls the toggle for the test run
111+
mock_is_feature_enabled.return_value = enable_dynamic_status_text
112+
113+
# Given, When, Then...
114+
assert actual_text_from_audit == expected_rsv_text
115+
```
116+
117+
### Step 3: Commit and Deploy (The Automation)
118+
119+
1. Terraform Apply: During the infrastructure step of deployment, Terraform executes the ssm.tf configuration. It reads the updated feature_toggle.json file.
120+
2. Creation: Because of the `for_each` expression, Terraform detects the new feature toggle entry and creates an `aws_ssm_parameter` resource for it, giving the parameter in AWS SSM the correct name (e.g., `/dev/feature_toggles/enable_dynamic_status_text`) and the appropriate initial value for the environment (`true` for `dev` and `test`, `false` for others).
121+
3. Validation: Immediately afterwards, the `validate_toggles.py` script runs. It reads the same JSON file, sees that the feature toggle is required, and queries AWS SSM to confirm that Terraform successfully created it with the expected value.
122+
123+
### Step 4: Cleanup Process
124+
125+
Feature toggles are **technical debt**. Once a feature is fully released and stable, the toggle and all associated conditional logic must be removed.
126+
127+
Follow the **"Two-Ticket" Rule**:
128+
129+
1. When you create a ticket to add a feature toggle, immediately create a second ticket to remove it.
130+
2. Link the two tickets.
131+
3. Once the feature is permanently enabled, schedule the cleanup ticket in an upcoming sprint to remove the toggle from:
132+
- The application code
133+
- All related test code
134+
- The `feature_toggle.json` file
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import logging
2+
import os
3+
4+
import boto3
5+
from botocore.exceptions import ClientError
6+
from cachetools import TTLCache, cached
7+
8+
# Region for the SSM client; None when AWS_DEFAULT_REGION is unset.
aws_region = os.getenv("AWS_DEFAULT_REGION")
9+
# Created once at import time and shared by every lookup in this module.
ssm_client = boto3.client("ssm", region_name=aws_region)
10+
# Deployment environment name, read once at import time.
environment = os.getenv("ENV")
11+
# Path prefix of every toggle parameter. If ENV is unset this becomes "/None/feature_toggles/".
feature_toggles_prefix = f"/{environment}/feature_toggles/"
12+
13+
logger = logging.getLogger(__name__)
14+
15+
# Despite the name, this is the cache object itself: up to 128 entries with a ttl of 300.
ssm_cache_in_seconds = TTLCache(maxsize=128, ttl=300)
16+
17+
18+
@cached(ssm_cache_in_seconds)
def get_ssm_parameter(parameter_name: str) -> str:
    """Fetch the value of an SSM parameter, falling back to ``"false"``.

    Results are memoised in ``ssm_cache_in_seconds``, so the log line below
    is only emitted when the value is not served from that cache.

    Args:
        parameter_name: Full SSM parameter name to look up.

    Returns:
        The parameter's value, or the string ``"false"`` when the parameter
        does not exist or the lookup fails with a ``ClientError``. The
        fallback is returned rather than raised, so it is cached as well.
    """
    logger.info("Fetching '%s' from AWS SSM (not from cache).", parameter_name)
    try:
        result = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
    except ssm_client.exceptions.ParameterNotFound:
        logger.warning("Parameter '%s' not found in SSM.", parameter_name)
        return "false"
    except ClientError:
        logger.exception("An AWS client error occurred fetching '%s' from SSM.", parameter_name)
        return "false"
    return result["Parameter"]["Value"]
30+
31+
32+
def is_feature_enabled(feature_name: str) -> bool:
    """Report whether the named feature toggle is switched on.

    Args:
        feature_name: Toggle name, appended to ``feature_toggles_prefix`` to
            form the SSM parameter name.

    Returns:
        ``True`` only when the stored value, lowercased and stripped of
        surrounding whitespace, equals ``"true"``; ``False`` otherwise.
    """
    raw_value = get_ssm_parameter(feature_toggles_prefix + feature_name)
    normalised = raw_value.lower().strip()
    return normalised == "true"

0 commit comments

Comments
 (0)