Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/base-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ jobs:
git config user.name "github-actions"
git config user.email "[email protected]"

# Gate the deployment on the feature-toggle check: validate_toggles.py exits
# non-zero when a toggle listed in scripts/feature_toggle/feature_toggle.json
# is missing from SSM, or holds an unexpected value, for the target ENV.
- name: "Validate Feature Toggles"
  env:
    ENV: ${{ needs.metadata.outputs.environment }}
  run: |
    pip install boto3
    python scripts/feature_toggle/validate_toggles.py

# ---------- Preprod path: create RC tag + pre-release ----------
- name: "Create/Push RC tag for preprod"
if: ${{ needs.metadata.outputs.environment == 'preprod' }}
Expand Down
22 changes: 22 additions & 0 deletions infrastructure/stacks/api-layer/ssm.tf
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,25 @@ resource "aws_ssm_parameter" "splunk_hec_endpoint" {
ignore_changes = [value]
}
}

# One SSM parameter per entry in feature_toggle.json. Each entry is expected to
# provide `default_state`, `purpose`, `ticket` and `created`; `env_overrides`
# is optional.
resource "aws_ssm_parameter" "feature_toggles" {
  for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json"))

  name = "/${var.environment}/feature_toggles/${each.key}"
  #checkov:skip=CKV2_AWS_34: Since this is a feature toggle config, secure string not needed
  type = "String"

  # try() falls back to default_state both when this environment has no
  # override and when the toggle omits the env_overrides map entirely (a direct
  # attribute access on a missing key is a plan-time error). tostring() makes
  # the bool -> "true"/"false" conversion explicit.
  value = tostring(try(each.value.env_overrides[var.environment], each.value.default_state))

  tags = {
    Environment = var.environment
    ManagedBy   = "terraform"
    Purpose     = each.value.purpose
    Ticket      = each.value.ticket
    Created     = each.value.created
  }

  # The value is only seeded at creation time; later changes made directly in
  # SSM are not reverted by subsequent applies.
  lifecycle {
    ignore_changes = [value]
  }
}
3 changes: 3 additions & 0 deletions scripts/feature_toggle/feature_toggle.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{

}
88 changes: 88 additions & 0 deletions scripts/feature_toggle/validate_toggles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import json
import logging
import os
import sys

import boto3

# Emit bare messages (no level/timestamp prefix) at INFO and above for this script's output.
logging.basicConfig(level=logging.INFO, format='%(message)s')


def validate_feature_toggles():
    """Check that every toggle in ``feature_toggle.json`` is present in SSM with its expected value.

    The target environment is read from the ``ENV`` environment variable. For
    each toggle in the JSON file that sits next to this script, the expected
    state is ``env_overrides[ENV]`` when present, otherwise ``default_state``
    (``False`` when that is absent too). The expected state is compared,
    case-insensitively, with the value of the SSM parameter
    ``/<ENV>/feature_toggles/<toggle_name>``.

    Returns:
        None. A success message is logged when every toggle matches.

    Raises:
        SystemExit: with exit code 1 when ``ENV`` is unset, the JSON file is
            missing or malformed, any toggle is missing or mismatched in SSM,
            or any other unexpected error occurs.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    toggles_file_path = os.path.join(script_dir, "feature_toggle.json")
    toggles_file_name = os.path.basename(toggles_file_path)

    environment = os.getenv("ENV")
    if not environment:
        # Log directly instead of routing through KeyError: str(KeyError(msg))
        # is the repr of msg, which wrapped the message in stray quotes.
        logging.error("FATAL: The 'ENV' environment variable is not set.")
        sys.exit(1)

    try:
        logging.info(f"Verifying toggles from '{toggles_file_name}' in environment: {environment}")

        try:
            with open(toggles_file_path, "r", encoding="utf-8") as f:
                toggles_data = json.load(f)
        except FileNotFoundError:
            logging.error(f"FATAL: '{toggles_file_path}' not found.")
            sys.exit(1)

        ssm_client = boto3.client("ssm")
        missing_toggles = []
        mismatched_toggles = []

        for toggle_name, toggle_details in toggles_data.items():
            parameter_name = f"/{environment}/feature_toggles/{toggle_name}"

            default_state = toggle_details.get('default_state', False)
            env_overrides = toggle_details.get('env_overrides', {})
            expected_state = env_overrides.get(environment, default_state)
            # JSON booleans become Python bools; SSM stores strings, so compare
            # on the lower-cased string form ("true"/"false").
            expected_state_str = str(expected_state).lower()

            logging.info(f"Checking for: {parameter_name} (expected value: {expected_state_str})")

            try:
                parameter = ssm_client.get_parameter(Name=parameter_name)
            except ssm_client.exceptions.ParameterNotFound:
                logging.error(f"--> MISSING: {parameter_name}")
                missing_toggles.append(parameter_name)
                continue

            actual_state = parameter['Parameter']['Value']
            if actual_state.lower() != expected_state_str:
                logging.error(f"--> MISMATCH: {parameter_name} - Expected '{expected_state_str}', but found '{actual_state}'")
                mismatched_toggles.append((parameter_name, expected_state_str, actual_state))

        if missing_toggles:
            logging.error("\nERROR: The following required feature toggles were not found in SSM:")
            for toggle in missing_toggles:
                logging.error(f"- {toggle}")

        if mismatched_toggles:
            logging.error("\nERROR: The following feature toggles have incorrect values in SSM:")
            for name, expected, actual in mismatched_toggles:
                logging.error(f"- {name}: Expected '{expected}', but found '{actual}'")

        if missing_toggles or mismatched_toggles:
            sys.exit(1)

        logging.info("\nSuccess: All required feature toggles are present in SSM with the correct values.")

    # SystemExit does not derive from Exception, so the sys.exit(1) calls above
    # pass straight through these handlers.
    except KeyError as e:
        logging.error(f"FATAL: {e}")
        sys.exit(1)
    except json.JSONDecodeError:
        logging.error(f"FATAL: Could not decode JSON from '{toggles_file_path}'. Please check for syntax errors.")
        sys.exit(1)
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        sys.exit(1)


# Run the validation only when executed as a script (as the deploy workflow does),
# not when the module is imported.
if __name__ == "__main__":
    validate_feature_toggles()
Empty file.
134 changes: 134 additions & 0 deletions src/eligibility_signposting_api/feature_toggle/feature_toggle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Feature Toggles

Feature toggles allow us to deploy code to production in a disabled state, enabling it later without a new deployment.

## How It Works

Our feature toggle system is built on **AWS Systems Manager (SSM) Parameter Store**.

1. **Single Source of Truth**: AWS SSM is the single source of truth for the current state (`true` or `false`) of all feature toggles.
2. **Infrastructure as Code**: Toggles are defined in Terraform, ensuring configuration is version-controlled and repeatable across environments.
3. **CI/CD Validation**: The `feature_toggle.json` file in the repository lists all toggles the application requires. The CI/CD pipeline checks that every toggle in this file exists in AWS SSM, with the value expected for the target environment, before a deployment can proceed.
4. **Runtime Caching**: The application code uses a cached `is_feature_enabled()` function to check a toggle's state at runtime, minimizing calls to AWS and ensuring high performance.

## Developer Workflow

### Step 1: Define the Toggle (The Single Source of Truth)

Adding a new toggle is a single-step process. You only need to add a new entry to the `feature_toggle.json` file. This file defines the toggle's metadata and its intended state for each environment.

`default_state`: The safe, production-like state (usually `false`).

`env_overrides`: An optional map to set a different state for specific environments (e.g., enabling the feature in `dev` and `test` for QA). If an environment is not listed, it uses the `default_state`.

**File: [feature_toggle.json](../../../scripts/feature_toggle/feature_toggle.json)**

```json
{
"enable_dynamic_status_text": {
"purpose": "Enables dynamic status text based on conditions.",
"ticket": "ELI-427",
"created": "2025-09-02",
"default_state": false,
"env_overrides": {
"dev": true,
"test": true
}
}
}
```

Our Terraform setup automatically reads this file and creates the corresponding SSM parameters. You do not need to write new Terraform code for each toggle.

**File: [ssm.tf](../../../infrastructure/stacks/api-layer/ssm.tf) (For Reference—No edits needed)**

```terraform
resource "aws_ssm_parameter" "feature_toggles" {
for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json"))

name = "/${var.environment}/feature_toggles/${each.key}"
type = "String"

value = lookup(each.value.env_overrides, var.environment, each.value.default_state)

tags = {
Environment = var.environment
ManagedBy = "terraform"
Purpose = each.value.purpose
Ticket = each.value.ticket
Created = each.value.created
}

lifecycle {
ignore_changes = [value]
}
}
```

### Step 2: Implement and Test the Logic

Import and use the `is_feature_enabled()` function to create a conditional code path.

**File (Example): `eligibility_signposting_api/services/calculators/eligibility_calculator.py`**

```python
from eligibility_signposting_api.feature_toggle.feature_toggle import is_feature_enabled

if is_feature_enabled("enable_dynamic_status_text"):
# New feature logic
status_text = self.get_status_text(active_iteration.status_text, ConditionName(cc.target), status)
else:
# Existing (old) logic
status_text = status.get_default_status_text(ConditionName(cc.target))
```

You must write unit tests that cover both the "on" and "off" states of the toggle. Use `pytest.mark.parametrize` to run the same test with both states and `unittest.mock.patch` to control the toggle's return value.

**Important**: The patch path must point to **where the function is used**, not where it is defined.

**File (Example): `tests/unit/services/calculators/test_eligibility_calculator.py`**

```python
import pytest
from unittest.mock import patch

@pytest.mark.parametrize(
"enable_dynamic_status_text, expected_rsv_text",
[
(True, "You are not eligible to take RSV vaccine"), # Case 1: Toggle is ON
(False, "We do not believe you can have it"), # Case 2: Toggle is OFF
],
)
@patch("eligibility_signposting_api.services.calculators.eligibility_calculator.is_feature_enabled")
def test_status_text_is_conditional_on_toggle(
mock_is_feature_enabled,
enable_dynamic_status_text,
expected_rsv_text,
faker: Faker
):

# This mock controls the toggle for the test run
mock_is_feature_enabled.return_value = enable_dynamic_status_text

# Given, When, Then...
assert actual_text_from_audit == expected_rsv_text
```

### Step 3: Commit and Deploy (The Automation)

1. Terraform Apply: During the infrastructure step of deployment, Terraform executes the ssm.tf configuration. It reads the updated feature_toggle.json file.
2. Creation: Because of the `for_each` loop, Terraform detects the new feature toggle entry. It then automatically creates an `aws_ssm_parameter` resource for this new item, creating the parameter in AWS SSM with the correct name (e.g., `/dev/feature_toggles/enable_dynamic_status_text`) and the appropriate initial value based on the environment (`true` for `dev` and `test`, `false` for others).
3. Validation: Immediately afterwards, the `validate_toggles.py` script runs. It reads the same JSON file, sees that the feature toggle is required, and queries AWS SSM to confirm that Terraform successfully created it with the expected value.

### Step 4: Cleanup Process

Feature toggles are **technical debt**. Once a feature is fully released and stable, the toggle and all associated conditional logic must be removed.

Follow the **"Two-Ticket" Rule**:

1. When you create a ticket to add a feature toggle, immediately create a second ticket to remove it.
2. Link the two tickets.
3. Once the feature is permanently enabled, schedule the cleanup ticket in an upcoming sprint to remove the toggle from:
- The application code
- All related test code
- The `feature_toggle.json` file
34 changes: 34 additions & 0 deletions src/eligibility_signposting_api/feature_toggle/feature_toggle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import logging
import os

import boto3
from botocore.exceptions import ClientError
from cachetools import TTLCache, cached

# Module-level configuration, evaluated once at import time.

# Region for the SSM client; None when AWS_DEFAULT_REGION is unset.
aws_region = os.getenv("AWS_DEFAULT_REGION")
ssm_client = boto3.client("ssm", region_name=aws_region)
# Deployment environment name; None when ENV is unset, in which case the
# prefix below becomes "/None/feature_toggles/".
environment = os.getenv("ENV")
feature_toggles_prefix = f"/{environment}/feature_toggles/"

logger = logging.getLogger(__name__)

# Cache of SSM lookups: at most 128 entries, each kept for 300 seconds.
ssm_cache_in_seconds = TTLCache(maxsize=128, ttl=300)


@cached(ssm_cache_in_seconds)
def get_ssm_parameter(parameter_name: str) -> str:
    """Return the value of an SSM parameter, falling back to ``"false"`` on failure.

    Results are memoised in ``ssm_cache_in_seconds``, and that includes the
    ``"false"`` fallback, because the cache stores whatever this function
    returns.

    Args:
        parameter_name: Full SSM parameter name to fetch.

    Returns:
        The stored parameter value, or ``"false"`` when the parameter does not
        exist or the SSM call fails with a ``ClientError``.
    """
    fallback = "false"
    logger.info("Fetching '%s' from AWS SSM (not from cache).", parameter_name)
    try:
        result = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
    except ssm_client.exceptions.ParameterNotFound:
        # Must be handled before ClientError, which is its base class.
        logger.warning("Parameter '%s' not found in SSM.", parameter_name)
        return fallback
    except ClientError:
        logger.exception("An AWS client error occurred fetching '%s' from SSM.", parameter_name)
        return fallback
    else:
        return result["Parameter"]["Value"]


def is_feature_enabled(feature_name: str) -> bool:
    """Report whether the named feature toggle is switched on.

    Args:
        feature_name: Toggle name, appended to ``feature_toggles_prefix`` to
            form the SSM parameter name.

    Returns:
        ``True`` only when the stored value, lower-cased and stripped of
        surrounding whitespace, equals ``"true"``; ``False`` otherwise.
    """
    raw_value = get_ssm_parameter(feature_toggles_prefix + feature_name)
    normalised = raw_value.lower().strip()
    return normalised == "true"
Loading