# One SSM String parameter per feature toggle declared in feature_toggle.json.
# The JSON object's keys are the toggle names; each value carries the toggle's
# metadata (purpose, ticket, created) and its intended state.
#
# NOTE(review): path.root is the directory of the root module, not the
# repository root - confirm the JSON file is actually reachable at this
# relative path from the stack directory.
resource "aws_ssm_parameter" "feature_toggles" {
  for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json"))

  name = "/${var.environment}/feature_toggles/${each.key}"
  #checkov:skip=CKV2_AWS_34: Since this is a feature toggle config, secure string not needed
  type = "String"

  # env_overrides is documented as optional, so guard the attribute access with
  # try(): a toggle without the map (or without default_state) falls back to
  # the same defaults the Python validator uses ({} and false) instead of
  # failing the plan with an "unsupported attribute" error.
  value = tostring(lookup(
    try(each.value.env_overrides, {}),
    var.environment,
    try(each.value.default_state, false)
  ))

  tags = {
    Environment = var.environment
    ManagedBy   = "terraform"
    Purpose     = each.value.purpose
    Ticket      = each.value.ticket
    Created     = each.value.created
  }

  # The value is only an initial state: later changes made directly in SSM
  # are not reverted by subsequent applies.
  lifecycle {
    ignore_changes = [value]
  }
}
def _expected_toggle_state(toggle_details, environment):
    """Return the normalised state string a toggle should have in ``environment``.

    The environment-specific override wins when present; otherwise the toggle's
    ``default_state`` is used (``False`` when that key is absent too).
    """
    default_state = toggle_details.get("default_state", False)
    env_overrides = toggle_details.get("env_overrides", {})
    return str(env_overrides.get(environment, default_state)).strip().lower()


def validate_feature_toggles(ssm_client=None, toggles_file_path=None, environment=None):
    """Verify that every toggle in the JSON file exists in SSM with its expected value.

    For each toggle the parameter ``/<environment>/feature_toggles/<name>`` is
    fetched and compared, case-insensitively and ignoring surrounding
    whitespace, with the state derived from ``env_overrides`` /
    ``default_state``. Missing and mismatched parameters are logged and make
    the process exit with status 1.

    Args:
        ssm_client: Object exposing ``get_parameter(Name=...)`` and
            ``exceptions.ParameterNotFound``. Defaults to ``boto3.client("ssm")``.
        toggles_file_path: Path of the toggle definition file. Defaults to
            ``feature_toggle.json`` next to this script.
        environment: Environment name used in the parameter path. Defaults to
            the ``ENV`` environment variable.

    Raises:
        SystemExit: With code 1 on any validation failure or fatal error.

    NOTE(review): the Terraform resource that creates these parameters ignores
    later value changes, so a toggle flipped by hand in SSM is reported here as
    a mismatch - confirm that blocking the deployment in that case is intended.
    """
    if toggles_file_path is None:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        toggles_file_path = os.path.join(script_dir, "feature_toggle.json")
    toggles_file_name = os.path.basename(toggles_file_path)

    try:
        if environment is None:
            environment = os.getenv("ENV")
        if not environment:
            # Logged directly: formatting a KeyError would wrap the message in
            # an extra pair of quotes.
            logging.error("FATAL: The 'ENV' environment variable is not set.")
            sys.exit(1)

        logging.info("Verifying toggles from '%s' in environment: %s", toggles_file_name, environment)

        try:
            with open(toggles_file_path, "r", encoding="utf-8") as f:
                toggles_data = json.load(f)
        except FileNotFoundError:
            logging.error("FATAL: '%s' not found.", toggles_file_path)
            sys.exit(1)

        if not isinstance(toggles_data, dict):
            logging.error(
                "FATAL: '%s' must contain a JSON object mapping toggle names to their settings.",
                toggles_file_path,
            )
            sys.exit(1)

        if ssm_client is None:
            ssm_client = boto3.client("ssm")

        missing_toggles = []
        mismatched_toggles = []

        for toggle_name, toggle_details in toggles_data.items():
            parameter_name = f"/{environment}/feature_toggles/{toggle_name}"
            expected_state_str = _expected_toggle_state(toggle_details, environment)

            logging.info("Checking for: %s (expected value: %s)", parameter_name, expected_state_str)

            try:
                parameter = ssm_client.get_parameter(Name=parameter_name)
            except ssm_client.exceptions.ParameterNotFound:
                logging.error("--> MISSING: %s", parameter_name)
                missing_toggles.append(parameter_name)
                continue

            actual_state = parameter["Parameter"]["Value"]
            # Normalise exactly like the runtime check (lower-case + strip) so
            # the validator and the application agree on what counts as "true".
            if actual_state.strip().lower() != expected_state_str:
                logging.error(
                    "--> MISMATCH: %s - Expected '%s', but found '%s'",
                    parameter_name,
                    expected_state_str,
                    actual_state,
                )
                mismatched_toggles.append((parameter_name, expected_state_str, actual_state))

        if missing_toggles:
            logging.error("\nERROR: The following required feature toggles were not found in SSM:")
            for toggle in missing_toggles:
                logging.error("- %s", toggle)

        if mismatched_toggles:
            logging.error("\nERROR: The following feature toggles have incorrect values in SSM:")
            for name, expected, actual in mismatched_toggles:
                logging.error("- %s: Expected '%s', but found '%s'", name, expected, actual)

        if missing_toggles or mismatched_toggles:
            sys.exit(1)

        logging.info("\nSuccess: All required feature toggles are present in SSM with the correct values.")

    except KeyError as e:
        # e.g. an SSM response without the expected 'Parameter'/'Value' keys.
        logging.error(f"FATAL: {e}")
        sys.exit(1)
    except json.JSONDecodeError:
        logging.error(f"FATAL: Could not decode JSON from '{toggles_file_path}'. Please check for syntax errors.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a CI script: report and fail the step.
        logging.error(f"An unexpected error occurred: {e}")
        sys.exit(1)
**CI/CD Validation**: The `feature_toggle.json` file in the repository lists all toggles the application requires. The CI/CD pipeline checks that every toggle in this file exists in AWS SSM with the expected value for the target environment before a deployment can proceed. +4. **Runtime Caching**: The application code uses a cached `is_feature_enabled()` function to check a toggle's state at runtime. Values are cached for five minutes, which minimizes calls to AWS; a change made in SSM can therefore take up to five minutes to be picked up. + +## Developer Workflow + +### Step 1: Define the Toggle (The Single Source of Truth) + +Adding a new toggle is a single-step process. You only need to add a new entry to the `feature_toggle.json` file. This file defines the toggle's metadata and its intended state for each environment. + +`default_state`: The safe, production-like state (usually `false`). + +`env_overrides`: An optional map to set a different state for specific environments (e.g., enabling the feature in `dev` and `test` for QA). If an environment is not listed, it uses the `default_state`. + +**File: [feature_toggle.json](../../../scripts/feature_toggle/feature_toggle.json)** + +```json +{ + "enable_dynamic_status_text": { + "purpose": "Enables dynamic status text based on conditions.", + "ticket": "ELI-427", + "created": "2025-09-02", + "default_state": false, + "env_overrides": { + "dev": true, + "test": true + } + } +} +``` + +Our Terraform setup automatically reads this file and creates the corresponding SSM parameters. You do not need to write new Terraform code for each toggle. 
+ +**File: [ssm.tf](../../../infrastructure/stacks/api-layer/ssm.tf) (For Reference—No edits needed)** + +```terraform +resource "aws_ssm_parameter" "feature_toggles" { + for_each = jsondecode(file("${path.root}/scripts/feature_toggle/feature_toggle.json")) + + name = "/${var.environment}/feature_toggles/${each.key}" + type = "String" + + value = lookup(each.value.env_overrides, var.environment, each.value.default_state) + + tags = { + Environment = var.environment + ManagedBy = "terraform" + Purpose = each.value.purpose + Ticket = each.value.ticket + Created = each.value.created + } + + lifecycle { + ignore_changes = [value] + } +} +``` + +### Step 2: Implement and Test the Logic + +Import and use the `is_feature_enabled()` function to create a conditional code path. + +**File (Example): `eligibility_signposting_api/services/calculators/eligibility_calculator.py`** + +```python +from eligibility_signposting_api.feature_toggle.feature_toggle import is_feature_enabled + +if is_feature_enabled("enable_dynamic_status_text"): + # New feature logic + status_text = self.get_status_text(active_iteration.status_text, ConditionName(cc.target), status) +else: + # Existing (old) logic + status_text = status.get_default_status_text(ConditionName(cc.target)) +``` + +You must write unit tests that cover both the "on" and "off" states of the toggle. Use `pytest.mark.parametrize` to run the same test with both states and `unittest.mock.patch` to control the toggle's return value. + +**Important**: The patch path must point to **where the function is used**, not where it is defined. 
+ +**File (Example): `tests/unit/services/calculators/test_eligibility_calculator.py`** + +```python +import pytest +from unittest.mock import patch + +@pytest.mark.parametrize( + "enable_dynamic_status_text, expected_rsv_text", + [ + (True, "You are not eligible to take RSV vaccine"), # Case 1: Toggle is ON + (False, "We do not believe you can have it"), # Case 2: Toggle is OFF + ], +) +@patch("eligibility_signposting_api.services.calculators.eligibility_calculator.is_feature_enabled") +def test_status_text_is_conditional_on_toggle( + mock_is_feature_enabled, + enable_dynamic_status_text, + expected_rsv_text, + faker: Faker +): + + # This mock controls the toggle for the test run + mock_is_feature_enabled.return_value = enable_dynamic_status_text + + # Given, When, Then... + assert actual_text_from_audit == expected_rsv_text +``` + +### Step 3: Commit and Deploy (The Automation) + +1. Terraform Apply: During the infrastructure step of deployment, Terraform executes the `ssm.tf` configuration. It reads the updated `feature_toggle.json` file. +2. Creation: Because of the `for_each` expression, Terraform detects the new feature toggle entry. It then automatically applies the `aws_ssm_parameter` resource block for this new item, creating the parameter in AWS SSM with the correct name (e.g., `/dev/feature_toggles/enable_dynamic_status_text`) and the appropriate initial value based on the environment (`true` for `dev` and `test`, `false` for others). +3. Validation: Immediately afterwards, the `validate_toggles.py` script runs. It reads the same JSON file, sees that the feature toggle is required, and queries AWS SSM to confirm that Terraform successfully created it with the expected value. + +### Step 4: Cleanup Process + +Feature toggles are **technical debt**. Once a feature is fully released and stable, the toggle and all associated conditional logic must be removed. + +Follow the **"Two-Ticket" Rule**: + +1. When you create a ticket to add a feature toggle, immediately create a second ticket to remove it. +2. 
@cached(ssm_cache_in_seconds)
def get_ssm_parameter(parameter_name: str) -> str:
    """Return the value of an SSM parameter, falling back to ``"false"``.

    Results are memoised in ``ssm_cache_in_seconds``, so SSM is only queried
    when the name is not currently cached. A missing parameter or any other
    AWS client error is logged and answered with ``"false"``; that fallback is
    an ordinary return value of this function.

    Args:
        parameter_name: Full SSM parameter name to fetch.

    Returns:
        The stored parameter value, or ``"false"`` when it cannot be fetched.
    """
    fallback = "false"
    logger.info("Fetching '%s' from AWS SSM (not from cache).", parameter_name)
    try:
        payload = ssm_client.get_parameter(Name=parameter_name, WithDecryption=True)
    except ssm_client.exceptions.ParameterNotFound:
        # Checked before the generic ClientError handler below.
        logger.warning("Parameter '%s' not found in SSM.", parameter_name)
        return fallback
    except ClientError:
        logger.exception("An AWS client error occurred fetching '%s' from SSM.", parameter_name)
        return fallback
    else:
        return payload["Parameter"]["Value"]


def is_feature_enabled(feature_name: str) -> bool:
    """Report whether the named feature toggle is switched on.

    The toggle is looked up under ``feature_toggles_prefix``; it counts as
    enabled only when the stored value, lower-cased and stripped of
    surrounding whitespace, equals ``"true"``.

    Args:
        feature_name: Toggle name without the environment prefix.

    Returns:
        ``True`` when the toggle value normalises to ``"true"``, else ``False``.
    """
    full_name = feature_toggles_prefix + feature_name
    raw_value = get_ssm_parameter(full_name)
    normalised = raw_value.lower().strip()
    return normalised == "true"
class _ParameterNotFound(ClientError):
    """Stand-in for the SSM client's modelled ``ParameterNotFound`` exception.

    A dedicated subclass keeps "parameter missing" distinguishable from other
    client errors. Aliasing ``ParameterNotFound`` to ``ClientError`` itself
    would send every client error into the not-found handler, so the generic
    ``except ClientError`` branch would never be exercised.
    """


_LOGGER_PATH = "eligibility_signposting_api.feature_toggle.feature_toggle.logger"


@pytest.fixture(autouse=True)
def clear_cache():
    """Empty the SSM cache before each test so cached values cannot leak between tests."""
    ssm_cache_in_seconds.clear()


@patch("eligibility_signposting_api.feature_toggle.feature_toggle.ssm_client")
class TestGetSsmParameter:
    """Tests for ``get_ssm_parameter`` with the module-level SSM client mocked."""

    def test_get_ssm_parameter_success(self, mock_ssm_client: Mock):
        param_name = "/local/feature_toggles/feature_test"
        expected_value = "true"
        mock_ssm_client.get_parameter.return_value = {"Parameter": {"Value": expected_value}}

        result = get_ssm_parameter(param_name)

        assert result == expected_value
        mock_ssm_client.get_parameter.assert_called_once_with(Name=param_name, WithDecryption=True)

    def test_get_ssm_parameter_is_cached(self, mock_ssm_client: Mock):
        param_name = "/local/feature_toggles/cached_feature"
        expected_value = "true"
        mock_ssm_client.get_parameter.return_value = {"Parameter": {"Value": expected_value}}

        result1 = get_ssm_parameter(param_name)
        result2 = get_ssm_parameter(param_name)

        assert result1 == expected_value
        assert result2 == expected_value
        # The second lookup must be served from the cache.
        mock_ssm_client.get_parameter.assert_called_once()

    def test_get_ssm_parameter_not_found(self, mock_ssm_client: Mock):
        param_name = "/local/feature_toggles/non_existent_feature"

        mock_ssm_client.exceptions.ParameterNotFound = _ParameterNotFound
        mock_ssm_client.get_parameter.side_effect = _ParameterNotFound(
            error_response={"Error": {"Code": "ParameterNotFound", "Message": "Not Found"}},
            operation_name="GetParameter",
        )

        with patch(_LOGGER_PATH) as mock_logger:
            result = get_ssm_parameter(param_name)

        assert result == "false"
        mock_ssm_client.get_parameter.assert_called_once_with(Name=param_name, WithDecryption=True)
        # Pin the branch: the not-found handler warns and does not log an exception.
        mock_logger.warning.assert_called_once()
        mock_logger.exception.assert_not_called()

    def test_get_ssm_parameter_client_error(self, mock_ssm_client: Mock):
        param_name = "/local/feature_toggles/error_feature"

        # ParameterNotFound must be a *different* class from the raised error,
        # otherwise the not-found handler swallows it first.
        mock_ssm_client.exceptions.ParameterNotFound = _ParameterNotFound
        mock_ssm_client.get_parameter.side_effect = ClientError(
            error_response={"Error": {"Code": "ThrottlingException", "Message": "Rate exceeded"}},
            operation_name="GetParameter",
        )

        with patch(_LOGGER_PATH) as mock_logger:
            result = get_ssm_parameter(param_name)

        assert result == "false"
        # Pin the branch: the generic client-error handler logs the exception.
        mock_logger.exception.assert_called_once()
        mock_logger.warning.assert_not_called()


@patch("eligibility_signposting_api.feature_toggle.feature_toggle.get_ssm_parameter")
class TestIsFeatureEnabled:
    """Tests for ``is_feature_enabled`` with the SSM lookup mocked."""

    @pytest.mark.parametrize(
        ("return_value", "expected_result"),
        [
            ("true", True),
            ("True", True),
            (" TRUE ", True),
            ("false", False),
            ("False", False),
            ("anything_else", False),
            ("", False),
        ],
    )
    def test_is_feature_enabled_various_inputs(
        self, mock_get_ssm_parameter: Mock, return_value: str, *, expected_result: bool
    ):
        feature_name = "is_feature_enabled_test"
        # ENV is set to "local" at the top of this test module.
        expected_param_name = f"/local/feature_toggles/{feature_name}"
        mock_get_ssm_parameter.return_value = return_value

        result = is_feature_enabled(feature_name)

        assert result is expected_result
        mock_get_ssm_parameter.assert_called_once_with(expected_param_name)