krkn-chaos
diff --git a/‎.coveragerc‎
Lines changed: 1 addition & 0 deletions b/‎.coveragerc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/tests_v2.yml‎
Lines changed: 53 additions & 0 deletions b/‎.github/workflows/tests_v2.yml‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎CI/tests_v2/CONTRIBUTING_TESTS.md‎
Lines changed: 175 additions & 0 deletions b/‎CI/tests_v2/CONTRIBUTING_TESTS.md‎
Lines changed: 175 additions & 0 deletions
diff --git a/‎CI/tests_v2/Makefile‎
Lines changed: 97 additions & 0 deletions b/‎CI/tests_v2/Makefile‎
Lines changed: 97 additions & 0 deletions
@@ -2,3 +2,4 @@
 omit =
     tests/*
     krkn/tests/**
+    CI/tests_v2/*
@@ -0,0 +1,53 @@
+# Runs the CI/tests_v2 pytest suite against a KinD cluster on every pull request and on pushes to main.
+name: Tests v2 (pytest functional)
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+jobs:
+  tests-v2:
+    name: Tests v2 (pytest functional)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Create KinD cluster
+        uses: redhat-chaos/actions/kind@main
+
+      # Pull the images on the runner and copy them into the KinD nodes.
+      # NOTE(review): `kind load docker-image` targets kind's default cluster name unless --name is
+      # given; confirm that matches the cluster created by the step above.
+      - name: Pre-load test images into KinD
+        run: |
+          docker pull nginx:alpine
+          kind load docker-image nginx:alpine
+          docker pull quay.io/krkn-chaos/krkn:tools
+          kind load docker-image quay.io/krkn-chaos/krkn:tools
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          architecture: 'x64'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          # Refresh the package index first; a stale index on the runner image can make install fail.
+          sudo apt-get update
+          sudo apt-get install -y build-essential python3-dev
+          pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r CI/tests_v2/requirements.txt
+
+      - name: Run tests_v2
+        run: |
+          KRKN_TEST_COVERAGE=1 python -m pytest CI/tests_v2/ -v --timeout=300 --reruns=1 --reruns-delay=5 \
+            --html=CI/tests_v2/report.html -n auto --junitxml=CI/tests_v2/results.xml
+
+      # always(): upload whatever reports exist even when the test step failed.
+      - name: Upload tests_v2 artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: tests-v2-results
+          path: |
+            CI/tests_v2/report.html
+            CI/tests_v2/results.xml
+            CI/tests_v2/assets/
+          if-no-files-found: ignore
@@ -64,6 +64,10 @@ CI/out/*
 CI/ci_results
 CI/legacy/*node.yaml
 CI/results.markdown
+# CI tests_v2 (pytest-html / pytest outputs)
+CI/tests_v2/results.xml
+CI/tests_v2/report.html
+CI/tests_v2/assets/
 
 #env
 chaos/*
 
@@ -0,0 +1,175 @@
+# Adding a New Scenario Test (CI/tests_v2)
+
+This guide explains how to add a new chaos scenario test to the v2 pytest framework. The layout is **folder-per-scenario**: each scenario has its own directory under `scenarios/<scenario_name>/` containing the test file, Kubernetes resources, and the Krkn scenario base YAML.
+
+## Option 1: Scaffold script (recommended)
+
+From the **repository root**:
+
+```bash
+python CI/tests_v2/scaffold.py --scenario service_hijacking
+```
+
+This creates:
+
+- `CI/tests_v2/scenarios/service_hijacking/test_service_hijacking.py` — A test class extending `BaseScenarioTest` with a stub `test_happy_path` and `WORKLOAD_MANIFEST` pointing to the folder’s `resource.yaml`.
+- `CI/tests_v2/scenarios/service_hijacking/resource.yaml` — A placeholder Deployment (namespace is patched at deploy time).
+- `CI/tests_v2/scenarios/service_hijacking/scenario_base.yaml` — A placeholder Krkn scenario; edit this with the structure expected by your scenario type.
+
+The script automatically registers the marker in `CI/tests_v2/pytest.ini`. For example, it adds:
+
+```
+service_hijacking: marks a test as a service_hijacking scenario test
+```
+
+**Next steps after scaffolding:**
+
+1. Verify the marker was added to `pytest.ini` (the scaffold does this automatically).
+2. Edit `scenario_base.yaml` with the structure your Krkn scenario type expects (see `scenarios/application_outage/scenario_base.yaml` and `scenarios/pod_disruption/scenario_base.yaml` for examples). The top-level key should match `SCENARIO_NAME`.
+3. If your scenario uses a **list** structure (like pod_disruption) instead of a **dict** with a top-level key, set `NAMESPACE_KEY_PATH` (e.g. `[0, "config", "namespace_pattern"]`) and `NAMESPACE_IS_REGEX = True` if the namespace is a regex pattern.
+4. The generated `test_happy_path` already uses `self.run_scenario(self.tmp_path, ns)` and assertions. Add more test methods (e.g. negative tests with `@pytest.mark.no_workload`) as needed.
+5. Adjust `resource.yaml` if your scenario needs a different workload (e.g. specific image or labels).
+
+If your Kraken scenario type string is not `<scenario>_scenarios`, pass it explicitly:
+
+```bash
+python CI/tests_v2/scaffold.py --scenario node_disruption --scenario-type node_scenarios
+```
+
+## Option 2: Manual setup
+
+1. **Create the scenario folder**  
+   `CI/tests_v2/scenarios/<scenario_name>/`.
+
+2. **Add resource.yaml**  
+   Kubernetes manifest(s) for the workload (Deployment or Pod). Use a distinct label (e.g. `app: <scenario>-target`). You may omit `metadata.namespace` or leave it set to any value; the framework patches it at deploy time.
+
+3. **Add scenario_base.yaml**  
+   The canonical Krkn scenario structure. Tests will load this, patch namespace (and any overrides), write to `tmp_path`, and pass to `build_config`. See existing scenarios for the format your scenario type expects.
+
+4. **Add test_<scenario>.py**  
+   - Import `BaseScenarioTest` from `lib.base` and helpers from `lib.utils` (e.g. `assert_kraken_success`, `get_pods_list`, `scenario_dir` if needed).
+   - Define a class extending `BaseScenarioTest` with:
+     - `WORKLOAD_MANIFEST = "CI/tests_v2/scenarios/<scenario_name>/resource.yaml"`
+     - `WORKLOAD_IS_PATH = True`
+     - `LABEL_SELECTOR = "app=<label>"`
+     - `SCENARIO_NAME = "<scenario_name>"`
+     - `SCENARIO_TYPE = "<scenario_type>"` (e.g. `application_outages_scenarios`)
+     - `NAMESPACE_KEY_PATH`: path to the namespace field (e.g. `["application_outage", "namespace"]` for dict-based, or `[0, "config", "namespace_pattern"]` for list-based)
+     - `NAMESPACE_IS_REGEX = False` (or `True` for regex patterns like pod_disruption)
+     - `OVERRIDES_KEY_PATH = ["<top-level key>"]` if the scenario supports overrides (e.g. duration, block).
+   - Add `@pytest.mark.functional` and `@pytest.mark.<scenario>` on the class.
+   - In at least one test, call `self.run_scenario(self.tmp_path, self.ns)` and assert with `assert_kraken_success`, `assert_pod_count_unchanged`, and `assert_all_pods_running_and_ready`. Use `self.k8s_core`, `self.tmp_path`, etc. (injected by the base class).
+
+5. **Register the marker**  
+   In `CI/tests_v2/pytest.ini`, under `markers`:
+   ```
+   <scenario>: marks a test as a <scenario> scenario test
+   ```
+
+## Conventions
+
+- **Folder-per-scenario**: One directory per scenario under `scenarios/`. All assets (test, resource.yaml, scenario_base.yaml, and any extra YAMLs) live there for easy tracking and onboarding.
+- **Ephemeral namespace**: Every test gets a unique `krkn-test-<uuid>` namespace. The base class deploys the workload into it before the test; no manual deploy is required.
+- **Negative tests**: For tests that don’t need a workload (e.g. invalid scenario, bad namespace), use `@pytest.mark.no_workload`. The test will still get a namespace but no workload will be deployed.
+- **Scenario type**: `SCENARIO_TYPE` must match the key in Kraken’s config (e.g. `application_outages_scenarios`, `pod_disruption_scenarios`). See `CI/tests_v2/config/common_test_config.yaml` and the scenario plugin’s `get_scenario_types()`.
+- **Assertions**: Use `assert_kraken_success(result, context=f"namespace={ns}", tmp_path=self.tmp_path)` so failures include stdout/stderr and optional log files.
+- **Timeouts**: Use constants from `lib.base` (`READINESS_TIMEOUT`, `POLICY_WAIT_TIMEOUT`, etc.) instead of magic numbers.
+
+## Exit Code Handling
+
+Kraken uses the following exit codes: **0** = success; **1** = scenario failure (e.g. post scenarios still failing); **2** = critical alerts fired; **3+** = health check / KubeVirt check failures; **-1** = infrastructure error (bad config, no kubeconfig).
+
+- **Happy-path tests**: Use `assert_kraken_success(result, ...)`. By default only exit code 0 is accepted.
+- **Alert-aware tests**: If you enable `check_critical_alerts` and expect alerts, use `assert_kraken_success(result, allowed_codes=(0, 2), ...)` so exit code 2 is treated as acceptable.
+- **Expected-failure tests**: Use `assert_kraken_failure(result, context=..., tmp_path=self.tmp_path)` for negative tests (invalid scenario, bad namespace, etc.). This gives the same diagnostic quality (log dump, tmp_path hint) as success assertions. Prefer this over a bare `assert result.returncode != 0`.
+
+## Running your new tests
+
+```bash
+pytest CI/tests_v2/ -v -m <scenario>
+```
+
+For debugging with logs and keeping failed namespaces:
+
+```bash
+pytest CI/tests_v2/ -v -m <scenario> --log-cli-level=DEBUG --keep-ns-on-fail
+```
+
+---
+
+## Naming Conventions
+
+Follow these conventions so the framework stays consistent as new scenarios are added.
+
+### Quick Reference
+
+| Element | Pattern | Example |
+|---|---|---|
+| Scenario folder | `scenarios/<snake_case>/` | `scenarios/node_disruption/` |
+| Test file | `test_<scenario>.py` | `test_node_disruption.py` |
+| Test class | `Test<CamelCase>(BaseScenarioTest)` | `TestNodeDisruption` |
+| Pytest marker | `@pytest.mark.<scenario>` (matches folder) | `@pytest.mark.node_disruption` |
+| Scenario YAML | `scenario_base.yaml` | — |
+| Workload YAML | `resource.yaml` | — |
+| Extra YAMLs | `<descriptive_name>.yaml` | `nginx_http.yaml` |
+| Lib modules | `lib/<concern>.py` | `lib/deploy.py` |
+| Public fixtures | `<verb>_<noun>` or `<noun>` | `run_kraken`, `test_namespace` |
+| Private/autouse fixtures | `_<descriptive>` | `_cleanup_stale_namespaces` |
+| Assertion helpers | `assert_<condition>` | `assert_pod_count_unchanged` |
+| Query helpers | `get_<resource>` or `find_<resource>_by_<criteria>` | `get_pods_list`, `find_network_policy_by_prefix` |
+| Env var overrides | `KRKN_TEST_<NAME>` | `KRKN_TEST_READINESS_TIMEOUT` |
+
+### Folders
+
+- One folder per scenario under `scenarios/`. The folder name is `snake_case` and must match the `SCENARIO_NAME` class attribute in the test.
+- Shared framework code lives in `lib/`. Each module covers a single concern (`k8s`, `namespace`, `deploy`, `kraken`, `utils`, `base`, `preflight`).
+- Do **not** add scenario-specific code to `lib/`; keep it in the scenario folder as module-level helpers.
+
+### Files
+
+- Test files: `test_<scenario>.py`. This is required for pytest discovery (`test_*.py`).
+- Workload manifests: always `resource.yaml`. If a scenario needs additional K8s resources (e.g. a Service for traffic testing), use a descriptive name like `nginx_http.yaml`.
+- Scenario config: always `scenario_base.yaml`. This is the template that `load_and_patch_scenario` loads and patches.
+
+### Classes
+
+- One test class per file: `Test<CamelCase>` extending `BaseScenarioTest`.
+- The class name must be `Test` followed by the PascalCase form of the folder name (e.g. `pod_disruption` -> `TestPodDisruption`).
+
+### Test Methods
+
+- Prefix: `test_` (pytest requirement).
+- Use descriptive names that convey **what is being verified**, not implementation details.
+- Good: `test_pod_crash_and_recovery`, `test_traffic_blocked_during_outage`, `test_invalid_scenario_fails`.
+- Avoid: `test_run_1`, `test_scenario`, `test_it_works`.
+
+### Fixtures
+
+- **Public fixtures** (intended for use in tests): use `<verb>_<noun>` or plain `<noun>`. Examples: `run_kraken`, `deploy_workload`, `test_namespace`, `kubectl`.
+- **Private/autouse fixtures** (framework internals): prefix with `_`. Examples: `_kube_config_loaded`, `_preflight_checks`, `_inject_common_fixtures`.
+- K8s client fixtures use the `k8s_` prefix: `k8s_core`, `k8s_apps`, `k8s_networking`, `k8s_client`.
+
+### Helpers and Utilities
+
+- **Assertions**: `assert_<what_is_expected>`. Always raise `AssertionError` with a message that includes the namespace.
+- **K8s queries**: `get_<resource>_list` for direct API calls, `find_<resource>_by_<criteria>` for filtered lookups.
+- **Private helpers**: prefix with `_` for module-internal functions (e.g. `_pods`, `_policies`, `_get_nested`).
+
+### Constants and Environment Variables
+
+- Timeout constants: `UPPER_CASE` in `lib/base.py`. Each is overridable via an env var prefixed `KRKN_TEST_`.
+- Feature flags: `KRKN_TEST_DRY_RUN`, `KRKN_TEST_COVERAGE`. Always use the `KRKN_TEST_` prefix so all tunables are discoverable with `grep KRKN_TEST_`.
+
+### Markers
+
+- Every test class gets `@pytest.mark.functional` (framework-wide) and `@pytest.mark.<scenario>` (scenario-specific).
+- The scenario marker name matches the folder name exactly.
+- Behavioral modifiers use plain descriptive names: `no_workload`, `order`.
+- Register all custom markers in `pytest.ini` to avoid warnings.
+
+## Adding Dependencies
+
+- **Runtime (Kraken needs it)**: Add to the **root** `requirements.txt`. Pin a version (e.g. `package==1.2.3` or `package>=1.2,<2`).
+- **Test-only (only CI/tests_v2 needs it)**: Add to **`CI/tests_v2/requirements.txt`**. Pin a version there as well.
+- After changing either file, run `make setup` from `CI/tests_v2` (or `make -f CI/tests_v2/Makefile setup` from the repo root) to verify both files install cleanly together.
@@ -0,0 +1,97 @@
+# CI/tests_v2 functional tests - single entry point.
+# Run from repo root: make -f CI/tests_v2/Makefile <target>
+# Or from CI/tests_v2: make <target> (REPO_ROOT is resolved automatically).
+
+# Resolve repo root from this Makefile's own location (CI/tests_v2 -> two levels up).
+# $(lastword $(MAKEFILE_LIST)) is the makefile currently being parsed, so the result stays correct
+# even if another makefile (an including file, or one named in MAKEFILES) was read first.
+# $(abspath ...) normalises the path inside make, so no shell is forked at parse time.
+REPO_ROOT := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))../..)
+# Virtualenv shared by every target; PYTHON and PIP always point inside it.
+VENV := $(REPO_ROOT)/venv
+PYTHON := $(VENV)/bin/python
+PIP := $(VENV)/bin/pip
+# KinD cluster name; override on the command line: make CLUSTER_NAME=<name> <target>
+CLUSTER_NAME ?= ci-krkn
+TESTS_DIR := $(REPO_ROOT)/CI/tests_v2
+
+.PHONY: setup preflight test test-fast test-debug test-scenario test-dry-run clean help
+
+# help: print the usage summary for every target.
+# It is the first rule in this file, so a bare `make` runs it.
+# A single printf emits one line per argument; "" produces the blank separator lines.
+help:
+	@printf '%s\n' \
+		"CI/tests_v2 functional tests - usage: make [target]" \
+		"" \
+		"Targets:" \
+		"  setup         Create venv (if missing), install Python deps, create KinD cluster (kind-config-dev.yml)." \
+		"               Run once before first test. Override cluster config: KIND_CONFIG=path make setup" \
+		"" \
+		"  preflight     Check Python 3.9+, kind, kubectl, Docker, cluster reachability, test deps." \
+		"               Invoked automatically by test targets; run standalone to validate environment." \
+		"" \
+		"  test         Full run: retries (2), timeout 300s, HTML report, JUnit XML, coverage." \
+		"               Use for CI or final verification. Output: report.html, results.xml" \
+		"" \
+		"  test-fast     Quick run: no retries, 120s timeout, no report. For fast local iteration." \
+		"" \
+		"  test-debug    Debug run: verbose (-s), keep failed namespaces (--keep-ns-on-fail), DEBUG logging." \
+		"               Use when investigating failures; inspect kept namespaces with kubectl." \
+		"" \
+		"  test-scenario Run only one scenario. Requires SCENARIO=<marker>." \
+		"               Example: make test-scenario SCENARIO=pod_disruption" \
+		"" \
+		"  test-dry-run  Validate scenario plumbing only (no Kraken execution). Sets KRKN_TEST_DRY_RUN=1." \
+		"" \
+		"  clean        Delete KinD cluster $(CLUSTER_NAME) and remove report.html, results.xml." \
+		"" \
+		"  help         Show this help." \
+		"" \
+		"Run from repo root: make -f CI/tests_v2/Makefile <target>" \
+		"Or from CI/tests_v2:  make <target>"
+
+# setup: one-time environment bootstrap.
+# The $(VENV)/.installed prerequisite creates the venv and installs both requirements files.
+# preflight then runs in a sub-make rather than as a prerequisite, presumably so that it executes
+# only after the Python deps exist (its import check uses the venv interpreter).
+# Finally setup_env.sh runs from the repo root; per the help text this creates the KinD cluster.
+setup: $(VENV)/.installed
+	@echo "Running cluster setup..."
+	$(MAKE) -f $(TESTS_DIR)/Makefile preflight
+	cd $(REPO_ROOT) && ./CI/tests_v2/setup_env.sh
+	@echo "Setup complete. Run 'make test' or 'make -f CI/tests_v2/Makefile test' from repo root."
+
+# Stamp file: the install re-runs only when either requirements file is newer than the stamp.
+# Root requirements = Kraken runtime; tests_v2 requirements = test-only plugins; both are required
+# for functional tests. (Kept as a make comment so it is not passed to the shell and echoed.)
+# The venv is (re)created whenever its interpreter is missing, which also covers a venv directory
+# left behind by an interrupted or partial creation.
+$(VENV)/.installed: $(REPO_ROOT)/requirements.txt $(TESTS_DIR)/requirements.txt
+	@if [ ! -x "$(PYTHON)" ]; then python3 -m venv "$(VENV)"; echo "Created venv at $(VENV)"; fi
+	$(PYTHON) -m pip install -q --upgrade pip
+	$(PIP) install -q -r $(REPO_ROOT)/requirements.txt
+	$(PIP) install -q -r $(TESTS_DIR)/requirements.txt
+	@touch $@
+	@echo "Python deps installed."
+
+# preflight: fail fast with a readable message when the local environment cannot run the tests.
+# Checks, in order: python3 on PATH and >= 3.9, kind, kubectl, a running Docker daemon, cluster
+# reachability (only when the KinD cluster $(CLUSTER_NAME) already exists), and the pytest plugins
+# importable from the venv interpreter.
+# xdist is part of the plugin check because the test and test-fast targets pass `-n auto`.
+preflight:
+	@echo "Preflight: checking Python, tools, and cluster..."
+	@command -v python3 >/dev/null 2>&1 || { echo "Error: python3 not found."; exit 1; }
+	@python3 -c "import sys; sys.exit(0 if sys.version_info >= (3, 9) else 1)" || { echo "Error: Python 3.9+ required."; exit 1; }
+	@command -v kind >/dev/null 2>&1 || { echo "Error: kind not installed."; exit 1; }
+	@command -v kubectl >/dev/null 2>&1 || { echo "Error: kubectl not installed."; exit 1; }
+	@docker info >/dev/null 2>&1 || { echo "Error: Docker not running (required for KinD)."; exit 1; }
+	@if kind get clusters 2>/dev/null | grep -qx "$(CLUSTER_NAME)"; then \
+		kubectl cluster-info >/dev/null 2>&1 || { echo "Error: Cluster $(CLUSTER_NAME) exists but cluster-info failed."; exit 1; }; \
+	else \
+		echo "Note: Cluster $(CLUSTER_NAME) not found. Run 'make setup' to create it."; \
+	fi
+	@$(PYTHON) -c "import pytest_rerunfailures, pytest_html, pytest_timeout, pytest_order, xdist" 2>/dev/null || \
+		{ echo "Error: Install test deps with 'make setup' or pip install -r CI/tests_v2/requirements.txt"; exit 1; }
+	@echo "Preflight OK."
+
+# Common pytest invocation shared by every test target: venv interpreter, whole tests_v2 tree, verbose.
+pytest_run := $(PYTHON) -m pytest $(TESTS_DIR)/ -v
+
+# test: full run with KRKN_TEST_COVERAGE=1, 300s timeout, 2 reruns (10s apart), parallel workers,
+# and HTML + JUnit reports written into the tests_v2 directory.
+test: preflight
+	cd $(REPO_ROOT) && KRKN_TEST_COVERAGE=1 $(pytest_run) --timeout=300 --reruns=2 --reruns-delay=10 \
+		--html=$(TESTS_DIR)/report.html -n auto --junitxml=$(TESTS_DIR)/results.xml
+
+# test-fast: rerunfailures plugin disabled, parallel workers, 120s timeout, no reports.
+test-fast: preflight
+	cd $(REPO_ROOT) && $(pytest_run) -p no:rerunfailures -n auto --timeout=120
+
+# test-debug: output capture off (-s), rerunfailures disabled, DEBUG log level, --keep-ns-on-fail.
+test-debug: preflight
+	cd $(REPO_ROOT) && $(pytest_run) -s -p no:rerunfailures --timeout=300 \
+		--keep-ns-on-fail --log-cli-level=DEBUG
+
+# test-scenario: run only the tests selected by marker expression SCENARIO; abort if it is unset.
+test-scenario: preflight
+	@[ -n "$(SCENARIO)" ] || { echo "Error: set SCENARIO=pod_disruption (or application_outage, etc.)"; exit 1; }
+	cd $(REPO_ROOT) && $(pytest_run) -m "$(SCENARIO)" --timeout=300 --reruns=2 --reruns-delay=10
+
+# test-dry-run: same tree with KRKN_TEST_DRY_RUN=1 exported to pytest.
+test-dry-run: preflight
+	cd $(REPO_ROOT) && KRKN_TEST_DRY_RUN=1 $(pytest_run)
+
+# clean: delete the KinD cluster and every generated report artifact.
+# Cluster deletion is best-effort (`|| true`): a missing cluster or missing kind binary must not
+# stop the file cleanup.
+# assets/ is removed together with report.html; pytest-html writes it next to the report when the
+# report is not self-contained.
+clean:
+	@kind delete cluster --name $(CLUSTER_NAME) 2>/dev/null || true
+	@rm -f $(TESTS_DIR)/report.html $(TESTS_DIR)/results.xml
+	@rm -rf $(TESTS_DIR)/assets
+	@echo "Cleaned cluster and report artifacts."