diff --git a/.github/workflows/cat-test-examples.yml b/.github/workflows/cat-test-examples.yml
index 4835172..e445230 100644
--- a/.github/workflows/cat-test-examples.yml
+++ b/.github/workflows/cat-test-examples.yml
@@ -66,18 +66,11 @@ jobs:
           echo "number_of_runs=$ROUNDS" >> "$GITHUB_OUTPUT"
           echo "CAT_AI_SAMPLE_SIZE=$ROUNDS" >> $GITHUB_ENV
 
-      - name: Find latest example
-        uses: mathiasvr/command-output@v2.0.0
-        id: find-latest-example
-        with:
-          run: find examples/team_recommender/tests -maxdepth 1 -name "example_*" -type d | sort -V | tail -n 1
-
       - name: Run Latest Example tests
         run: >
           uv run pytest
           --verbose --verbosity=10 --capture=no --tb=native --color=yes --showlocals
-          ${{ steps.find-latest-example.outputs.stdout }}
-        # examples/team_recommender/tests/example_9*
+          examples/team_recommender/tests
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
 
diff --git a/examples/team_recommender/tests/conftest.py b/examples/team_recommender/tests/conftest.py
new file mode 100644
index 0000000..97ad35e
--- /dev/null
+++ b/examples/team_recommender/tests/conftest.py
@@ -0,0 +1,47 @@
+import pytest
+from settings import root_path
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--all",
+        action="store_true",
+        default=False,
+        help="Run all examples instead of just the latest",
+    )
+
+
+def pytest_configure(config):
+    # Register a marker for example tests
+    config.addinivalue_line("markers", "example: mark test as belonging to an example")
+
+
+# Completely avoid any debugger-dependent hooks
+# pytest_collect_file can trigger PyCharm's debugger in unpredictable ways
+
+
+def find_latest_example() -> str | None:
+    """Find the latest example directory without relying on interactive commands"""
+    # Avoid debugger evaluation of non-essential code branches
+    try:
+        tests_dir = root_path() / "tests"
+        example_dirs = [d for d in tests_dir.glob("example_*") if d.is_dir()]
+        example_dirs.sort()  # Natural sort to handle numerical directories correctly
+        return str(example_dirs[-1]) if example_dirs else None
+    except Exception:
+        # Fail silently - better to run all tests than break the test runner
+        return None
+
+
+def pytest_collection_modifyitems(config, items):
+    if not config.getoption("--all"):
+        latest_example = find_latest_example()
+        if latest_example:
+            mark_skip_all_except_matching_example(items, latest_example)
+
+
+def mark_skip_all_except_matching_example(items, latest_example: str):
+    skip_older = pytest.mark.skip(reason="Only running latest example (use --all to run all)")
+    for item in items:
+        if str(latest_example) not in str(item.fspath):
+            item.add_marker(skip_older)
diff --git a/examples/team_recommender/tests/example_4_loop_no_hallucinating/test_allocations_hallucinating.py b/examples/team_recommender/tests/example_4_loop_no_hallucinating/test_allocations_hallucinating.py
index e190998..a85652b 100644
--- a/examples/team_recommender/tests/example_4_loop_no_hallucinating/test_allocations_hallucinating.py
+++ b/examples/team_recommender/tests/example_4_loop_no_hallucinating/test_allocations_hallucinating.py
@@ -2,6 +2,7 @@
 
 from helpers import load_json_fixture
 from openai import OpenAI
+from retry import retry
 from settings import ROOT_DIR
 
 from cat_ai.reporter import Reporter
@@ -20,6 +21,7 @@ def get_developer_names_from_response(response: dict) -> set[str]:
     return {developer["name"] for developer in response["developers"]}
 
 
+@retry()
 def test_allocations():
     skills_data = load_json_fixture("skills.json")
     example_output = load_json_fixture("example_output.json")