derekhiggins
diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
Lines changed: 13 additions & 7 deletions
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
Lines changed: 10 additions & 21 deletions
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
Lines changed: 21 additions & 70 deletions
@@ -2,9 +2,13 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'
 
 inputs:
-  test-types:
-    description: 'JSON array of test types to run'
+  test-subdirs:
+    description: 'Comma-separated list of test subdirectories to run'
     required: true
+  test-pattern:
+    description: 'Regex pattern to pass to pytest -k'
+    required: false
+    default: ''
   stack-config:
     description: 'Stack configuration to use'
     required: true
@@ -35,9 +39,11 @@ runs:
         ./scripts/integration-tests.sh \
           --stack-config '${{ inputs.stack-config }}' \
           --provider '${{ inputs.provider }}' \
-          --test-types '${{ inputs.test-types }}' \
+          --test-subdirs '${{ inputs.test-subdirs }}' \
+          --test-pattern '${{ inputs.test-pattern }}' \
           --inference-mode '${{ inputs.inference-mode }}' \
-          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }}
+          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \
+          | tee pytest-${{ inputs.inference-mode }}.log
 
 
     - name: Commit and push recordings
@@ -57,10 +63,12 @@ runs:
             git commit -m "Recordings update from CI"
           fi
 
-          git fetch origin ${{ github.event.pull_request.head.ref }}
-          git rebase origin/${{ github.event.pull_request.head.ref }}
+          # Take the branch name from the runner-provided GITHUB_REF_NAME variable and quote it,
+          # so a branch name with shell metacharacters is passed as data, not run as script text.
+          git fetch origin "$GITHUB_REF_NAME"
+          git rebase "origin/$GITHUB_REF_NAME"
           echo "Rebased successfully"
-          git push origin HEAD:${{ github.event.pull_request.head.ref }}
+          git push origin "HEAD:$GITHUB_REF_NAME"
           echo "Pushed successfully"
         else
           echo "No recording changes"
 
@@ -31,35 +31,23 @@ on:
         description: 'Test against a specific provider'
         type: string
         default: 'ollama'
+      test-subdirs:
+        description: 'Comma-separated list of test subdirectories to run'
+        type: string
+        default: ''
+      test-pattern:
+        description: 'Regex pattern to pass to pytest -k'
+        type: string
+        default: ''
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true
 
 jobs:
-  discover-tests:
-    runs-on: ubuntu-latest
-    outputs:
-      test-types: ${{ steps.generate-test-types.outputs.test-types }}
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Generate test types
-        id: generate-test-types
-        run: |
-          # Get test directories dynamically, excluding non-test directories
-          # NOTE: we are excluding post_training since the tests take too long
-          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
-            sed 's|tests/integration/||' |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
-            sort | jq -R -s -c 'split("\n")[:-1]')
-          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
 
   run-replay-mode-tests:
-    needs: discover-tests
     runs-on: ubuntu-latest
     name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
 
@@ -90,7 +78,8 @@ jobs:
       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-subdirs: ${{ inputs.test-subdirs }}
+          test-pattern: ${{ inputs.test-pattern }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           provider: ${{ matrix.provider }}
           inference-mode: 'replay'
 
@@ -1,93 +1,43 @@
+# This workflow should be run manually when needing to re-record tests. This happens when you have
+#  - added a new test
+#  - or changed an existing test such that a new inference call is made
+# You should make a PR and then run this workflow on that PR branch. The workflow will re-record the
+# tests and commit the recordings to the PR branch.
 name: Integration Tests (Record)
 
 run-name: Run the integration test suite from tests/integration
 
 on:
-  pull_request_target:
-    branches: [ main ]
-    types: [opened, synchronize, labeled]
-    paths:
-      - 'llama_stack/**'
-      - 'tests/**'
-      - 'uv.lock'
-      - 'pyproject.toml'
-      - '.github/workflows/record-integration-tests.yml' # This workflow
-      - '.github/actions/setup-ollama/action.yml'
-      - '.github/actions/setup-test-environment/action.yml'
-      - '.github/actions/run-and-record-tests/action.yml'
   workflow_dispatch:
     inputs:
+      test-subdirs:
+        description: 'Comma-separated list of test subdirectories to run'
+        type: string
+        default: ''
       test-provider:
         description: 'Test against a specific provider'
         type: string
         default: 'ollama'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
+      run-vision-tests:
+        description: 'Whether to run vision tests'
+        type: boolean
+        default: false
+      test-pattern:
+        description: 'Regex pattern to pass to pytest -k'
+        type: string
+        default: ''
 
 jobs:
-  discover-tests:
-    if: contains(github.event.pull_request.labels.*.name, 're-record-tests') ||
-      contains(github.event.pull_request.labels.*.name, 're-record-vision-tests')
-    runs-on: ubuntu-latest
-    outputs:
-      test-types: ${{ steps.generate-test-types.outputs.test-types }}
-      matrix-modes: ${{ steps.generate-test-types.outputs.matrix-modes }}
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Generate test types
-        id: generate-test-types
-        run: |
-          # Get test directories dynamically, excluding non-test directories
-          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
-            sort | jq -R -s -c 'split("\n")[:-1]')
-          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
-
-          labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
-          echo "labels=$labels"
-
-          modes_array=()
-          if [[ $labels == *"re-record-vision-tests"* ]]; then
-            modes_array+=("vision")
-          fi
-          if [[ $labels == *"re-record-tests"* ]]; then
-            modes_array+=("non-vision")
-          fi
-
-          # Convert to JSON array
-          if [ ${#modes_array[@]} -eq 0 ]; then
-            matrix_modes="[]"
-          else
-            matrix_modes=$(printf '%s\n' "${modes_array[@]}" | jq -R -s -c 'split("\n")[:-1]')
-          fi
-          echo "matrix_modes=$matrix_modes"
-          echo "matrix-modes=$matrix_modes" >> $GITHUB_OUTPUT
-
-        env:
-          GH_TOKEN: ${{ github.token }}
-
   record-tests:
-    needs: discover-tests
     runs-on: ubuntu-latest
 
     permissions:
       contents: write
 
-    strategy:
-      fail-fast: false
-      matrix:
-        mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
-
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
-          ref: ${{ github.event.pull_request.head.ref }}
           fetch-depth: 0
 
       - name: Setup test environment
@@ -96,14 +46,15 @@ jobs:
           python-version: "3.12"  # Use single Python version for recording
           client-version: "latest"
           provider: ${{ inputs.test-provider || 'ollama' }}
-          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
+          run-vision-tests: ${{ inputs.run-vision-tests }}
           inference-mode: 'record'
 
       - name: Run and record tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-pattern: ${{ inputs.test-pattern }}
+          test-subdirs: ${{ inputs.test-subdirs }}
           stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
           provider: ${{ inputs.test-provider || 'ollama' }}
           inference-mode: 'record'
-          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
+          run-vision-tests: ${{ inputs.run-vision-tests }}