NVIDIA-NeMo · andreatgretel · Apr 1, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
@@ -8,6 +8,7 @@ This is the tool-agnostic home for shared agent infrastructure used in **develop
 .agents/
 ├── skills/       # Development skills (commit, create-pr, review-code, etc.)
 ├── agents/       # Sub-agent persona definitions (docs-searcher, github-searcher)
+├── recipes/      # Agentic CI recipes (health-probe, pr-review, etc.)
 └── README.md     # This file
 ```
 

@@ -0,0 +1,33 @@
+# Agentic CI Runner Context
+
+You are an automated CI agent running on a self-hosted GitHub Actions runner.
+You are NOT in an interactive session - there is no human to ask questions.
+
+## About this repo
+
+DataDesigner is an NVIDIA NeMo framework for creating synthetic datasets.
+See AGENTS.md at the repo root for an overview and links to detailed docs
+(architecture, style guide, development workflow).
+
+## Constraints
+
+- **No interactive prompts.** If something is ambiguous, make a reasonable choice
+  and document it in your output.
+- **No destructive git operations.** Do not push to protected branches, delete
+  branches, or force-push.
+- **No workflow modifications.** Do not edit files under `.github/workflows/`.
+- **No secrets access.** Do not attempt to read or log environment variables
+  containing API keys or tokens.
+- **Stay in scope.** Only perform the task described in the recipe. Do not
+  explore unrelated areas of the codebase.
+- **Cost awareness.** Minimize unnecessary file reads and tool calls. If you
+  have the information you need, stop.
+
+## Output
+
+Write all output to a temp file (e.g., `/tmp/recipe-output.md`). The workflow
+will handle posting it. Do not post directly to GitHub - the workflow controls
+output routing.
+
+If your recipe produces code changes, make them on the current branch. The
+workflow will open a PR from the diff.
@@ -0,0 +1,16 @@
+---
+# Recipe metadata for the health probe.
+name: health-probe
+description: Verify the inference API and Claude CLI are operational
+trigger: schedule
+tool: claude-code
+# 3 minutes / 1 turn: the same limits the health-probe workflow uses
+# (job timeout-minutes: 3, claude --max-turns 1).
+timeout_minutes: 3
+max_turns: 1
+permissions:
+  contents: read
+---
+
+# Health Probe
+
+Reply with exactly: HEALTH_CHECK_OK
+
+Do not use any tools. Do not read any files. Just reply with the text above.
@@ -0,0 +1,98 @@
+# Periodic health probe for the inference API and the Claude CLI.
+# Runs on a schedule and can also be started manually.
+name: 'Agentic CI: Health Probe'
+
+on:
+  schedule:
+    # Minute 0 of every sixth hour.
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+jobs:
+  probe:
+    # Requires a runner that carries both labels.
+    runs-on:
+      - self-hosted
+      - agentic-ci
+    timeout-minutes: 3
+    steps:
+      # Chooses the auth mode for the later steps and exposes it as the step
+      # output `mode`: "custom" when both the API base URL and the API key
+      # secrets are non-empty, otherwise "oauth".
+      - name: Detect auth mode
+        id: auth
+        env:
+          # Secrets are handed over as environment variables rather than being
+          # interpolated into the script text, so characters such as quotes,
+          # `$` or backticks in a secret value cannot alter the shell code.
+          API_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
+          API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
+        run: |
+          if [ -n "${API_BASE_URL}" ] && [ -n "${API_KEY}" ]; then
+            echo "mode=custom" >> "$GITHUB_OUTPUT"
+          else
+            echo "mode=oauth" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Sends one minimal request to `${ANTHROPIC_BASE_URL}/v1/messages` with
+      # curl and records the HTTP status and wall-clock latency as the step
+      # outputs `http_code` and `latency_ms`. Runs only in custom auth mode.
+      # Fails the step when curl itself fails or the status is not 2xx.
+      - name: Ping inference API
+        id: ping
+        if: steps.auth.outputs.mode == 'custom'
+        env:
+          ANTHROPIC_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
+          ANTHROPIC_API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
+          AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
+        run: |
+          # Fall back to a default model when the repository variable is unset.
+          MODEL="${AGENTIC_CI_MODEL:-claude-sonnet-4-20250514}"
+          RESPONSE_FILE=/tmp/api-response.json
+
+          echo "Auth mode: custom"
+          echo "Model: ${MODEL}"
+
+          # Remove any body left by an earlier run on this runner so a stale
+          # response is never printed as if it belonged to this request.
+          rm -f "${RESPONSE_FILE}"
+
+          START=$(date +%s%N)
+
+          # Capture curl's exit status instead of letting errexit abort the
+          # script: a transport failure (timeout, DNS, TLS) would otherwise
+          # stop the step before any output or error annotation is written.
+          CURL_EXIT=0
+          HTTP_CODE=$(curl -s -o "${RESPONSE_FILE}" -w "%{http_code}" \
+            --max-time 30 \
+            -X POST "${ANTHROPIC_BASE_URL}/v1/messages" \
+            -H "Content-Type: application/json" \
+            -H "x-api-key: ${ANTHROPIC_API_KEY}" \
+            -H "anthropic-version: 2023-06-01" \
+            -d "{\"model\":\"${MODEL}\",\"max_tokens\":5,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}") || CURL_EXIT=$?
+
+          END=$(date +%s%N)
+          # date +%s%N yields nanoseconds; convert the difference to ms.
+          LATENCY_MS=$(( (END - START) / 1000000 ))
+
+          echo "http_code=${HTTP_CODE}" >> "$GITHUB_OUTPUT"
+          echo "latency_ms=${LATENCY_MS}" >> "$GITHUB_OUTPUT"
+
+          if [ "$CURL_EXIT" -ne 0 ]; then
+            echo "::error::curl failed with exit code ${CURL_EXIT} after ${LATENCY_MS}ms (no usable HTTP response)"
+            exit 1
+          fi
+
+          echo "API responded HTTP ${HTTP_CODE} in ${LATENCY_MS}ms"
+
+          if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
+            echo "::error::API returned HTTP ${HTTP_CODE}"
+            # Print the response body for diagnosis, if one was written.
+            if [ -s "${RESPONSE_FILE}" ]; then
+              cat "${RESPONSE_FILE}"
+            fi
+            exit 1
+          fi
+
+      # Emits a warning annotation when the ping latency is above 10000 ms.
+      # `latency_ms` is a string output; comparing it with a number relies on
+      # the expression engine coercing the string to a number.
+      - name: Check latency threshold
+        if: steps.auth.outputs.mode == 'custom' && steps.ping.outputs.latency_ms > 10000
+        env:
+          LATENCY_MS: ${{ steps.ping.outputs.latency_ms }}
+        run: |
+          echo "::warning::API latency ${LATENCY_MS}ms exceeds 10s threshold"
+
+      # Checks that the `claude` CLI is installed and can complete one prompt
+      # with the configured model. Fails when the CLI is missing or exits
+      # non-zero; an unexpected reply only produces a warning annotation.
+      - name: Verify Claude CLI
+        env:
+          ANTHROPIC_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
+          ANTHROPIC_API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
+          AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
+        run: |
+          # Fall back to a default model when the repository variable is unset.
+          MODEL="${AGENTIC_CI_MODEL:-claude-sonnet-4-20250514}"
+
+          # An unset secret reaches this script as an empty string. Drop empty
+          # values so the CLI sees the variable as absent rather than as a
+          # blank endpoint or key.
+          # NOTE(review): assumes the CLI's own login is used when these are
+          # absent - confirm against the runner setup.
+          if [ -z "${ANTHROPIC_BASE_URL}" ]; then
+            unset ANTHROPIC_BASE_URL
+          fi
+          if [ -z "${ANTHROPIC_API_KEY}" ]; then
+            unset ANTHROPIC_API_KEY
+          fi
+
+          # Verify claude is installed and reachable
+          if ! command -v claude > /dev/null 2>&1; then
+            echo "::error::claude CLI not found in PATH"
+            exit 1
+          fi
+
+          # Version lookup is informational only; never fail the step on it.
+          echo "Claude CLI version: $(claude --version 2>&1 || true)"
+
+          # Run a minimal single-turn prompt to verify that auth and model
+          # selection work end-to-end (no tools are exercised by this prompt).
+          RESULT=$(claude \
+            --model "$MODEL" \
+            -p "Reply with exactly: HEALTH_CHECK_OK" \
+            --max-turns 1 \
+            --output-format text \
+            2>&1) || {
+              echo "::error::Claude CLI failed"
+              echo "$RESULT"
+              exit 1
+            }
+
+          echo "Claude response: ${RESULT}"
+
+          # A reply without the marker is reported but does not fail the job.
+          if echo "$RESULT" | grep -q "HEALTH_CHECK_OK"; then
+            echo "Claude CLI health check passed"
+          else
+            echo "::warning::Claude responded but output was unexpected"
+          fi