avitai
diff --git a/.dockerignore b/.dockerignore
Lines changed: 164 additions & 0 deletions
diff --git a/.github/workflows/benchmark-gate.yml b/.github/workflows/benchmark-gate.yml
Lines changed: 42 additions & 0 deletions
diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml
Lines changed: 101 additions & 0 deletions
diff --git a/.github/workflows/build-verification.yml b/.github/workflows/build-verification.yml
Lines changed: 1 addition & 2 deletions
@@ -0,0 +1,164 @@
+# =============================================================================
+# Docker Build Context Ignore
+# =============================================================================
+# Based on .gitignore, but Docker anchors every pattern at the context root,
+# so artifacts that occur at any depth are written with a leading "**/".
+# Keeps .venv (5-15GB) and .git (~500MB) out of the context (target: <500MB).
+
+# --- Docker-specific (not in .gitignore) ---
+.git/
+.venv*
+tools/benchkit/.venv/
+
+# --- Byte-compiled / optimized ---
+**/__pycache__/
+**/*.py[cod]
+**/*$py.class
+**/*.so
+
+# --- Distribution / packaging ---
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+**/*.egg-info/
+.installed.cfg
+**/*.egg
+MANIFEST
+
+# --- Installer logs ---
+pip-log.txt
+pip-delete-this-directory.txt
+
+# --- Unit test / coverage reports ---
+htmlcov/
+.tox/
+.nox/
+**/.coverage
+**/.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+**/*.cover
+**/*.py,cover
+**/.hypothesis/
+**/.pytest_cache/
+cover/
+
+# --- Caches ---
+**/.mypy_cache/
+**/.ruff_cache/
+.dmypy.json
+dmypy.json
+.pybuilder/
+.pytype/
+cython_debug/
+
+# --- Environments ---
+**/.env
+**/.env.*
+.env.cloud
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# --- Documentation build artifacts ---
+docs/_build/
+/site
+
+# --- Non-runtime project files ---
+design_docs/
+memory-bank/
+sandbox/
+
+# --- AI assistant / agent traces ---
+CLAUDE.md
+.claude/
+.claude-collective/
+.cursor/
+.cursorignore
+.agent/
+.taskmaster/
+.deprecated/
+
+# --- Temp / logs ---
+temp/
+tmp/
+**/*.tmp
+**/*.tmp.*
+**/*.log
+logs/
+
+# --- Benchmark data and results ---
+benchmark-data/
+benchmark_results/
+.benchmarks/
+.benchmarks-results/
+
+# --- W&B ---
+wandb/
+.wandb*
+
+# --- Secrets ---
+secrets.sh
+**/secrets.sh
+**/vertex_config.yaml
+
+# --- Orbax checkpoint artifacts ---
+example_checkpoints/
+**/*.orbax-checkpoint-tmp/
+**/*.orbax-checkpoint-tmp-*/
+<MagicMock*>.orbax-checkpoint-tmp/
+*MagicMock*.orbax-checkpoint-tmp/
+MagicMock/**
+**/_CHECKPOINT_METADATA
+**/_strings.json
+**/manifest.ocdbt
+**/ocdbt.process_*/
+**/array_metadatas/
+
+# --- Test artifacts ---
+**/test_debug*.py
+**/test_*.tmp
+**/test_checkpoint*/
+**/test_cache*/
+**/tests/tmp*/
+**/tests/temp*/
+tests/data/
+
+# --- JAX/XLA caches ---
+.cache/jax/
+.cache/xla/
+
+# --- IDE ---
+**/.ipynb_checkpoints
+.ropeproject
+.spyderproject
+.spyproject
+profile_default/
+
+# --- PDM ---
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# --- Misc ---
+*.manifest
+*.spec
+*.mo
+*.pot
+db.sqlite3
+db.sqlite3-journal
+.pypirc
@@ -0,0 +1,42 @@
+# Tier 1 performance gate: runs the CI benchmark runner on pull requests and
+# uploads benchmark-data/ as an artifact. The benchkit regression check is
+# advisory for now (continue-on-error).
+name: Performance Gate
+
+on:
+  pull_request:
+    paths:
+      - 'src/datarax/**'
+      - 'benchmarks/**'
+      - 'pyproject.toml'
+      # The job installs and invokes tools/benchkit, so changes there (or to
+      # this workflow itself) must re-run the gate as well.
+      - 'tools/benchkit/**'
+      - '.github/workflows/benchmark-gate.yml'
+
+# Least privilege: the job only checks out code and uploads an artifact.
+permissions:
+  contents: read
+
+jobs:
+  benchmark-tier1:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    env:
+      # Run JAX on the CPU backend, split into 4 host devices via the XLA flag.
+      XLA_FLAGS: "--xla_force_host_platform_device_count=4"
+      JAX_PLATFORMS: "cpu"
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Install benchkit
+        run: uv pip install -e tools/benchkit
+
+      - name: Run Tier 1 Benchmark Gate
+        run: uv run python -m benchmarks.runners.ci_runner --repetitions 3
+
+      - name: Regression check (benchkit)
+        run: uv run benchkit check --data benchmark-data/ --threshold 0.05
+        continue-on-error: true  # Non-blocking until baseline is established
+
+      # always(): keep the results available even when an earlier step failed.
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: benchmark-results
+          path: benchmark-data/
+          retention-days: 30
@@ -0,0 +1,101 @@
+# Nightly benchmark run: executes datarax-bench on CPU with W&B export and
+# uploads benchmark-data/ as an artifact. GPU and TPU jobs are kept below,
+# commented out, until suitable runners are available.
+name: Nightly Benchmarks
+
+on:
+  schedule:
+    - cron: "0 2 * * *"  # 2 AM UTC daily
+  workflow_dispatch:
+    inputs:
+      platform:
+        description: "Target platform"
+        default: "cpu"
+        type: choice
+        options:
+          - cpu
+          - gpu
+          - tpu
+
+# Least privilege: the job only checks out code and uploads an artifact.
+permissions:
+  contents: read
+
+jobs:
+  cpu-benchmarks:
+    # Honor the manual `platform` choice: scheduled runs always execute this
+    # job; manual runs execute it only when "cpu" is selected. Selecting gpu or
+    # tpu runs nothing until the corresponding jobs below are enabled.
+    if: github.event_name == 'schedule' || inputs.platform == 'cpu'
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      # Run JAX on the CPU backend, split into 4 host devices via the XLA flag.
+      XLA_FLAGS: "--xla_force_host_platform_device_count=4"
+      JAX_PLATFORMS: "cpu"
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Install benchkit
+        # Quoted so the shell never treats [wandb] as a glob character class.
+        run: uv pip install -e "tools/benchkit[wandb]"
+
+      - name: Run benchmarks and export to W&B
+        env:
+          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+        run: >
+          uv run datarax-bench run
+          --platform cpu
+          --repetitions 3
+          --wandb
+          --charts
+
+      - name: Upload results
+        # Upload even when the benchmark step fails so partial results survive.
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: nightly-cpu-results
+          path: benchmark-data/
+          retention-days: 90
+
+  # GPU benchmarks — requires self-hosted runner or SkyPilot
+  # Uncomment when cloud credits are available (see Section 6.4.5)
+  # gpu-benchmarks:
+  #   runs-on: [self-hosted, gpu, a100]
+  #   timeout-minutes: 120
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - uses: astral-sh/setup-uv@v4
+  #     - run: uv sync --all-extras
+  #     - run: uv pip install -e tools/benchkit[wandb]
+  #     - name: Run GPU benchmarks and export
+  #       env:
+  #         WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+  #       run: >
+  #         uv run datarax-bench run
+  #         --platform gpu
+  #         --profile gpu_a100
+  #         --repetitions 3
+  #         --wandb
+  #         --charts
+  #     - uses: actions/upload-artifact@v4
+  #       with:
+  #         name: nightly-gpu-results
+  #         path: benchmark-data/
+  #         retention-days: 90
+
+  # TPU benchmarks — requires TRC access or SkyPilot
+  # tpu-benchmarks:
+  #   runs-on: [self-hosted, tpu]
+  #   timeout-minutes: 120
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - uses: astral-sh/setup-uv@v4
+  #     - run: uv sync --all-extras
+  #     - run: uv pip install -e tools/benchkit[wandb]
+  #     - name: Run TPU benchmarks and export
+  #       env:
+  #         WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
+  #       run: >
+  #         uv run datarax-bench run
+  #         --platform tpu
+  #         --profile tpu_v5e
+  #         --repetitions 3
+  #         --wandb
+  #         --charts
+  #     - uses: actions/upload-artifact@v4
+  #       with:
+  #         name: nightly-tpu-results
+  #         path: benchmark-data/
+  #         retention-days: 90
@@ -44,8 +44,7 @@ jobs:
       - name: Install build dependencies (macOS)
         if: runner.os == 'macOS'
         run: |
-          # Install without CUDA dependencies for macOS
-          uv pip install -e ".[all-cpu]"
+          uv pip install -e ".[dev,data]"
           uv pip install types-requests types-setuptools
 
       - name: Setup PYTHONPATH for tests