Add e2e tests for embedding raw flag #5
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Embedding CLI build and tests
name: Embedding CLI

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

  # Run on pushes to master and feature branches, but only when files that
  # can affect the embedding CLI or its e2e tests change.
  push:
    branches: [master, feature/**]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'

  # Same path filter for pull requests (kept in sync with `push.paths`).
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'
jobs:
  # Primary job: full pytest suite on Linux.
  embedding-cli-tests-linux:
    runs-on: ubuntu-latest
    env:
      LLAMA_CACHE: tmp # stable path for cache
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
        with: { fetch-depth: 0 }

      # Restore-only: saving is done explicitly in the last step so the
      # downloaded model is persisted even when the build/test steps fail.
      - name: Restore model cache
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
          restore-keys: |
            hf-${{ runner.os }}-
            hf-

      - name: Install system deps
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential cmake curl libcurl4-openssl-dev python3-pip

      - name: Set up Python
        uses: actions/setup-python@v5
        with: { python-version: '3.11' }

      - name: Install Python deps
        run: |
          python -m pip install -r requirements.txt || echo "No extra requirements found"
          python -m pip install pytest numpy pytest-timeout

      - name: Build llama-embedding
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target llama-embedding -j $(nproc)

      # Network downloads are flaky on hosted runners; retry up to 3 times.
      - name: Pre-download tiny model (retry x3 on network)
        run: |
          set -e
          tries=0
          until ./build/bin/llama-embedding \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ "$tries" -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"
              exit 1
            fi
            echo "Retrying download ($tries/3)..."
            sleep 3
          done

      # pipefail keeps pytest's exit status visible through the `tee`.
      - name: Run embedding tests (30s per-test cap)
        shell: bash
        run: |
          set -o pipefail
          pytest -v tests/e2e/embedding \
            --timeout=30 \
            --durations=10 \
            --junitxml=pytest-report.xml | tee pytest-output.txt

      - name: Upload test artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: linux-embedding-tests
          path: |
            pytest-output.txt
            pytest-report.xml

      # NOTE: a second `actions/cache@v4` step here would only *restore* at
      # this point (its save runs as a post step and is skipped on an exact
      # key hit). Use the explicit save action so the model cache is written
      # immediately, including on failed runs.
      - name: Save model cache
        if: always()
        uses: actions/cache/save@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
| embedding-cli-tests-windows: | |
| runs-on: windows-latest | |
| continue-on-error: true | |
| env: | |
| LLAMA_CACHE: tmp | |
| EMBD_TEST_DEBUG: "1" | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-python@v5 | |
| with: { python-version: '3.11' } | |
| # --- vcpkg plain bootstrap (no actions, no submodules) --- | |
| - name: Bootstrap vcpkg | |
| shell: pwsh | |
| run: | | |
| $env:VCPKG_ROOT = "$env:RUNNER_TEMP\vcpkg" | |
| git clone https://github.com/microsoft/vcpkg $env:VCPKG_ROOT | |
| & "$env:VCPKG_ROOT\bootstrap-vcpkg.bat" -disableMetrics | |
| echo "VCPKG_ROOT=$env:VCPKG_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append | |
| - name: Install curl with OpenSSL via vcpkg | |
| shell: pwsh | |
| run: | | |
| & "$env:VCPKG_ROOT\vcpkg.exe" install curl[openssl]:x64-windows | |
| - name: Restore model cache | |
| uses: actions/cache@v4 | |
| with: | |
| path: | | |
| $HOME/.cache/llama.cpp | |
| tmp | |
| key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1 | |
| restore-keys: | | |
| hf-${{ runner.os }}- | |
| hf- | |
| - name: Install Python deps | |
| run: pip install pytest numpy | |
| - name: Configure & Build (Release) | |
| shell: pwsh | |
| run: | | |
| cmake -B build -DCMAKE_BUILD_TYPE=Release ` | |
| -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_ROOT\scripts\buildsystems\vcpkg.cmake" | |
| cmake --build build --target llama-embedding --config Release -j 2 | |
| - name: Pre-download tiny model (retry x3) | |
| shell: bash | |
| run: | | |
| set -e | |
| tries=0 | |
| until ./build/bin/Release/llama-embedding.exe \ | |
| -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \ | |
| -hff embeddinggemma-300M-qat-Q4_0.gguf \ | |
| --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do | |
| tries=$((tries+1)) | |
| if [ $tries -ge 3 ]; then | |
| echo "Pre-download failed after $tries attempts"; exit 1 | |
| fi | |
| echo "Retrying download ($tries/3)..."; sleep 3 | |
| done | |
| - name: Run smoke tests | |
| shell: bash | |
| run: | | |
| pytest -q tests/e2e/embedding -k raw_vs_json_consistency | |
| embedding-cli-tests-macos: | |
| runs-on: macos-latest | |
| continue-on-error: true | |
| env: | |
| LLAMA_CACHE: tmp | |
| EMBD_TEST_DEBUG: "1" | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - uses: actions/setup-python@v5 | |
| with: { python-version: '3.11' } | |
| - name: Install Python deps | |
| run: pip install pytest numpy | |
| - name: Build | |
| run: | | |
| cmake -B build -DCMAKE_BUILD_TYPE=Release | |
| cmake --build build --target llama-embedding -j 3 | |
| - name: Pre-download tiny model (retry x3) | |
| run: | | |
| set -e | |
| tries=0 | |
| until ./build/bin/llama-embedding \ | |
| -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \ | |
| -hff embeddinggemma-300M-qat-Q4_0.gguf \ | |
| --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do | |
| tries=$((tries+1)) | |
| if [ $tries -ge 3 ]; then | |
| echo "Pre-download failed after $tries attempts"; exit 1 | |
| fi | |
| echo "Retrying download ($tries/3)..."; sleep 3 | |
| done | |
| - name: Warm cache & run a tiny smoke | |
| run: | | |
| ./build/bin/llama-embedding --help >/dev/null 2>&1 | |
| pytest -q tests/e2e/embedding -k raw_vs_json_consistency |