Add e2e tests for embedding raw flag #5

Workflow file for this run

# Embedding CLI build and tests
name: Embedding CLI

on:
  workflow_dispatch:
  push:
    branches: [master, feature/**]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'
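# Three jobs cover the main platforms: Linux is the full test run, while the
# Windows and macOS jobs below use continue-on-error as best-effort signals.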
jobs:
  embedding-cli-tests-linux:
    runs-on: ubuntu-latest
    env:
      LLAMA_CACHE: tmp # stable path for cache
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
        with: { fetch-depth: 0 }
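      # The cache key pins OS, model, and quantization, with a -v1 suffix for
      # manual invalidation; restore-keys fall back to any older cache for
      # this OS (then any OS) so a partial match still saves download time.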
      - name: Restore model cache
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
          restore-keys: |
            hf-${{ runner.os }}-
            hf-
      - name: Install system deps
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential cmake curl libcurl4-openssl-dev python3-pip
      - name: Set up Python
        uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      - name: Install Python deps
        run: |
          # only skip when the file is absent; a failed install should still fail the step
          if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
          python -m pip install pytest numpy pytest-timeout
      - name: Build llama-embedding
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target llama-embedding -j $(nproc)
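      # The here-string (<<< "ok") feeds the binary a one-word prompt, so this
      # run both downloads the model and exercises a minimal embedding pass;
      # the until-loop retries transient network failures up to three times.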
      - name: Pre-download tiny model (retry x3 on network)
        run: |
          set -e
          tries=0
          until ./build/bin/llama-embedding \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"
              exit 1
            fi
            echo "Retrying download ($tries/3)..."
            sleep 3
          done
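      # pipefail makes the step fail on a pytest error even though the output
      # is piped through tee; the JUnit XML and plain-text log are uploaded
      # below regardless of the outcome.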
      - name: Run embedding tests (30s per-test cap)
        shell: bash
        run: |
          set -o pipefail
          pytest -v tests/e2e/embedding \
            --timeout=30 \
            --durations=10 \
            --junitxml=pytest-report.xml | tee pytest-output.txt
      - name: Upload test artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: linux-embedding-tests
          path: |
            pytest-output.txt
            pytest-report.xml
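      # Explicit save (paired with the restore-only step above) so the model
      # is cached even when a later test step fails.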
      - name: Save model cache
        if: always()
        uses: actions/cache/save@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
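  # Windows is best-effort: continue-on-error keeps a failing run here from
  # blocking the required Linux job.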
  embedding-cli-tests-windows:
    runs-on: windows-latest
    continue-on-error: true
    env:
      LLAMA_CACHE: tmp
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      # --- vcpkg plain bootstrap (no actions, no submodules) ---
      - name: Bootstrap vcpkg
        shell: pwsh
        run: |
          $env:VCPKG_ROOT = "$env:RUNNER_TEMP\vcpkg"
          git clone https://github.com/microsoft/vcpkg $env:VCPKG_ROOT
          & "$env:VCPKG_ROOT\bootstrap-vcpkg.bat" -disableMetrics
          echo "VCPKG_ROOT=$env:VCPKG_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Install curl with OpenSSL via vcpkg
        shell: pwsh
        run: |
          & "$env:VCPKG_ROOT\vcpkg.exe" install curl[openssl]:x64-windows
      - name: Restore model cache
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
          restore-keys: |
            hf-${{ runner.os }}-
            hf-
      - name: Install Python deps
        run: pip install pytest numpy
      - name: Configure & Build (Release)
        shell: pwsh
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_ROOT\scripts\buildsystems\vcpkg.cmake"
          cmake --build build --target llama-embedding --config Release -j 2
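      # Visual Studio is a multi-config generator, so the binary lands in
      # build/bin/Release/ rather than build/bin/ as on Linux and macOS.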
      - name: Pre-download tiny model (retry x3)
        shell: bash
        run: |
          set -e
          tries=0
          until ./build/bin/Release/llama-embedding.exe \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"; exit 1
            fi
            echo "Retrying download ($tries/3)..."; sleep 3
          done
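      # -k narrows the run to the raw/JSON consistency test only; the full
      # embedding test matrix runs on Linux.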
      - name: Run smoke tests
        shell: bash
        run: |
          pytest -q tests/e2e/embedding -k raw_vs_json_consistency
  embedding-cli-tests-macos:
    runs-on: macos-latest
    continue-on-error: true
    env:
      LLAMA_CACHE: tmp
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      - name: Install Python deps
        run: pip install pytest numpy
      - name: Build
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target llama-embedding -j 3
      - name: Pre-download tiny model (retry x3)
        run: |
          set -e
          tries=0
          until ./build/bin/llama-embedding \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"; exit 1
            fi
            echo "Retrying download ($tries/3)..."; sleep 3
          done
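      # The --help call is a cheap sanity check that the binary starts at all
      # before spending time on the pytest smoke test.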
      - name: Warm cache & run a tiny smoke
        run: |
          ./build/bin/llama-embedding --help >/dev/null 2>&1
          pytest -q tests/e2e/embedding -k raw_vs_json_consistency