Add e2e tests for embedding raw flag #5

Workflow file for this run

# Embedding CLI build and tests
name: Embedding CLI

on:
  workflow_dispatch:
  push:
    branches: [master, feature/**]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/embedding.yml'
      - 'examples/**'
      - 'src/**'
      - 'ggml/**'
      - 'include/**'
      - '**/CMakeLists.txt'
      - 'tests/e2e/embedding/**'
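# Three jobs cover the main platforms: Linux is the full test run, while the
# Windows and macOS jobs below use continue-on-error as best-effort signals.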
jobs:
  embedding-cli-tests-linux:
    runs-on: ubuntu-latest
    env:
      LLAMA_CACHE: tmp # stable path for cache
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
        with: { fetch-depth: 0 }
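      # The cache key pins OS, model, and quantization, with a -v1 suffix for
      # manual invalidation; restore-keys fall back to any older cache for
      # this OS (then any OS) so a partial match still saves download time.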
      - name: Restore model cache
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
          restore-keys: |
            hf-${{ runner.os }}-
            hf-
      - name: Install system deps
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential cmake curl libcurl4-openssl-dev python3-pip
      - name: Set up Python
        uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      - name: Install Python deps
        run: |
          # only skip when the file is absent; a failed install should still fail the step
          if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
          python -m pip install pytest numpy pytest-timeout
      - name: Build llama-embedding
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target llama-embedding -j $(nproc)
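      # The here-string (<<< "ok") feeds the binary a one-word prompt, so this
      # run both downloads the model and exercises a minimal embedding pass;
      # the until-loop retries transient network failures up to three times.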
      - name: Pre-download tiny model (retry x3 on network)
        run: |
          set -e
          tries=0
          until ./build/bin/llama-embedding \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"
              exit 1
            fi
            echo "Retrying download ($tries/3)..."
            sleep 3
          done
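      # pipefail makes the step fail on a pytest error even though the output
      # is piped through tee; the JUnit XML and plain-text log are uploaded
      # below regardless of the outcome.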
      - name: Run embedding tests (30s per-test cap)
        shell: bash
        run: |
          set -o pipefail
          pytest -v tests/e2e/embedding \
            --timeout=30 \
            --durations=10 \
            --junitxml=pytest-report.xml | tee pytest-output.txt
      - name: Upload test artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: linux-embedding-tests
          path: |
            pytest-output.txt
            pytest-report.xml
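      # Explicit save (paired with the restore-only step above) so the model
      # is cached even when a later test step fails.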
      - name: Save model cache
        if: always()
        uses: actions/cache/save@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
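  # Windows is best-effort: continue-on-error keeps a failing run here from
  # blocking the required Linux job.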
  embedding-cli-tests-windows:
    runs-on: windows-latest
    continue-on-error: true
    env:
      LLAMA_CACHE: tmp
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      # --- vcpkg plain bootstrap (no actions, no submodules) ---
      - name: Bootstrap vcpkg
        shell: pwsh
        run: |
          $env:VCPKG_ROOT = "$env:RUNNER_TEMP\vcpkg"
          git clone https://github.com/microsoft/vcpkg $env:VCPKG_ROOT
          & "$env:VCPKG_ROOT\bootstrap-vcpkg.bat" -disableMetrics
          echo "VCPKG_ROOT=$env:VCPKG_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Install curl with OpenSSL via vcpkg
        shell: pwsh
        run: |
          & "$env:VCPKG_ROOT\vcpkg.exe" install curl[openssl]:x64-windows
      - name: Restore model cache
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/llama.cpp
            tmp
          key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1
          restore-keys: |
            hf-${{ runner.os }}-
            hf-
      - name: Install Python deps
        run: pip install pytest numpy
      - name: Configure & Build (Release)
        shell: pwsh
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_ROOT\scripts\buildsystems\vcpkg.cmake"
          cmake --build build --target llama-embedding --config Release -j 2
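      # Visual Studio is a multi-config generator, so the binary lands in
      # build/bin/Release/ rather than build/bin/ as on Linux and macOS.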
      - name: Pre-download tiny model (retry x3)
        shell: bash
        run: |
          set -e
          tries=0
          until ./build/bin/Release/llama-embedding.exe \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"; exit 1
            fi
            echo "Retrying download ($tries/3)..."; sleep 3
          done
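      # -k narrows the run to the raw/JSON consistency test only; the full
      # embedding test matrix runs on Linux.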
      - name: Run smoke tests
        shell: bash
        run: |
          pytest -q tests/e2e/embedding -k raw_vs_json_consistency
  embedding-cli-tests-macos:
    runs-on: macos-latest
    continue-on-error: true
    env:
      LLAMA_CACHE: tmp
      EMBD_TEST_DEBUG: "1"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with: { python-version: '3.11' }
      - name: Install Python deps
        run: pip install pytest numpy
      - name: Build
        run: |
          cmake -B build -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target llama-embedding -j 3
      - name: Pre-download tiny model (retry x3)
        run: |
          set -e
          tries=0
          until ./build/bin/llama-embedding \
              -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \
              -hff embeddinggemma-300M-qat-Q4_0.gguf \
              --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do
            tries=$((tries+1))
            if [ $tries -ge 3 ]; then
              echo "Pre-download failed after $tries attempts"; exit 1
            fi
            echo "Retrying download ($tries/3)..."; sleep 3
          done
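      # The --help call is a cheap sanity check that the binary starts at all
      # before spending time on the pytest smoke test.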
      - name: Warm cache & run a tiny smoke
        run: |
          ./build/bin/llama-embedding --help >/dev/null 2>&1
          pytest -q tests/e2e/embedding -k raw_vs_json_consistency