opencl: transposed gemm/gemv moe kernel with mxfp4,f32 (#16602) #78
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Server build and tests
name: Server

on:
  # Manual runs may pin a specific commit and opt in to the slow test suite.
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean

  # Pushes to master, restricted to paths that can affect the server build.
  push:
    branches:
      - master
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'tools/server/**.*'

  # Pull requests use the same path filter as pushes.
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - '.github/workflows/server.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'tools/server/**.*'

# Logging settings shared by every job in this workflow.
env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

# A newer run in the same group cancels the one in progress. The group key
# ends in the PR head branch when there is one, otherwise in the run id.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # NOTE(review): this job only installs system and Python dependencies; it
  # never builds or runs anything. `server-build` below repeats the same steps
  # and then builds and tests. It looks like a leftover - confirm that no
  # required status check references it before removing it.
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r tools/server/tests/requirements.txt

  # Installs the WebUI npm dependencies once and caches node_modules so the
  # check, build and test jobs below can restore them.
  webui-setup:
    name: WebUI Setup
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"
          cache: "npm"
          cache-dependency-path: "tools/server/webui/package-lock.json"

      - name: Cache node_modules
        uses: actions/cache@v4
        id: cache-node-modules
        with:
          path: tools/server/webui/node_modules
          key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-modules-

      # Only install when the exact cache key was not found.
      - name: Install dependencies
        if: steps.cache-node-modules.outputs.cache-hit != 'true'
        run: npm ci
        working-directory: tools/server/webui

  # Type checking and linting of the WebUI sources.
  webui-check:
    needs: webui-setup
    name: WebUI Check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Restore node_modules cache
        uses: actions/cache@v4
        with:
          path: tools/server/webui/node_modules
          key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-modules-

      - name: Run type checking
        run: npm run check
        working-directory: tools/server/webui

      - name: Run linting
        run: npm run lint
        working-directory: tools/server/webui

  # Production build of the WebUI.
  webui-build:
    needs: webui-check
    name: WebUI Build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Restore node_modules cache
        uses: actions/cache@v4
        with:
          path: tools/server/webui/node_modules
          key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-modules-

      - name: Build application
        run: npm run build
        working-directory: tools/server/webui

  # Client, server, UI (Storybook) and end-to-end test suites of the WebUI.
  webui-tests:
    needs: webui-build
    name: Run WebUI tests
    permissions:
      contents: read
    runs-on: ubuntu-latest
    steps:
      # Check out the same commit as the other jobs. Otherwise a manual run
      # with `sha`, or a pull request, would test a different tree than the
      # one `webui-setup` cached node_modules for.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Restore node_modules cache
        uses: actions/cache@v4
        with:
          path: tools/server/webui/node_modules
          key: ${{ runner.os }}-node-modules-${{ hashFiles('tools/server/webui/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-modules-

      - name: Install Playwright browsers
        run: npx playwright install --with-deps
        working-directory: tools/server/webui

      - name: Build Storybook
        run: npm run build-storybook
        working-directory: tools/server/webui

      - name: Run Client tests
        run: npm run test:client
        working-directory: tools/server/webui

      - name: Run Server tests
        run: npm run test:server
        working-directory: tools/server/webui

      - name: Run UI tests
        run: npm run test:ui
        working-directory: tools/server/webui

      - name: Run E2E tests
        run: npm run test:e2e
        working-directory: tools/server/webui

  # Builds llama-server on Linux (two sanitizer builds plus one plain Release
  # build) and runs the server integration tests once the WebUI tests pass.
  server-build:
    needs: [webui-tests]
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r tools/server/tests/requirements.txt

      - name: Setup Node.js for WebUI
        uses: actions/setup-node@v4
        with:
          node-version: "22"
          cache: "npm"
          cache-dependency-path: "tools/server/webui/package-lock.json"

      - name: Install WebUI dependencies
        run: npm ci
        working-directory: tools/server/webui

      - name: Build WebUI
        run: npm run build
        working-directory: tools/server/webui

      # Exactly one of the three build steps below runs per matrix entry.
      # THREAD is not in the matrix at the moment, so this one is dormant.
      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
            -DGGML_OPENMP=OFF ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build (sanitizers)
        id: cmake_build_sanitizers
        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      # Plain build without sanitizers. This step used to share the name
      # "Build (sanitizers)" with the step above.
      - name: Build
        id: cmake_build
        if: ${{ matrix.sanitizer == '' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Tests
        id: server_integration_tests
        if: ${{ matrix.sanitizer == '' }}
        env:
          GITHUB_ACTIONS: "true"
        run: |
          cd tools/server/tests
          ./tests.sh

      - name: Tests (sanitizers)
        id: server_integration_tests_sanitizers
        if: ${{ matrix.sanitizer != '' }}
        run: |
          cd tools/server/tests
          LLAMA_SANITIZE=1 ./tests.sh

      # NOTE(review): no `schedule` trigger is visible in this workflow, so
      # `github.event.schedule` is presumably always empty here and only the
      # manual `slow_tests` input enables this step - confirm.
      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd tools/server/tests
          SLOW_TESTS=1 ./tests.sh

  # Builds llama-server (Release) on Windows and runs the integration tests.
  # This job defines no matrix, so its conditions must not read `matrix.*`.
  server-windows:
    runs-on: windows-2022

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        uses: ./.github/actions/windows-setup-curl

      - name: Build
        id: cmake_build
        env:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          cmake -B build -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r tools/server/tests/requirements.txt

      # Place the libcurl DLL next to the built binaries.
      - name: Copy Libcurl
        id: prepare_libcurl
        env:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          cp $env:CURL_PATH/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      # The former condition `!matrix.disabled_on_pr || !github.event.pull_request`
      # was always true because this job has no matrix, so it was dropped.
      - name: Tests
        id: server_integration_tests
        run: |
          cd tools/server/tests
          $env:PYTHONIOENCODING = ":replace"
          pytest -v -x -m "not slow"

      # The former `&& matrix.build_type == 'Release'` term could never be
      # true without a matrix, so slow tests never ran on Windows. The build
      # above is always Release, so the term is not needed.
      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
        run: |
          cd tools/server/tests
          $env:SLOW_TESTS = "1"
          pytest -v -x