Skip to content

Commit dd56e72

Browse files
committed
Merge branch 'master' into dev-refactoring
2 parents 5ef9b98 + 28c39da commit dd56e72

File tree

283 files changed

+26684
-11541
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

283 files changed

+26684
-11541
lines changed

.clang-format

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ AllowShortIfStatementsOnASingleLine: Never
2222
AllowShortLambdasOnASingleLine: Inline
2323
AllowShortLoopsOnASingleLine: false
2424
AlwaysBreakBeforeMultilineStrings: true
25-
BinPackArguments: false
25+
BinPackArguments: true
2626
BinPackParameters: false # OnePerLine
2727
BitFieldColonSpacing: Both
2828
BreakBeforeBraces: Custom # Attach

.devops/rocm.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ARG UBUNTU_VERSION=24.04
44
ARG ROCM_VERSION=6.4
55
ARG AMDGPU_VERSION=6.4
66

7-
# Target the CUDA build image
7+
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
99

1010
### Build image
@@ -15,12 +15,12 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1515
# This is mostly tied to rocBLAS supported archs.
1616
# gfx803, gfx900, gfx1032, gfx1101, gfx1102 are not officially supported
1717
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
18+
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1919

20-
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
20+
ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201'
2121
#ARG ROCM_DOCKER_ARCH=gfx1100
2222

23-
# Set nvcc architectured
23+
# Set ROCm architectures
2424
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
2525
# Enable ROCm
2626
# ENV CC=/opt/rocm/llvm/bin/clang

.github/workflows/build.yml

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ jobs:
8888
-DGGML_METAL_SHADER_DEBUG=ON \
8989
-DGGML_RPC=ON
9090
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
91+
leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1
9192
9293
- name: Test
9394
id: cmake_test
@@ -126,7 +127,8 @@ jobs:
126127
-DCMAKE_BUILD_RPATH="@loader_path" \
127128
-DLLAMA_FATAL_WARNINGS=ON \
128129
-DGGML_METAL=OFF \
129-
-DGGML_RPC=ON
130+
-DGGML_RPC=ON \
131+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
130132
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
131133
132134
- name: Test
@@ -1050,9 +1052,13 @@ jobs:
10501052
run: examples/sycl/win-build-sycl.bat
10511053

10521054
windows-latest-cmake-hip:
1053-
if: ${{ github.event.inputs.create_release != 'true' }}
10541055
runs-on: windows-2022
10551056

1057+
env:
1058+
# The ROCm version must correspond to the version used in the HIP SDK.
1059+
ROCM_VERSION: "6.4.2"
1060+
HIPSDK_INSTALLER_VERSION: "25.Q3"
1061+
10561062
steps:
10571063
- name: Clone
10581064
id: checkout
@@ -1061,23 +1067,46 @@ jobs:
10611067
- name: Clone rocWMMA repository
10621068
id: clone_rocwmma
10631069
run: |
1064-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
1070+
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
10651071
1066-
- name: Install
1072+
- name: Cache ROCm Installation
1073+
id: cache-rocm
1074+
uses: actions/cache@v4
1075+
with:
1076+
path: C:\Program Files\AMD\ROCm
1077+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
1078+
1079+
- name: Install ROCm
1080+
if: steps.cache-rocm.outputs.cache-hit != 'true'
10671081
id: depends
10681082
run: |
10691083
$ErrorActionPreference = "Stop"
10701084
write-host "Downloading AMD HIP SDK Installer"
1071-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1085+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
10721086
write-host "Installing AMD HIP SDK"
10731087
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
1074-
$proc.WaitForExit(600000)
1088+
$completed = $proc.WaitForExit(600000)
1089+
if (-not $completed) {
1090+
Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
1091+
$proc.Kill()
1092+
exit 1
1093+
}
1094+
if ($proc.ExitCode -ne 0) {
1095+
Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
1096+
exit 1
1097+
}
10751098
write-host "Completed AMD HIP SDK installation"
10761099
10771100
- name: Verify ROCm
10781101
id: verify
10791102
run: |
1080-
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1103+
# Find and test ROCm installation
1104+
$clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
1105+
if (-not $clangPath) {
1106+
Write-Error "ROCm installation not found"
1107+
exit 1
1108+
}
1109+
& $clangPath.FullName --version
10811110
10821111
- name: Install ccache
10831112
uses: ggml-org/[email protected]

.github/workflows/close-issue.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
steps:
1818
- uses: actions/stale@v5
1919
with:
20-
exempt-issue-labels: "refactoring,help wanted,good first issue,research,bug,roadmap"
20+
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap"
2121
days-before-issue-stale: 30
2222
days-before-issue-close: 14
2323
stale-issue-label: "stale"

.github/workflows/release.yml

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ jobs:
108108
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
109109
-DLLAMA_FATAL_WARNINGS=ON \
110110
-DGGML_METAL=OFF \
111-
-DGGML_RPC=ON
111+
-DGGML_RPC=ON \
112+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
112113
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
113114
114115
- name: Determine tag name
@@ -528,11 +529,16 @@ jobs:
528529
windows-hip:
529530
runs-on: windows-2022
530531

532+
env:
533+
# The ROCm version must correspond to the version used in the HIP SDK.
534+
ROCM_VERSION: "6.4.2"
535+
HIPSDK_INSTALLER_VERSION: "25.Q3"
536+
531537
strategy:
532538
matrix:
533539
include:
534540
- name: "radeon"
535-
gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
541+
gpu_targets: "gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
536542

537543
steps:
538544
- name: Clone
@@ -542,29 +548,52 @@ jobs:
542548
- name: Clone rocWMMA repository
543549
id: clone_rocwmma
544550
run: |
545-
git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
551+
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
552+
553+
- name: Cache ROCm Installation
554+
id: cache-rocm
555+
uses: actions/cache@v4
556+
with:
557+
path: C:\Program Files\AMD\ROCm
558+
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
546559

547560
- name: ccache
548561
uses: ggml-org/[email protected]
549562
with:
550-
key: windows-latest-cmake-hip-${{ matrix.name }}-x64
563+
key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
551564
evict-old-files: 1d
552565

553-
- name: Install
566+
- name: Install ROCm
567+
if: steps.cache-rocm.outputs.cache-hit != 'true'
554568
id: depends
555569
run: |
556570
$ErrorActionPreference = "Stop"
557571
write-host "Downloading AMD HIP SDK Installer"
558-
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
572+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
559573
write-host "Installing AMD HIP SDK"
560574
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
561-
$proc.WaitForExit(600000)
575+
$completed = $proc.WaitForExit(600000)
576+
if (-not $completed) {
577+
Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
578+
$proc.Kill()
579+
exit 1
580+
}
581+
if ($proc.ExitCode -ne 0) {
582+
Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
583+
exit 1
584+
}
562585
write-host "Completed AMD HIP SDK installation"
563586
564587
- name: Verify ROCm
565588
id: verify
566589
run: |
567-
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
590+
# Find and test ROCm installation
591+
$clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
592+
if (-not $clangPath) {
593+
Write-Error "ROCm installation not found"
594+
exit 1
595+
}
596+
& $clangPath.FullName --version
568597
569598
- name: Build
570599
id: cmake_build
@@ -585,9 +614,12 @@ jobs:
585614
-DLLAMA_CURL=OFF
586615
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
587616
md "build\bin\rocblas\library\"
617+
md "build\bin\hipblaslt\library"
588618
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
619+
cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
589620
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
590621
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
622+
cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"
591623
592624
- name: Pack artifacts
593625
id: pack_artifacts

CONTRIBUTING.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
1717
- Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
1818
- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
19+
- Let authors, who are also collaborators, merge their own PRs
20+
- When merging a PR by a contributor, make sure you have a good understanding of the changes
21+
- Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contributing long-term, someone else needs to take responsibility (you)
1922

2023
# Coding guidelines
2124

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
137137
- [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
138138
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
139139
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
140+
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
140141

141142
#### Multimodal
142143

ci/run.sh

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,9 @@ function gg_run_ctest_with_model_debug {
270270
local model; model=$(gg_get_model)
271271
cd build-ci-debug
272272
set -e
273+
273274
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
275+
274276
set +e
275277
cd ..
276278
}
@@ -281,7 +283,15 @@ function gg_run_ctest_with_model_release {
281283
local model; model=$(gg_get_model)
282284
cd build-ci-release
283285
set -e
286+
284287
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
288+
289+
# test memory leaks
290+
#if [[ ! -z ${GG_BUILD_METAL} ]]; then
291+
# # TODO: this hangs for some reason ...
292+
# (time leaks -quiet -atExit -- ./bin/test-thread-safety -m $model --parallel 2 -t 2 -p "hello") 2>&1 | tee -a $OUT/${ci}-leaks.log
293+
#fi
294+
285295
set +e
286296
cd ..
287297
}
@@ -386,10 +396,10 @@ function gg_run_open_llama_7b_v2 {
386396

387397
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
388398

389-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
390-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
391-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
392-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
399+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
400+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
401+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
402+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
393403

394404
function check_ppl {
395405
qnt="$1"
@@ -520,8 +530,8 @@ function gg_run_pythia_1_4b {
520530

521531
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
522532

523-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
524-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
533+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
534+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
525535

526536
function check_ppl {
527537
qnt="$1"
@@ -651,10 +661,10 @@ function gg_run_pythia_2_8b {
651661

652662
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
653663

654-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
655-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
656-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
657-
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
664+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
665+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
666+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
667+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
658668

659669
function check_ppl {
660670
qnt="$1"
@@ -860,20 +870,15 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
860870
fi
861871

862872
ret=0
863-
if [ -z ${GG_BUILD_SYCL} ]; then
864-
# SYCL build breaks with debug build flags
865-
test $ret -eq 0 && gg_run ctest_debug
866-
fi
873+
test $ret -eq 0 && gg_run ctest_debug
867874
test $ret -eq 0 && gg_run ctest_release
868875

869876
if [ -z ${GG_BUILD_LOW_PERF} ]; then
870877
test $ret -eq 0 && gg_run embd_bge_small
871878
test $ret -eq 0 && gg_run rerank_tiny
872879

873880
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
874-
if [ -z ${GG_BUILD_SYCL} ]; then
875-
test $ret -eq 0 && gg_run test_scripts_debug
876-
fi
881+
test $ret -eq 0 && gg_run test_scripts_debug
877882
test $ret -eq 0 && gg_run test_scripts_release
878883
fi
879884

@@ -884,9 +889,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
884889
test $ret -eq 0 && gg_run pythia_2_8b
885890
#test $ret -eq 0 && gg_run open_llama_7b_v2
886891
fi
887-
if [ -z ${GG_BUILD_SYCL} ]; then
888-
test $ret -eq 0 && gg_run ctest_with_model_debug
889-
fi
892+
test $ret -eq 0 && gg_run ctest_with_model_debug
890893
test $ret -eq 0 && gg_run ctest_with_model_release
891894
fi
892895
fi

0 commit comments

Comments
 (0)