Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
1036 commits
Select commit Hold shift + click to select a range
4afb0a7
server : Support multimodal completion and embeddings prompts in JSON…
65a Aug 22, 2025
ad5c975
ggml-cpu: Support Q5_0 and Q5_1 on s390x (#15486)
taronaeo Aug 22, 2025
9ebebef
llama : remove KV cache defragmentation logic (#15473)
ggerganov Aug 22, 2025
b1ab918
cuda : add Pad Reflect 1D support (#14659)
YavorGIvanov Aug 22, 2025
92f7f0a
ggml: add `conv3d` op (#15182)
rmatif Aug 22, 2025
32732f2
model : gpt-oss add response_format support (#15494)
aldehir Aug 22, 2025
4536363
ggml WebGPU: add support for quantization types (#15440)
reeselevine Aug 22, 2025
e92734d
test-opt: allow slight inprecision (#15503)
JohannesGaessler Aug 22, 2025
330c3d2
vulkan: optimize mul_mat_id loading row ids into shared memory (#15427)
jeffbolznv Aug 23, 2025
0a9b43e
vulkan : support ggml_mean (#15393)
Acly Aug 23, 2025
b55f06e
vulkan.Dockerfile: install vulkan SDK using tarball (#15282)
yeahdongcn Aug 23, 2025
289bf41
vulkan: Rewrite synchronization to allow some overlap between nodes (…
jeffbolznv Aug 23, 2025
21dc4dd
chat : fix debug build assertion in trim function (#15520)
LaffeyNyaa Aug 23, 2025
9ef5369
scripts: fix compare-llama-bench.py (#15521)
JohannesGaessler Aug 23, 2025
b1afcab
model : add support for Seed-OSS (#15490)
pwilkin Aug 23, 2025
611f419
vulkan: optimize rms_norm, and allow the work to spread across multip…
jeffbolznv Aug 23, 2025
710dfc4
CUDA: fix half2 -> half conversion for HIP (#15529)
JohannesGaessler Aug 23, 2025
e78cf0d
vulkan: workaround MoltenVK compile failure in multi_add (#15506)
jeffbolznv Aug 24, 2025
a9c6ffc
vulkan: enable Conv2D for Apple after MoltenVK fixed the bug (#15526)
0cc4m Aug 24, 2025
c9a24fb
vulkan: Support FA with any multiple of 8 head sizes (#15537)
jeffbolznv Aug 24, 2025
b730706
kv-cache : support layer reuse (#15504)
ggerganov Aug 24, 2025
043fb27
vulkan: apply MUL_MAT_ID subgroup optimization to non-coopmat devices…
0cc4m Aug 24, 2025
c247d06
CANN: ROPE cache sin/cos repeat (#15501)
noemotiovon Aug 25, 2025
7da9fed
convert : support interns1-mini (#15412)
RunningLeon Aug 25, 2025
b0ba31f
metal : add FA kernels for HS=40 (#15559)
ggerganov Aug 25, 2025
0d5a470
convert : update Ernie 4.5 dense architecture name (#15555)
ownia Aug 25, 2025
6b64f74
batched-bench : fix unified KV cache handling + pp timing (#15562)
ggerganov Aug 25, 2025
5a6bc6b
model-conversion : add model card template for embeddings [no ci] (#1…
danbev Aug 25, 2025
dfd9b5f
model-conversion : set pooling type to none in logits.cpp (#15564)
danbev Aug 25, 2025
5eff6ec
CUDA: MoE helper in device code, better tile sizes (#15525)
JohannesGaessler Aug 25, 2025
111f8d0
metal: fix regression when no metal devices are present (#15531)
booxter Aug 25, 2025
886b97a
tests: Generate unique input values for count_equal (#15487)
jeffbolznv Aug 25, 2025
4d917cd
vulkan: fix min subgroup 16 condition for mmid subgroup optimization …
0cc4m Aug 25, 2025
f7207b0
opencl: fix support ops condition for `rms_norm` (#15560)
lhez Aug 25, 2025
74f52f7
CUDA: Accelerate MXFP4 table lookup using `__byte_perm` (#15451)
Qeeweew Aug 25, 2025
34bdbbd
vulkan: Remove splitting for mul_mat_id (#15568)
jeffbolznv Aug 26, 2025
4c37636
Add a warning for special devices (#15563)
pt13762104 Aug 26, 2025
0fd90db
metal : remove contiguous assertion for src0 in IM2COL (#15577)
CISC Aug 26, 2025
39842a7
gguf-py : remove erroneous FFN_GATE entry (#15583)
CISC Aug 26, 2025
c4e9239
model : support MiniCPM-V 4.5 (#15575)
tc-mb Aug 26, 2025
1d8d83d
metal : improve `MUL_MAT_ID` (#15541)
ggerganov Aug 26, 2025
85cc1ae
context : print graph stats for memory-less contexts (#15586)
ggerganov Aug 26, 2025
79a5462
mtmd : support Kimi VL model (#15458)
ngxson Aug 26, 2025
b3964c1
metal : optimize FA vec for large sequences and BS <= 8 (#15566)
ggerganov Aug 26, 2025
8f5afa9
CUDA: return -1 for nonexistent compiled arch (#15587)
JohannesGaessler Aug 26, 2025
62cef26
model-conversion : add qat-q4 quantization targets (#15588)
danbev Aug 26, 2025
0373486
graph : fix assert in memory-less build_attn (#15590)
ggerganov Aug 26, 2025
a6a58d6
llamafile: PowerPC Sgemm Optimization (#15558)
shalinib-ibm Aug 26, 2025
44b1efa
tests: add performance test for mul mat id (#15543)
netrunnereve Aug 26, 2025
8ce3ff1
mtmd : fix mtmd ios build (#15579)
fidoriel Aug 26, 2025
8b69686
SYCL: fix rms_norm_mul_add for tensor dim not a multiple of sg_size (…
qnixsynapse Aug 26, 2025
bcbddcd
tests : fix test-opt with GGML_BACKEND_DL (#15599)
slaren Aug 26, 2025
86076f9
OpenCL: add fused group_norm/norm, mul, add (#15314)
rmatif Aug 27, 2025
fcca218
common : add -m to bash completion for --model [no ci] (#15591)
danbev Aug 27, 2025
1cf123a
ggml-cpu : add basic RVV support for vector f32 ops (#15057)
xctan Aug 27, 2025
1e74897
CANN: refactor mask handling and improve performance in FA (#15561)
noemotiovon Aug 27, 2025
1bded5a
kv-cache : better estimate of n_kv for multi-sequence batches (#15610)
ggerganov Aug 27, 2025
4737327
HIP: Enable support for ggml_backend_cuda_register_host_buffer (#15615)
IMbackK Aug 27, 2025
da54f9f
presets : add qwen3-30B-a3b FIM (#15616)
ggerganov Aug 27, 2025
fbef0fa
server: higher timeout for tests (#15621)
JohannesGaessler Aug 27, 2025
5a0e3ef
cuda: Add cublasLt_static linking when GGML_STATIC is enabled (#15622)
matiaslin Aug 28, 2025
46d9caa
model-conversion : add mmproj conversion target (#15628)
danbev Aug 28, 2025
d35a1e8
cli : change log to warning to explain reason for stopping (#15604)
jrincayc Aug 28, 2025
64387f6
gguf-py: byteswapping improvements (#12851)
AlekseiNikiforovIBM Aug 28, 2025
8a4280c
kv-cache : remove LLAMA_SET_ROWS checks (#15505)
ggerganov Aug 28, 2025
55042b3
scripts: add sqlite3 check for compare-commits.sh (#15633)
am17an Aug 28, 2025
84ab83c
model : jina-embeddings-v3 support (#13693)
CISC Aug 28, 2025
c8d0d14
kv-cache : fix find_slot to not search for continuous slot (#15638)
ggerganov Aug 28, 2025
7380414
ggml : fix SSM_SCAN for n_groups > 1 (#15625)
compilade Aug 28, 2025
6c442f4
ggml-cpu: fix invalid hsum build in debug s390x (#15634)
taronaeo Aug 28, 2025
c97dc09
CUDA: add conv2d (#15635)
mnehete32 Aug 28, 2025
a8bca68
fix: Compute the full sum in llama-eval-callback, not just the sum of…
gabe-l-hart Aug 28, 2025
e8d99dd
nvidia nemotron nano v2 (nemotronh) (#15507)
gabe-l-hart Aug 29, 2025
009b709
CUDA: fuse adds, fuse add with rms norm (#15631)
am17an Aug 29, 2025
60e5eee
chat : Seed OSS thinking + tool call support (#15552)
pwilkin Aug 29, 2025
8101786
CUDA: fix bug in rms_norm fusion (#15660)
am17an Aug 29, 2025
792b44f
server : add documentation for `parallel_tool_calls` param (#15647)
ExtReMLapin Aug 29, 2025
3d16b29
scripts: strip "AMD Instinct" from GPU name (#15668)
JohannesGaessler Aug 29, 2025
d82f6aa
server : removed obsolete doc (#15670)
l29ah Aug 29, 2025
ef47691
CANN: FIx compiler warnings (#15661)
noemotiovon Aug 30, 2025
696fccf
vulkan: Skip syncing for prealloc_y when it is reused (#15544)
jeffbolznv Aug 30, 2025
38ad381
CUDA: use FP32 arithmetic for conv2d (#15683)
JohannesGaessler Aug 30, 2025
e81b8e4
llama: use FA + max. GPU layers by default (#15434)
JohannesGaessler Aug 30, 2025
dd89255
Update build.md to remove MSVC arm64 notes (#15684)
slaren Aug 30, 2025
4d74393
ggml: update kleidiai to v1.13.0 (#15663)
chaxu01 Aug 30, 2025
94e82c7
vulkan: clamp matmul and FA results to the max finite value (#15652)
jeffbolznv Aug 31, 2025
b97c9ed
vulkan: Allow fallback to sysmem memory when vidmem is full (#15649)
jeffbolznv Aug 31, 2025
5c16b9c
vulkan : remove unused portability_enumeration_ext variable (#15679)
danbev Aug 31, 2025
c37052a
vulkan: mul_mat_id coopmat2 optimizations (#15546)
jeffbolznv Aug 31, 2025
bbbf5ec
vulkan: handle large sizes for get_rows (#15686)
jeffbolznv Aug 31, 2025
7d3c9f2
ci : explicitly set fa off or on (#15692)
CISC Aug 31, 2025
9777032
llama : separate compute buffer reserve from fattn check (#15696)
slaren Aug 31, 2025
2749662
llama : fix fattn reserve call n_seqs parameter (#15699)
slaren Aug 31, 2025
4efd5a8
metal : fix checks for available FA kernels (#15700)
ggerganov Aug 31, 2025
0d161f0
server : enable /slots by default and make it secure (#15630)
ggerganov Aug 31, 2025
e92d53b
sampling : optimize samplers by reusing bucket sort (#15665)
ggerganov Aug 31, 2025
3dc7397
CANN: fix RoPE cache issue on multi-device (#15629)
hipudding Sep 1, 2025
b9382c3
CANN: Optimize MUL_MAT_ID (#15658)
hipudding Sep 1, 2025
b66df9d
CUDA: fix build error from ambiguous __half conversions in conv2d (#1…
qnixsynapse Sep 1, 2025
4795c91
docs : add Hunyuan to models section (#15707)
DamonFool Sep 1, 2025
77dee9d
ggml : WebGPU add TRANSPOSE and RESHAPE to supported ops (#15695)
danbev Sep 1, 2025
02c1813
Vulkan: Add Integer Dot Product mul_mat_vec shader for legacy quants …
0cc4m Sep 1, 2025
4b20d8b
convert : remove redundant code (#15708)
DamonFool Sep 1, 2025
a0c2b20
ggml: aarch64: Implement SVE F16 kernels for vector functions (#15115)
Vithulep Sep 1, 2025
078ce23
ggml: SVE support for exponential functions (#15145)
s-goto-11 Sep 1, 2025
fec7911
vulkan: disable large mmv subgroups on older Nvidia GPUs (#15717)
0cc4m Sep 1, 2025
35a42ed
vulkan: add missing clamps in new mul_mat_id paths (#15702)
jeffbolznv Sep 1, 2025
d4d8dbe
vulkan: use memory budget extension to read memory usage (#15545)
giladgd Sep 1, 2025
5d804a4
ggml-backend: raise GGML_MAX_SPLIT_INPUTS (#15722)
JohannesGaessler Sep 1, 2025
ef2af57
CANN: Support ext_factor in rope (#15710)
hipudding Sep 2, 2025
2f85368
CANN: Support eager execution mode under ACL graph compilation (#15712)
noemotiovon Sep 2, 2025
97669e4
opencl: add attn sinks support for FA kernels (#15706)
rmatif Sep 2, 2025
25f1045
vulkan: Fix macro parameter order for f32 matmul shaders (#15716)
jeffbolznv Sep 2, 2025
9961d24
CANN: Resolve soft_max precision issue (#15730)
hipudding Sep 2, 2025
0a2a384
vulkan: fix shaders gen when no integer dot is available (#15740)
0cc4m Sep 2, 2025
c466abe
llama: -fa 1/0/-1 aliases for -fa on/off/auto (#15746)
JohannesGaessler Sep 2, 2025
69db8a5
chore: Update `.clang-format` to use `BinPackArguments=true` (#15744)
ORippler Sep 2, 2025
3de0082
fix: resolve unsigned int initialization warning for n_dims/size in g…
skrandy Sep 2, 2025
8a2234e
CANN: Fix type float_t to float (#15736)
noemotiovon Sep 3, 2025
f6da8cb
CANN: Mask unsupported TRANSPOSE_1D operator (#15733)
hipudding Sep 3, 2025
8c3fdf4
model-conversion : add missing curl script [no ci] (#15761)
danbev Sep 3, 2025
05c0380
ggml-cpu : optimize RVV kernels (#15720)
xctan Sep 3, 2025
5eae934
CANN: Add RoPE contiguous check for 310I DUP device (#15735)
hipudding Sep 3, 2025
40a751e
model-conversion : remove hardcoded /bin/bash shebangs [no ci] (#15765)
danbev Sep 3, 2025
2c8dac7
llama : fix incorrect model type for Gemma 270M (#15764)
danbev Sep 3, 2025
cdedb70
sampling : optimize dist sampler (#15704)
ggerganov Sep 3, 2025
407c237
model-conversion : fix pyright errors (#15770)
danbev Sep 3, 2025
661ae31
CUDA: Optimize `rms_norm_f32` kernel and its fused variants, giving 1…
ORippler Sep 3, 2025
0014fb4
ggml vulkan: add hardsigmoid and hardswish operations (#15762)
relent95 Sep 3, 2025
8227695
vulkan : update ggml_vk_instance_validation_ext_available (#15666)
danbev Sep 3, 2025
0fce7a1
vulkan: don't use std::string in load_shaders, to improve compile tim…
jeffbolznv Sep 3, 2025
dff7551
vulkan: fix mmv subgroup16 selection (#15775)
0cc4m Sep 3, 2025
239b60e
CANN: fix acl_rstd allocation size in ggml_cann_rms_norm (#15760)
noemotiovon Sep 4, 2025
820bc98
opencl: add hs=40 to FA (#15758)
rmatif Sep 4, 2025
5421f63
CANN: Fix precision issue on 310I DUO multi-devices (#15784)
hipudding Sep 4, 2025
0a1b398
ggml: add ops for WAN video model (cuda && cpu) (#15669)
leejet Sep 4, 2025
badb80c
Document the new max GPU layers default in help (#15771)
ericcurtin Sep 4, 2025
a68d914
server: add exceed_context_size_error type (#15780)
ngxson Sep 4, 2025
c1c354e
CANN: Refactor ND to NZ workspace to be per-device (#15763)
noemotiovon Sep 4, 2025
d1e2adb
llama : set n_outputs to 1 to avoid 0 outputs mean-pooling (#15791)
danbev Sep 4, 2025
856ed09
metal : Add template specialization for mul_mm_id w/ ne20 == 10 (#15799)
gabe-l-hart Sep 4, 2025
fb15d64
llama : add support for EmbeddingGemma 300m (#15798)
danbev Sep 4, 2025
9e2b1e8
scripts : add Jinja tester PySide6 simple app (#15756)
pwilkin Sep 4, 2025
b2426e4
chat : nemotron thinking & toolcalling support (#15676)
pwilkin Sep 4, 2025
4fd1242
chat : fixed crash when Hermes 2 <tool_call> had a newline before it …
ExtReMLapin Sep 4, 2025
5d6688d
model-conversion : add --embeddings flag to modelcard.template [no ci…
danbev Sep 5, 2025
c610b6c
kv-cache : fix SWA checks + disable cacheless iSWA (#15811)
ggerganov Sep 5, 2025
a812838
gguf: gguf_writer refactor (#15691)
Green-Sky Sep 5, 2025
3a550b5
tests : add --list-ops and --show-coverage options (#15745)
danbev Sep 5, 2025
5143fa8
CUDA: fastdiv, launch bounds for mmvq + q8_1 quant (#15802)
JohannesGaessler Sep 5, 2025
408ff52
Implement --log-colors with always/never/auto (#15792)
ericcurtin Sep 5, 2025
5fac79c
Thinking model disabled assistant prefill (#15404)
gabe-l-hart Sep 5, 2025
4281c7b
ci : exempt correct research label (#15825)
CISC Sep 5, 2025
fd62188
aLoRA Support (#15327)
gabe-l-hart Sep 5, 2025
186415d
ggml-cpu: drop support for nnpa intrinsics (#15821)
taronaeo Sep 6, 2025
01806e7
ggml-cpu: document use of "free" memory [no ci] (#15834)
JohannesGaessler Sep 6, 2025
61bdfd5
server : implement prompt processing progress report in stream mode (…
ngxson Sep 6, 2025
3c3635d
server : speed up tests (#15836)
ngxson Sep 6, 2025
c4df49a
kleidiai: generalize compute_forward_kv_cache to compute_forward_fp16…
chaxu01 Sep 6, 2025
79bc429
CUDA: faster tile FA (Pascal/AMD), headsize 256 (#15769)
JohannesGaessler Sep 6, 2025
3b15924
ggml WebGPU: remove userdata from request adapter callback (#15527)
danbev Sep 7, 2025
267e998
vulkan: Use larger loads in scalar/coopmat1 matmul (#15729)
jeffbolznv Sep 7, 2025
c97b5e5
vulkan: Support pad_ext (#15794)
jeffbolznv Sep 7, 2025
d36e61c
ggml-cpu: clean up s390x SIMD (#15855)
taronaeo Sep 7, 2025
3976dfb
vulkan: support im2col_3d (#15795)
jeffbolznv Sep 7, 2025
85ca66a
CANN: Stream sync between devices for acl_graph (#15809)
noemotiovon Sep 8, 2025
d413dca
tests: large sizes for get_rows (#15687)
jeffbolznv Sep 8, 2025
cf0e3ba
model : avoid ggml_cont_3d for fused QKV weights (#15662)
ggerganov Sep 8, 2025
663027f
context : fix n_outputs during reserve (#15858)
ggerganov Sep 8, 2025
a885dcf
batched-bench : fix llama_synchronize usage during prompt processing …
ggerganov Sep 8, 2025
233d773
convert : force setting sliding_window from original config (#15867)
danbev Sep 8, 2025
5ef22d2
CUDA: non-contiguous src0 not supported for PAD (#15869)
CISC Sep 8, 2025
9fcb29f
ggml: allow casting between f32 and i32 (#15783)
ngxson Sep 8, 2025
f28d4f4
metal : refactor + optimize (#15857)
ggerganov Sep 8, 2025
b0d5299
cuda : fix supports_op condition for get_rows when number of blocks i…
ggerganov Sep 8, 2025
56920f5
server : bring back timings_per_token (#15879)
ngxson Sep 8, 2025
8802156
chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style…
createthis Sep 8, 2025
0a16bf5
CUDA: generate_cu_files.py - add missing mxfp4 (#15880)
am17an Sep 8, 2025
e68aa10
vulkan: sort graph to allow more parallel execution (#15850)
jeffbolznv Sep 8, 2025
fe1c92c
media : add llama1 icon (#15878)
06kellyjac Sep 8, 2025
7057faf
json : support `enum` values within `allOf` (#15830)
aldehir Sep 8, 2025
acc1b00
model-conversion : add extra debugging support for model conversion (…
pwilkin Sep 9, 2025
70cd37d
requirements : update transformers/torch for Embedding Gemma (#15828)
danbev Sep 9, 2025
c252ce6
contrib : add notes about merging PRs (#15881)
ggerganov Sep 9, 2025
550cf72
CUDA: fix GET_ROWS for large tensors (#15882)
JohannesGaessler Sep 9, 2025
a972fae
CUDA: Add mul_mat_id support for the mmf kernel (#15767)
am17an Sep 9, 2025
ed54e32
Workaround for subgroup arithmetic failing on MoltenVK with AMD GPUs …
lksj92hs Sep 9, 2025
17bc5a8
HIP: use v_dot2_f32_f16 instruction for FA (#15884)
JohannesGaessler Sep 9, 2025
4f63cd7
vulkan: Fix OOB accesses in soft_max_back (#15861)
jeffbolznv Sep 9, 2025
ae355f6
vulkan: throw the oom error instead of no memory type found (#15905)
0cc4m Sep 9, 2025
ff02caf
ci : cache ROCm installation in windows-latest-cmake-hip (#15887)
danbev Sep 10, 2025
86587da
llama : check returned fn ptrs from ggml_backend_reg_get_proc_address…
danbev Sep 10, 2025
28b5f19
CANN: implement LRU cache for ACL graphs (#15814)
noemotiovon Sep 10, 2025
10d8b2b
CANN: Add ROPE sin/cos cache for reuse (#15912)
noemotiovon Sep 10, 2025
09e72a0
gitignore : Ignore vim swap files in tests (#15901)
createthis Sep 10, 2025
2cfef4d
media : add transparent icon svg and png [no ci] (#15891)
06kellyjac Sep 10, 2025
e7b6d83
tests : filter out no-ops from coverage report (#15900)
danbev Sep 10, 2025
33daece
ci : add caching for ROCm installation in release workflow (#15924)
danbev Sep 10, 2025
0f0a3c2
metal : make the backend async (#15906)
ggerganov Sep 10, 2025
9de447d
ggml-cpu : fix padding in ggml_timestep_embedding (#15917)
danbev Sep 10, 2025
6ab397e
graph : support non-contiguous Q in build_attn_mha (#15908)
CISC Sep 10, 2025
4f65885
llama : support T5 models with unequal number of encoder-decoder laye…
DamonFool Sep 10, 2025
00681df
CUDA: Add `fastdiv` to `k_bin_bcast*`, giving 1-3% E2E performance (#…
ORippler Sep 10, 2025
c0389db
CANN: Disable acl_graph for prefill stage (#15933)
hipudding Sep 11, 2025
2b3efea
kleidiai: fix GGML_ASSERT(*cur_backend_id != -1) failed (#15614)
chaxu01 Sep 11, 2025
24a6734
ggml-cpu : add check for ARM MATMUL_INT8/i8mm support (#15922)
danbev Sep 11, 2025
df082f5
nitpick : correct MB to MiB (#15934)
ddh0 Sep 11, 2025
0e6ff00
CUDA: larger SRAM reads for tile FA, AMD FP16 dot (#15927)
JohannesGaessler Sep 11, 2025
360d653
ggml-backend : add GGML_BACKEND_DEVICE_TYPE_IGPU device type (#15797)
slaren Sep 11, 2025
704d90c
Revert "sycl: add usage of enqueue_functions extension (#14244)" (#1…
NeoZhangJianyu Sep 12, 2025
6c88ad8
vulkan: Make device memory check more portable (#15939)
mbaudier Sep 12, 2025
304ac56
Vulkan iGPU device selection overhaul and PCI ID API support (#15947)
0cc4m Sep 12, 2025
f088b6a
server : adjust prompt similarity thold + add logs (#15913)
ggerganov Sep 12, 2025
f4e664f
context : remove redundant explicit casting to the same type (#15948)
haiyuewa Sep 12, 2025
4bf5549
Add docker protocol support for llama-server model loading (#15790)
ericcurtin Sep 12, 2025
40be511
ggml-zdnn: fix #15414, activate FP16 and BF16 acceleration and incorr…
taronaeo Sep 12, 2025
84d7b2f
metal : fix memory leaks (#15962)
ggerganov Sep 13, 2025
f161463
metal : allow ops to run concurrently (#15929)
ggerganov Sep 13, 2025
55758b0
metal : refactor kernel loading (#15964)
ggerganov Sep 13, 2025
50f4281
llama : allow using iGPUs with --device (#15951)
slaren Sep 13, 2025
b9c9c9f
vulkan: initialize vulkan-hpp to allow using extension function point…
jeffbolznv Sep 13, 2025
aa0c461
vulkan: fix failing dequant shaders (#15862)
jeffbolznv Sep 13, 2025
6380d6a
ggml-zdnn: rm user mapped buffers (#15965)
taronaeo Sep 14, 2025
d1c6f11
doc : update documentation for --tensor-split (#15980)
rgerganov Sep 14, 2025
9ecb884
releases : update ROCM, add gfx1200, gfx1201, gfx1151 (#15972)
slaren Sep 14, 2025
918b26f
rpc : fix regression when --device is used (#15981)
rgerganov Sep 14, 2025
a14bd35
metal : fix kernel requirements (#15983)
ggerganov Sep 14, 2025
a0e13dc
build: fix the build failures of Windows HIP release job (#15984)
lcy0321 Sep 14, 2025
261e6a2
Vulkan: Clean up mul_mm shader (#15987)
0cc4m Sep 14, 2025
0fa154e
rocm.Dockerfile: added gfx1200,gfx1201 architectures to support AMD …
channeladam Sep 14, 2025
9dcd200
metal : remove memory pools (#15966)
ggerganov Sep 14, 2025
6c019cb
server : only attempt to enable thinking if using jinja (#15967)
CISC Sep 14, 2025
b8e09f0
model : add grok-2 support (#15539)
CISC Sep 14, 2025
a68f31e
fix KLD percentile output (#15999)
ddh0 Sep 15, 2025
1062205
CUDA: some micro-optimizations in mmf.cuh for mul_mat_id (#15926)
am17an Sep 15, 2025
28c39da
llama-run: Fix model download on Windows (#15988)
npopov-vst Sep 15, 2025
b907255
SYCL: Add COUNT_EQUAL operator support (#15991)
yael-works Sep 15, 2025
10d1974
releases : switch to rocWMMA develop branch, add gfx1151 (#15992)
slaren Sep 15, 2025
dc381aa
docker : enable rocWMMA in ROCm images, add gfx1151 (#15997)
slaren Sep 15, 2025
3d4053f
CUDA: fix im2col_3d to respect non-contiguous inputs (views) (#15956)
jakekarnes42 Sep 15, 2025
6d75883
Add LLaDA-7b-MoE diffusion model (#16003)
am17an Sep 16, 2025
07808eb
cmake : Do not install tools on iOS targets (#15903)
ykhrustalev Sep 16, 2025
51abc96
ci : update macos-latest* jobs to use macos-latest (#15938)
danbev Sep 16, 2025
d853890
Merge remote-tracking branch 'upstream/master' into update-master, on…
Sep 17, 2025
a17e36f
-Fixed issue for llama-cli. Tested on posix & FPGA
Sep 17, 2025
7a6ce92
-Disabled CI, flake8 Lint ,editor config, python lint workflow defaul…
Sep 19, 2025
1432366
-Disabled riscv-native, editorconfig, python-type-check, server check…
Sep 19, 2025
b91626c
-disabled python-lint. CI check
Sep 19, 2025
c9a365e
-Disabled all automatic checks for commits
Sep 19, 2025
4193b56
-fix(build): resolve GLIBC compatibility issues for TSI binaries, Swi…
Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
11 changes: 7 additions & 4 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BinPackParameters: false # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
Expand Down Expand Up @@ -70,15 +70,18 @@ ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<.*\.h>'
- Regex: '".*"'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
- Regex: '^<.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '.*'
- Regex: '^<.*'
Priority: 3
SortPriority: 0
- Regex: '.*'
Priority: 4
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
Expand Down
130 changes: 130 additions & 0 deletions .devops/cann.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# ==============================================================================
# ARGUMENTS
# ==============================================================================

# Define the CANN base image for easier version updates later
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10

# ==============================================================================
# BUILD STAGE
# Compile all binary files and libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# Define the Ascend chip model for compilation. Default is Ascend910B3
ARG ASCEND_SOC_TYPE=Ascend910B3

# `source` (used below for set_env.sh) is a bashism; make the RUN shell explicit
# instead of relying on /bin/sh resolving to bash in the base image.
SHELL ["/bin/bash", "-c"]

# -- Install build dependencies --
RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set the working directory --
WORKDIR /app

# -- Copy project files --
COPY . .

# -- Set CANN environment variables (required for compilation) --
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
# NOTE(review): set_env.sh may export additional variables needed by some SOC
# types — confirm against the toolkit's set_env.sh if a build fails.

# -- Build llama.cpp --
# Use the passed ASCEND_SOC_TYPE argument and add general build options
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
    && \
    cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DSOC_TYPE=${ASCEND_SOC_TYPE} \
        . && \
    cmake --build build --config Release -j$(nproc)

# -- Organize build artifacts for copying in later stages --
# Create a lib directory to store all .so files
RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

# Create a full directory to store all executables, Python scripts, and the
# tools.sh dispatcher that the `full` target uses as its entrypoint.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    cp .devops/tools.sh /app/full/tools.sh && \
    chmod +x /app/full/tools.sh

# ==============================================================================
# BASE STAGE
# Create a minimal base image with CANN runtime and common libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# -- Install runtime dependencies --
# curl is also required by the server target's HEALTHCHECK below
RUN yum install -y libgomp curl && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set CANN environment variables (required for runtime) --
# /app is prepended so the .so files copied below are found first
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp

WORKDIR /app

# Copy compiled .so files from the build stage
COPY --from=build /app/lib/ /app

# ==============================================================================
# FINAL STAGES (TARGETS)
# ==============================================================================

### Target: full
# Complete image with all tools, Python bindings, and dependencies
# ==============================================================================
FROM base AS full

COPY --from=build /app/full /app

# Install Python dependencies
RUN yum install -y git python3 python3-pip && \
    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
    pip3 install --no-cache-dir -r requirements.txt && \
    yum clean all && \
    rm -rf /var/cache/yum

# tools.sh (copied into /app/full in the build stage) dispatches to
# llama-cli / llama-server / the conversion scripts
ENTRYPOINT ["/app/tools.sh"]

### Target: light
# Lightweight image containing only llama-cli
# ==============================================================================
FROM base AS light

COPY --from=build /app/full/llama-cli /app

ENTRYPOINT [ "/app/llama-cli" ]

### Target: server
# Dedicated server image containing only llama-server
# ==============================================================================
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
22 changes: 0 additions & 22 deletions .devops/cloud-v-pipeline

This file was deleted.

6 changes: 1 addition & 5 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@ FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
Expand Down
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ RUN apt-get update \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
Expand Down
30 changes: 17 additions & 13 deletions .devops/intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,23 @@ COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
&& apt-get install -y \
git \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete

RUN apt-get update && \
apt-get install -y \
git \
python3 \
python3-pip \
python3-venv && \
python3 -m venv /opt/venv && \
. /opt/venv/bin/activate && \
pip install --upgrade pip setuptools wheel && \
pip install -r requirements.txt && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete

ENV PATH="/opt/venv/bin:$PATH"

ENTRYPOINT ["/app/tools.sh"]

Expand Down
6 changes: 3 additions & 3 deletions .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.0.1
ARG MUSA_VERSION=rc4.2.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

Expand Down
3 changes: 2 additions & 1 deletion .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ let
inherit (lib)
cmakeBool
cmakeFeature
optionalAttrs
optionals
strings
;
Expand Down Expand Up @@ -197,7 +198,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
];

# Environment variables needed for ROCm
env = optionals useRocm {
env = optionalAttrs useRocm {
ROCM_PATH = "${rocmPackages.clr}";
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
};
Expand Down
27 changes: 16 additions & 11 deletions .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4

# Target the CUDA build image
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
Expand All @@ -15,16 +15,13 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officially supported
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
#ARG ROCM_DOCKER_ARCH=gfx1100
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
#ARG ROCM_DOCKER_ARCH='gfx1151'

# Set nvcc architectured
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
&& apt-get install -y \
Expand All @@ -39,8 +36,16 @@ WORKDIR /app

COPY . .

RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
Expand Down
2 changes: 1 addition & 1 deletion .devops/tools.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
set -e

# Read the first argument into a variable
Expand Down
30 changes: 23 additions & 7 deletions .devops/vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@ ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget
# Ref: https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
apt update -y && \
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils

# Install Vulkan SDK
ARG VULKAN_VERSION=1.4.321.1
RUN ARCH=$(uname -m) && \
wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
mkdir -p /opt/vulkan && \
tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
mv /tmp/${ARCH}/* /opt/vulkan/ && \
rm -rf /tmp/*

# Install cURL and Vulkan SDK dependencies
RUN apt install -y libcurl4-openssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev

# Set environment variables
ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH

# Build it
WORKDIR /app
Expand Down
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,7 @@ end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset

[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
Loading