Changes from all commits (819 commits)
49006c6
llama : move random seed generation to the samplers (#9398)
slaren Sep 10, 2024
8d300bd
enable --special arg for llama-server (#9419)
matteoserva Sep 10, 2024
6cd4e03
arg : bring back missing ifdef (#9411)
ngxson Sep 10, 2024
cb9c933
flake.lock: Update (#9360)
ggerganov Sep 10, 2024
51b6038
sycl : update support conditions (#9394)
Alcpz Sep 11, 2024
b34e023
musa: remove Clang builtins mapping (#9421)
yeahdongcn Sep 11, 2024
d2b496b
batched-bench : remove unused code (#9305)
ggerganov Sep 11, 2024
5af118e
CUDA: fix --split-mode row race condition (#9413)
JohannesGaessler Sep 11, 2024
67155ab
feat: Implements retrying logic for downloading models using --model-…
farbodbj Sep 11, 2024
5bb2c5d
files : remove accidentally added `lora_test` submodule (#9430)
ngxson Sep 11, 2024
0996c55
llava : correct args for minicpmv-cli (#9429)
ngxson Sep 11, 2024
8db003a
py : support converting local models (#7547)
EvilFreelancer Sep 11, 2024
1b28061
llama : skip token bounds check when evaluating embeddings (#9437)
slaren Sep 11, 2024
449ccfb
Add Jais to list of supported models (#9439)
fmz Sep 12, 2024
df4b794
cann: Fix error when running a non-exist op (#9424)
bachelor-dou Sep 12, 2024
c9c8575
enhance run script to be easy to change the parameters (#9448)
NeoZhangJianyu Sep 12, 2024
d6a04f8
ggml : hide ggml_object, ggml_cgraph, ggml_hash_set (#9408)
ggerganov Sep 12, 2024
2b00fa7
riscv : modify Makefile and add a RISCV_VECT to print log info (#9442)
Tameem-10xE Sep 12, 2024
39f852f
py : add special tokens in hf_converter for RWKV v6 (#9428)
MollySophia Sep 12, 2024
ff76e18
cmake : fixed the order of linking libraries for llama-quantize (#9450)
Xarbirus Sep 12, 2024
3c26a16
ci : bump actions/checkout to v4 (#9377)
trivikr Sep 12, 2024
c837981
py : add Phi-1.5/Phi-2 tokenizer (#9361)
daminho Sep 12, 2024
4dc4f5f
ci : update HIP SDK to 24.Q3 (ROCm 6.1) (#9329)
no1wudi Sep 12, 2024
2a82511
cmake : fix for builds without `GGML_CDEF_PUBLIC` (#9338)
Xarbirus Sep 12, 2024
d4c3c10
lora : raise error if lm_head is ignored (#9103)
ngxson Sep 12, 2024
e665744
llava : fix the script error in MobileVLM README (#9054)
fengerhu1 Sep 12, 2024
e6b7801
cann: Add host buffer type for Ascend NPU (#9406)
bachelor-dou Sep 12, 2024
7820364
server : Add option to return token pieces in /tokenize endpoint (#9108)
mathijshenquet Sep 12, 2024
bd35cb0
feat: remove a sampler from a chain (#9445)
giladgd Sep 13, 2024
0abc6a2
llama : llama_perf + option to disable timings during decode (#9355)
ggerganov Sep 13, 2024
feff4aa
server : add loading html page while model is loading (#9468)
ngxson Sep 13, 2024
befaf11
llama : make cell_id const in inp_s_mask block (#9470)
danbev Sep 14, 2024
1f4111e
cmake : use list(APPEND ...) instead of set() + dedup linker (#9463)
ggerganov Sep 14, 2024
dcdcee3
server: add data: [DONE] to /chat/completions stream response (#9459)
VoidIsVoid Sep 14, 2024
822b632
ggml : ggml_type_name return "NONE" for invalid values (#9458)
ykhrustalev Sep 14, 2024
7596487
cmake : try to fix sycl+intel build (#9487)
Xarbirus Sep 15, 2024
d6b37c8
readme : update tools list (#9475)
OLSecret Sep 15, 2024
3c7989f
py : add "LLaMAForCausalLM" conversion support (#9485)
csabakecskemeti Sep 15, 2024
6988da9
cmake : correct order of sycl flags (#9497)
Xarbirus Sep 15, 2024
e6deac3
gguf-split : add basic checks (#9499)
slaren Sep 15, 2024
6262d13
common : reimplement logging (#9418)
ggerganov Sep 15, 2024
90a2fff
flake.lock: Update (#9488)
ggerganov Sep 16, 2024
c4965a6
metal : handle zero-sized allocs (#9466)
ggerganov Sep 16, 2024
441b72b
main : option to disable context shift (#9484)
VJHack Sep 16, 2024
95ca851
llama : support MiniCPM3 (#9322)
CarryFun Sep 16, 2024
0aadac1
llama : support OLMoE (#9462)
2015aroras Sep 16, 2024
5c3d0f1
ggml : IQ4_NL sgemm + Q4_0 AVX optimization (#9422)
netrunnereve Sep 16, 2024
19514d6
cmake : do not hide GGML options + rename option (#9465)
ggerganov Sep 16, 2024
d54c21d
convert : identify missing model files (#9397)
compilade Sep 16, 2024
a6a3a5c
ggml : link MATH_LIBRARY not by its full path (#9339)
Xarbirus Sep 16, 2024
acb2c32
llama : rename n_embed to n_embd in rwkv6_time_mix (#9504)
danbev Sep 16, 2024
23e0d70
ggml : move common CPU backend impl to new header (#9509)
slaren Sep 16, 2024
37f3a38
llama : add llama_n_head() (#9512)
Xarbirus Sep 17, 2024
0d2ec43
llama : support IBM Granite architecture (#9412)
gabe-l-hart Sep 17, 2024
503147a
unicode : add <algorithm> (#9508)
ykhrustalev Sep 17, 2024
0226613
threadpool : skip polling for unused threads (#9461)
max-krasnyansky Sep 17, 2024
8344ef5
llama : fix n_vocab init for 'no_vocab' case (#9511)
Xarbirus Sep 17, 2024
8b836ae
arg : add env variable for parallel (#9513)
bertwagner Sep 17, 2024
7be099f
llama-bench: correct argument parsing error message (#9524)
Xarbirus Sep 17, 2024
faf67b3
[SYCL]set context default value to avoid memory issue, update guide (…
NeoZhangJianyu Sep 18, 2024
f799155
server : fix OpenSSL build (remove obsolete `LOG_INFO`) (#9529)
EZForever Sep 18, 2024
8a30835
server : match OAI structured output response (#9527)
VJHack Sep 18, 2024
6443ddd
llama : use reserve/emplace_back in sampler_sample (#9534)
danbev Sep 18, 2024
0d2f22e
scripts : verify py deps at the start of compare (#9520)
ggerganov Sep 18, 2024
64c6af3
ggml : fix n_threads_cur initialization with one thread (#9538)
slaren Sep 18, 2024
eca0fab
imatrix : disable prompt escape by default (#9543)
CISC Sep 19, 2024
6026da5
server : clean-up completed tasks from waiting list (#9531)
ggerganov Sep 19, 2024
722ec1e
perplexity : do not escape input data by default (#9548)
CISC Sep 20, 2024
d39e267
examples : flush log upon ctrl+c (#9559)
ggerganov Sep 20, 2024
5cb12f6
CUDA: fix sum.cu compilation for CUDA < 11.7 (#9562)
JohannesGaessler Sep 20, 2024
a6809c6
examples : add null threadpool args where needed (ggml/0)
ggerganov Sep 8, 2024
424c5d0
ggml/examples: add backend support for numerical optimization (ggml/949)
JohannesGaessler Sep 20, 2024
4301535
sync : ggml
ggerganov Sep 20, 2024
27609c4
ggml : fix trailing whitespace (#0)
ggerganov Sep 20, 2024
d13edb1
ggml : fix builds (#0)
ggerganov Sep 20, 2024
6335114
quantize : improve type name parsing (#9570)
slaren Sep 20, 2024
e948a7d
CI: Provide prebuilt windows binary for hip (#9467)
no1wudi Sep 21, 2024
41f4778
Update CUDA graph on scale change plus clear nodes/params (#9550)
agray3 Sep 21, 2024
d09770c
ggml-alloc : fix list of allocated tensors with GGML_ALLOCATOR_DEBUG …
slaren Sep 21, 2024
2a63caa
RWKV v6: RWKV_WKV op CUDA implementation (#9454)
MollySophia Sep 22, 2024
ecd5d6b
llama: remove redundant loop when constructing ubatch (#9574)
shankarg87 Sep 22, 2024
a5b57b0
CUDA: enable Gemma FA for HIP/Pascal (#9581)
JohannesGaessler Sep 22, 2024
912c331
Fix merge error in #9454 (#9589)
MollySophia Sep 22, 2024
c35e586
musa: enable building fat binaries, enable unified memory, and disabl…
yeahdongcn Sep 22, 2024
e62e978
Revert "[SYCL] fallback mmvq (#9088)" (#9579)
Sep 23, 2024
bf9c101
metal : use F32 prec for K*Q in vec FA (#9595)
ggerganov Sep 23, 2024
37f8c7b
perplexity : remove extra new lines after chunks (#9596)
ggerganov Sep 23, 2024
1e7b929
ggml : AVX512 gemm for Q4_0_8_8 (#9532)
Srihari-mcw Sep 23, 2024
f3979df
flake.lock: Update (#9586)
ggerganov Sep 23, 2024
1d48e98
readme : add programmable prompt engine language CLI (#9599)
snowyu Sep 23, 2024
f0c7b5e
threads: improve ggml_barrier scaling with large number of threads (#…
max-krasnyansky Sep 23, 2024
0b3bf96
server : add --no-context-shift option (#9607)
ngxson Sep 23, 2024
116efee
cuda: add q8_0->f32 cpy operation (#9571)
Nekotekina Sep 24, 2024
c087b6f
threads: fix msvc build without openmp (#9615)
max-krasnyansky Sep 24, 2024
b0f2736
sampling : avoid expensive softmax during greedy sampling (#9605)
ggerganov Sep 24, 2024
0aa1501
server : add newline after chat example (#9616)
StrangeBytesDev Sep 24, 2024
cea1486
log : add CONT level for continuing previous log entry (#9610)
ggerganov Sep 24, 2024
31ac583
llama : keep track of all EOG tokens in the vocab (#9609)
ggerganov Sep 24, 2024
c038931
examples : adapt to ggml.h changes (ggml/0)
ggerganov Sep 20, 2024
bb5f819
sync : ggml
ggerganov Sep 24, 2024
70392f1
ggml : add AVX512DQ requirement for AVX512 builds (#9622)
EZForever Sep 24, 2024
904837e
cann: fix crash when llama-bench is running on multiple cann devices …
bachelor-dou Sep 25, 2024
3d6bf69
llama : add IBM Granite MoE architecture (#9438)
gabe-l-hart Sep 25, 2024
afbbfaa
server : add more env vars, improve gen-docs (#9635)
ngxson Sep 25, 2024
1e43630
ggml : remove assert for AArch64 GEMV and GEMM Q4 kernels (#9217)
chaxu01 Sep 25, 2024
ea9c32b
ci : fix docker build number and tag name (#9638)
ngxson Sep 25, 2024
7691654
mtgpu: enable VMM (#9597)
yeahdongcn Sep 26, 2024
95bc82f
[SYCL] add missed dll file in package (#9577)
NeoZhangJianyu Sep 26, 2024
44f59b4
cmake : add option for common library (#9661)
iboB Sep 27, 2024
b5de3b7
readme : update hot topics
ggerganov Sep 27, 2024
89f9944
Enable use of the rebar feature to upload buffers to the device. (#9251)
mtavenrath Sep 28, 2024
6a0f779
ggml : add run-time detection of neon, i8mm and sve (#9331)
eddnjjn Sep 28, 2024
43bcdd9
readme : add tool (#9655)
akx Sep 28, 2024
9a91311
llama : add support for Chameleon (#8543)
nopperl Sep 28, 2024
6102037
vocab : refactor tokenizer to reduce init overhead (#9449)
kylo5aby Sep 28, 2024
7398427
llama : add comment about thread-safety [no ci] (#9449)
ggerganov Sep 28, 2024
1b2f992
test-backend-ops : use flops for some performance tests (#9657)
slaren Sep 28, 2024
f4d2b88
llama : add reranking support (#9510)
ggerganov Sep 28, 2024
589b48d
contrib : add Resources section (#9675)
ggerganov Sep 29, 2024
f99d3f8
py : add model class for Chameleon conversion (#9683)
nopperl Sep 29, 2024
faac0ba
common : ensure llama_batch size does not exceed max size (#9668)
matiaslin Sep 29, 2024
6084bfb
ggml : fix GGML_MAX_N_THREADS + improve formatting (ggml/969)
ggerganov Sep 24, 2024
544f409
vulkan : argsort barriers must be under uniform control flow (ggml/951)
smeso Sep 26, 2024
0de8b20
vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (ggml…
jeffbolznv Sep 27, 2024
641002f
vulkan : multithread pipeline creation (ggml/963)
jeffbolznv Sep 29, 2024
aaa4099
CUDA: remove bad assert (ggml/972)
JohannesGaessler Sep 29, 2024
d0b1d66
sync : ggml
ggerganov Sep 29, 2024
c919d5d
ggml : define missing HWCAP flags (#9684)
ggerganov Sep 29, 2024
8277a81
console : utf-8 fix for windows stdin (#9690)
hasaranga Sep 30, 2024
ace4f4b
flake.lock: Update (#9680)
ggerganov Sep 30, 2024
08a43d0
py : update transformers version (#9694)
Vaibhavs10 Sep 30, 2024
511636d
ci : reduce severity of unused Pyright ignore comments (#9697)
compilade Sep 30, 2024
6f1d9d7
Fix Docker ROCM builds, use AMDGPU_TARGETS instead of GPU_TARGETS (#9…
serhii-nakon Sep 30, 2024
1927378
convert : refactor rope_freqs generation (#9396)
compilade Oct 1, 2024
a90484c
llama : print correct model type for Llama 3.2 1B and 3B
ggerganov Oct 1, 2024
cad341d
metal : reduce command encoding overhead (#9698)
ggerganov Oct 1, 2024
7254cdf
ggml: fix gradient allocation logic (ggml/966)
JohannesGaessler Sep 29, 2024
6c53224
ggml : fix ggml_cast (ggml/973)
iboB Sep 30, 2024
cb00020
vulkan : mul_mat: fix UB with small warps (ggml/952)
smeso Sep 30, 2024
e98c1c1
test: fix OPT_STEP_ADAMW for test-backend-ops (ggml/974)
JohannesGaessler Sep 30, 2024
f1b8c42
sync : ggml
ggerganov Oct 1, 2024
3f1ae2e
Update README.md (#9591)
32bitmicro Oct 1, 2024
148844f
examples : remove benchmark (#9704)
ggerganov Oct 2, 2024
76b37d1
gguf-split : improve --split and --merge logic (#9619)
kylo5aby Oct 2, 2024
00b7317
vulkan : do not use tensor->extra (#9407)
rgerganov Oct 2, 2024
f536f4c
[SYCL] Initial cmake support of SYCL for AMD GPUs (#9658)
Alcpz Oct 2, 2024
a39ab21
llama : reduce compile time and binary size (#9712)
ngxson Oct 2, 2024
c83ad6d
ggml-backend : add device and backend reg interfaces (#9707)
slaren Oct 2, 2024
5639971
Fixed dequant precision issues in Q4_1 and Q5_1 (#9711)
OuadiElfarouki Oct 3, 2024
841713e
rpc : enable vulkan (#9714)
rgerganov Oct 3, 2024
e3c355b
convert : handle tokenizer merges format from transformers 4.45 (#9696)
compilade Oct 3, 2024
d6fe7ab
ggml: unify backend logging mechanism (#9709)
bandoti Oct 3, 2024
a7ad553
ggml-backend : add device description to CPU backend (#9720)
slaren Oct 3, 2024
5d5ab1e
metal : fix compute pass descriptor autorelease crash (#9718)
jmousseau Oct 3, 2024
eee39bd
ggml: refactor cross entropy loss CPU impl. (ggml/976)
JohannesGaessler Oct 2, 2024
fabdc3b
ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)
JohannesGaessler Oct 3, 2024
1bb8a64
sync : ggml
ggerganov Oct 3, 2024
d5ed2b9
metal : remove abort (skip) (ggml/0)
ggerganov Oct 3, 2024
133c7b4
Fixed RNG seed docs (#9723)
d-kleine Oct 4, 2024
f3fdcfa
ci : fine-grant permission (#9710)
ngxson Oct 4, 2024
ff56576
ggml : fixes after sync (ggml/983)
slaren Oct 4, 2024
55951c0
ggml : fix typo in example usage ggml_gallocr_new (ggml/984)
danbev Oct 4, 2024
1788077
sync : ggml
ggerganov Oct 4, 2024
71967c2
Add Llama Assistant (#9744)
vietanhdev Oct 4, 2024
905f548
metal : zero-init buffer contexts (whisper/0)
ggerganov Oct 5, 2024
58b1669
sync : ggml
ggerganov Oct 5, 2024
8c475b9
rerank : use [SEP] token instead of [BOS] (#9737)
ggerganov Oct 5, 2024
b0915d5
vulkan : retry allocation with fallback flags (whisper/2451)
SRHMorris Oct 6, 2024
b6d6c52
sync : llama.cpp
ggerganov Oct 6, 2024
f4b2dcd
readme : fix typo [no ci]
ggerganov Oct 6, 2024
d5cb868
contrib : simplify + minor edits [no ci]
ggerganov Oct 6, 2024
96b6912
metal : single allocation of encode_async block (#9747)
ptsochantaris Oct 7, 2024
d5ac8cf
ggml : add metal backend registry / device (#9713)
ggerganov Oct 7, 2024
6279dac
flake.lock: Update (#9753)
ggerganov Oct 7, 2024
f1af42f
Update building for Android (#9672)
amqdn Oct 7, 2024
6374743
ggml : add backend registry / device interfaces to BLAS backend (#9752)
slaren Oct 7, 2024
fa42aa6
scripts : fix spelling typo in messages and comments (#9782)
standby24x7 Oct 8, 2024
458367a
server : better security control for public deployments (#9776)
ngxson Oct 8, 2024
dca1d4b
ggml : fix BLAS with unsupported types (#9775)
slaren Oct 8, 2024
3dc48fe
examples : remove llama.vim
ggerganov Oct 9, 2024
e702206
perplexity : fix integer overflow (#9783)
ggerganov Oct 9, 2024
c81f3bb
cmake : do not build common library by default when standalone (#9804)
slaren Oct 9, 2024
c7499c5
examples : do not use common library in simple example (#9803)
slaren Oct 10, 2024
cf8e0a3
musa: add docker image support (#9685)
yeahdongcn Oct 10, 2024
0e9f760
rpc : add backend registry / device interfaces (#9812)
slaren Oct 10, 2024
7eee341
common : use common_ prefix for common library functions (#9805)
slaren Oct 10, 2024
9677640
ggml : move more prints to the ggml log system (#9839)
slaren Oct 11, 2024
943d20b
musa : update doc (#9856)
yeahdongcn Oct 12, 2024
11ac980
llama : improve infill support and special token detection (#9798)
ggerganov Oct 12, 2024
95c76e8
server : remove legacy system_prompt feature (#9857)
ggerganov Oct 12, 2024
1bde94d
server : remove self-extend features (#9860)
ggerganov Oct 12, 2024
edc2656
server : add option to time limit the generation phase (#9865)
ggerganov Oct 12, 2024
92be9f1
flake.lock: Update (#9870)
ggerganov Oct 13, 2024
c7181bd
server : reuse cached context chunks (#9866)
ggerganov Oct 13, 2024
d4c19c0
server : accept extra_context for the infill endpoint (#9874)
ggerganov Oct 13, 2024
13dca2a
Vectorize load instructions in dmmv f16 CUDA kernel (#9816)
agray3 Oct 14, 2024
a89f75e
server : handle "logprobs" field with false value (#9871)
VoidIsVoid Oct 14, 2024
4c42f93
readme : update bindings list (#9889)
srgtuszy Oct 15, 2024
dcdd535
server : update preact (#9895)
ggerganov Oct 15, 2024
fbc98b7
sampling : add XTC sampler (#9742)
MaggotHATE Oct 15, 2024
223c25a
server : improve infill context reuse (#9894)
ggerganov Oct 15, 2024
755a9b2
llama : add infill sampler (#9896)
ggerganov Oct 15, 2024
becfd38
[CANN] Fix cann compilation error (#9891)
leo-pony Oct 16, 2024
cd60b88
ggml-alloc : remove buffer_id from leaf_alloc (ggml/987)
danbev Oct 9, 2024
0e41b30
sync : ggml
ggerganov Oct 16, 2024
1f66b69
server : fix the disappearance of the end of the text (#9867)
z80maniac Oct 16, 2024
10433e8
llama : add tensor name for "result_norm" (#9907)
MollySophia Oct 16, 2024
66c2c93
grammar : fix JSON Schema for string regex with top-level alt. (#9903)
jemc Oct 16, 2024
dbf18e4
llava : fix typo in error message [no ci] (#9884)
danbev Oct 16, 2024
9e04102
llama : suppress conversion from 'size_t' to 'int' (#9046)
danbev Oct 16, 2024
73afe68
fix: use `vm_allocate` to allocate CPU backend buffer on macOS (#9875)
giladgd Oct 16, 2024
2194200
fix: allocating CPU buffer with size `0` (#9917)
giladgd Oct 16, 2024
f010b77
vulkan : add backend registry / device interfaces (#9721)
slaren Oct 17, 2024
3752217
readme : update bindings list (#9918)
ShenghaiWang Oct 17, 2024
99bd4ac
llama : infill sampling handle very long tokens (#9924)
ggerganov Oct 17, 2024
9f45fc1
llama : change warning to debug log
ggerganov Oct 17, 2024
17bb928
readme : remove --memory-f32 references (#9925)
ggerganov Oct 17, 2024
6f55bcc
llama : rename batch_all to batch (#8881)
danbev Oct 17, 2024
8901755
server : add n_indent parameter for line indentation requirement (#9929)
ggerganov Oct 18, 2024
60ce97c
add amx kernel for gemm (#8998)
mingfeima Oct 18, 2024
87421a2
[SYCL] Add SYCL Backend registry, device and Event Interfaces (#9705)
OuadiElfarouki Oct 18, 2024
afd9909
rpc : backend refactoring (#9912)
rgerganov Oct 18, 2024
cda0e4b
llama : remove all_pos_0, all_pos_1, all_seq_id from llama_batch (#9745)
ngxson Oct 18, 2024
7cab208
readme : update infra list (#9942)
icppWorld Oct 20, 2024
45f0976
readme : update bindings list (#9951)
lcarrere Oct 20, 2024
1db8c84
fix mul_mat_vec_q and *_vec_q error (#9939)
NeoZhangJianyu Oct 21, 2024
bc21975
speculative : fix handling of some input params (#9963)
ggerganov Oct 21, 2024
55e4778
llama : default sampling changes + greedy update (#9897)
ggerganov Oct 21, 2024
d5ebd79
rpc : pack only RPC structs (#9959)
rgerganov Oct 21, 2024
f594bc8
ggml : add asserts for type conversion in fattn kernels (#9971)
ggerganov Oct 21, 2024
dbd5f2f
llama.vim : plugin for Neovim (#9787)
ggerganov Oct 21, 2024
94008cc
arg : fix attention non-causal arg value hint (#9985)
danbev Oct 21, 2024
994cfb1
readme : update UI list (#9972)
a-ghorbani Oct 21, 2024
e01c67a
llama.vim : move info to the right of screen [no ci] (#9787)
ggerganov Oct 21, 2024
e94a138
llama.vim : fix info text display [no ci] (#9787)
ggerganov Oct 21, 2024
674804a
arg : fix typo in embeddings argument help [no ci] (#9994)
danbev Oct 22, 2024
6b84473
[CANN] Adapt to dynamically loadable backends mechanism (#9970)
leo-pony Oct 22, 2024
4ff7fe1
llama : add chat template for RWKV-World + fix EOT (#9968)
MollySophia Oct 22, 2024
c421ac0
lora : warn user if new token is added in the adapter (#9948)
ngxson Oct 22, 2024
11d4705
Rwkv chat template fix (#10001)
MollySophia Oct 22, 2024
19d900a
llama : rename batch to ubatch (#9950)
danbev Oct 22, 2024
c8c07d6
llama : fix empty batch causing llama_batch_allocr to crash (#9966)
ngxson Oct 22, 2024
873279b
flake.lock: Update
github-actions[bot] Oct 20, 2024
4c9388f
metal : add POOL2D and fix IM2COL (#9943)
junhee-yoo Oct 23, 2024
ac113a0
llama.vim : add classic vim support (#9995)
m18coppola Oct 23, 2024
c19af0a
ggml : remove redundant set of contexts used field (ggml/978)
danbev Oct 16, 2024
80273a3
CUDA: fix 1D im2col, add tests (ggml/993)
JohannesGaessler Oct 18, 2024
2d3aba9
llama.vim : bump generation time limit to 3s [no ci]
ggerganov Oct 23, 2024
190a37d
sync : ggml
ggerganov Oct 23, 2024
0a1c750
server : samplers accept the prompt correctly (#10019)
wwoodsTM Oct 23, 2024
27 changes: 12 additions & 15 deletions .devops/full-cuda.Dockerfile
@@ -1,18 +1,16 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements
@@ -24,13 +22,12 @@ WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc)
# Use the default CUDA archs if not specified
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
cp build/bin/* .

ENTRYPOINT ["/app/.devops/tools.sh"]
26 changes: 26 additions & 0 deletions .devops/full-musa.Dockerfile
@@ -0,0 +1,26 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

RUN apt-get update && \
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt

WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
cp build/bin/* .

ENTRYPOINT ["/app/.devops/tools.sh"]
10 changes: 5 additions & 5 deletions .devops/full-rocm.Dockerfile
@@ -6,12 +6,12 @@ ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
ARG ROCM_DOCKER_ARCH="\
gfx803 \
gfx900 \
gfx906 \
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
gfx1030 \
gfx1100 \
gfx1101 \
gfx1102
gfx1102"

COPY requirements.txt requirements.txt
COPY requirements requirements
@@ -34,9 +34,9 @@ WORKDIR /app
COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

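
The architecture list is now quoted and exported as `AMDGPU_TARGETS` (the variable the HIP build actually reads, per the Docker ROCm fix in this PR) instead of `GPU_TARGETS`. A sketch of narrowing the fat build to one GPU family (the value is an example, not part of the PR):

```sh
# Build only for RDNA3 instead of the full default architecture list.
docker build -t local/llama.cpp:full-rocm \
  --build-arg ROCM_DOCKER_ARCH=gfx1100 \
  -f .devops/full-rocm.Dockerfile .
```
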
2 changes: 1 addition & 1 deletion .devops/full.Dockerfile
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
44 changes: 44 additions & 0 deletions .devops/llama-cli-cann.Dockerfile
@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM cosdt/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the drive hasn`t been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM cosdt/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]
28 changes: 15 additions & 13 deletions .devops/llama-cli-cuda.Dockerfile
@@ -1,35 +1,37 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y build-essential git
apt-get install -y build-essential git cmake

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Use the default CUDA archs if not specified
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
16 changes: 9 additions & 7 deletions .devops/llama-cli-intel.Dockerfile
@@ -1,23 +1,25 @@
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG LLAMA_SYCL_F16=OFF
ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
echo "GGML_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
echo "Building with static libs" && \
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

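
The FP16 toggle follows the library rename: `GGML_SYCL_F16` replaces `LLAMA_SYCL_F16`, and the build is now static. A sketch of enabling it (the tag is a placeholder):

```sh
docker build -t local/llama.cpp:cli-sycl \
  --build-arg GGML_SYCL_F16=ON \
  -f .devops/llama-cli-intel.Dockerfile .
```
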
30 changes: 30 additions & 0 deletions .devops/llama-cli-musa.Dockerfile
@@ -0,0 +1,30 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

RUN apt-get update && \
apt-get install -y build-essential git cmake

WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
10 changes: 5 additions & 5 deletions .devops/llama-cli-rocm.Dockerfile
@@ -6,12 +6,12 @@ ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
ARG ROCM_DOCKER_ARCH="\
gfx803 \
gfx900 \
gfx906 \
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
gfx1030 \
gfx1100 \
gfx1101 \
gfx1102
gfx1102"

COPY requirements.txt requirements.txt
COPY requirements requirements
@@ -34,9 +34,9 @@ WORKDIR /app
COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

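
Runtime usage is unchanged by this diff; the container still needs the host's KFD and DRI devices. A typical invocation might look like the following (group names and security options vary by distro, and the tag is a placeholder):

```sh
docker run --rm \
  --device /dev/kfd --device /dev/dri \
  --group-add video --security-opt seccomp=unconfined \
  -v "$PWD/models:/models" \
  local/llama.cpp:cli-rocm -m /models/model.gguf -p "Hello" -ngl 99
```
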
4 changes: 2 additions & 2 deletions .devops/llama-cli-vulkan.Dockerfile
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1
@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 && \
RUN cmake -B build -DGGML_VULKAN=1 && \
cmake --build build --config Release --target llama-cli

# Clean up
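
The only functional change here is the CMake flag rename from `LLAMA_VULKAN` to `GGML_VULKAN`. A hedged build-and-run sketch, assuming the host exposes a Vulkan-capable GPU through `/dev/dri`:

```sh
docker build -t local/llama.cpp:cli-vulkan -f .devops/llama-cli-vulkan.Dockerfile .

docker run --rm --device /dev/dri -v "$PWD/models:/models" \
  local/llama.cpp:cli-vulkan -m /models/model.gguf -p "Hello" -ngl 99
```
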
4 changes: 2 additions & 2 deletions .devops/llama-cli.Dockerfile
@@ -1,6 +1,6 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential git
@@ -11,7 +11,7 @@ COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION as runtime
FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
apt-get install -y libgomp1