Merged
Changes from all commits
245 commits
5e99646
Vulkan: Fix device info output format specifiers (#10366)
0cc4m Nov 18, 2024
9e4af2d
flake.lock: Update (#10346)
ggerganov Nov 18, 2024
f3d8b63
vulkan: remove use of null initializer (#10372)
jeffbolznv Nov 18, 2024
154a391
Skip searching root path for cross-compile builds (#10383)
bandoti Nov 18, 2024
caea396
cuda : only use native when supported by cmake (#10389)
slaren Nov 18, 2024
74b246b
sycl: Revert MUL_MAT_OP support changes (#10385)
Alcpz Nov 19, 2024
e975c44
vulkan: Optimize soft_max (#10301)
jeffbolznv Nov 19, 2024
9a59aa2
sycl : Add option to set the SYCL architecture for all targets (#10266)
Rbiessy Nov 19, 2024
07a1be4
llama : add OLMo November 2024 support (#10394)
2015aroras Nov 19, 2024
a9069ec
llama : add check for KV cache shifts (#10401)
ggerganov Nov 19, 2024
2641f52
cuda : fix CUDA_FLAGS not being applied (#10403)
slaren Nov 19, 2024
3e2334e
Add required ggml-base and backend libs to cmake pkg (#10407)
bandoti Nov 19, 2024
1c592f6
cmake: force MSVC compiler charset to utf-8 (#9989)
MakeDecisionWorth Nov 19, 2024
0354ccb
metal : add `GGML_UNARY_OP_ELU` kernel (ggml/1018)
PABannier Nov 18, 2024
0cc8a63
metal : fix offset integer overflows in im2col (ggml/1015)
pminev Nov 18, 2024
72819e7
sync : ggml
ggerganov Nov 19, 2024
514b193
add cmake rvv support (#10411)
lhpqaq Nov 19, 2024
142d3c2
Fix missing file renames in Makefile due to changes in commit ae8de6d…
avdg Nov 19, 2024
68b25ed
update rel to 4040 (#10395)
NeoZhangJianyu Nov 20, 2024
a77f705
vulkan: further optimize mul_mat_vec using larger loads (#10387)
jeffbolznv Nov 20, 2024
c396183
vulkan: copy iq4_nl LUT into shared memory (#10409)
jeffbolznv Nov 20, 2024
b08b75c
llama : add .clang-format file (#10415)
slaren Nov 20, 2024
f107657
cmake: add link dependencies to cmake find pkg (#10433)
bandoti Nov 20, 2024
bd31703
vulkan: predicate max operation in soft_max shaders/soft_max (#10437)
jeffbolznv Nov 20, 2024
6aefea2
ggml-opt: fix data corruption (ggml/1022)
JohannesGaessler Nov 20, 2024
6f97400
ggml/sched : do not skip views in pre-assignments
slaren Nov 20, 2024
3882f3f
sync : ggml
ggerganov Nov 21, 2024
6fb5f1a
llama : handle KV shift for recurrent models (#10402)
ggerganov Nov 21, 2024
708842d
cuda : optimize argmax (#10441)
slaren Nov 21, 2024
e763d5c
CANN: Support Ascend310P to accelerate F32 and F16 Model (#10216)
leo-pony Nov 22, 2024
b47ede8
GitHub: ask for more info in issue templates (#10426)
JohannesGaessler Nov 22, 2024
4ebae97
ci: Update oneAPI runtime dll packaging (#10428)
MakeDecisionWorth Nov 22, 2024
50a43a6
ggml : do not use ARM features not included in the build (#10457)
slaren Nov 23, 2024
6ffef43
fix gguf-py: Conversion error when multiple licenses are configured …
mmngays Nov 24, 2024
68ebe18
convert : XLMRoberta Type Vocab Size (#10458)
gabe-l-hart Nov 24, 2024
823b211
llama : fix op mul check with command-r-plus (#10476)
slaren Nov 24, 2024
02dc28e
flake.lock: Update (#10470)
ggerganov Nov 24, 2024
611f679
speculative : refactor and add a simpler example (#10362)
ggerganov Nov 25, 2024
c9dd5ab
[SYCL] Fix building Win package for oneAPI 2025.0 update (#10483)
NeoZhangJianyu Nov 25, 2024
f966337
metal : minor code formatting
ggerganov Nov 25, 2024
f8abca2
tests : fix compile warning
ggerganov Nov 25, 2024
fa4365c
ggml : add support for dynamic loading of backends (#10469)
slaren Nov 25, 2024
1cb813b
server : add speculative decoding support (#10455)
ggerganov Nov 25, 2024
9f9ca37
Add download chat feature to server chat (#10481)
brucepro Nov 25, 2024
e2b3e41
Github: update issue templates [no ci] (#10489)
JohannesGaessler Nov 25, 2024
20c1cf2
llama : accept a list of devices to use to offload a model (#10497)
slaren Nov 25, 2024
0f28837
Rename Olmo1124 to Olmo2 (#10500)
2015aroras Nov 25, 2024
c869ec3
metal : enable mat-vec kernels for bs <= 4 (#10491)
ggerganov Nov 25, 2024
872ea1a
server : enable cache_prompt by default (#10501)
ggerganov Nov 25, 2024
acbea67
server : add more information about error (#10455)
ggerganov Nov 25, 2024
afadaa1
ci : build docker images only once daily (#10503)
slaren Nov 25, 2024
b0486ff
Introduce llama-run (#10291)
ericcurtin Nov 25, 2024
dc2dc79
vulkan: Fix a vulkan-shaders-gen argument parsing error (#10484)
sparkleholic Nov 26, 2024
a7637d3
CANN: RoPE and CANCAT operator optimization (#10488)
noemotiovon Nov 26, 2024
a3214b5
CANN: Improve the Inferencing Performance for Ascend NPU Device (#10454)
shen-shanshan Nov 26, 2024
315fbd2
speculative : simplify the implementation (#10504)
ggerganov Nov 26, 2024
eb7f0ed
server : fix parallel speculative decoding (#10513)
ggerganov Nov 26, 2024
f134ef5
ggml-cpu: cmake add arm64 cpu feature check for macos (#10487)
chaxu01 Nov 26, 2024
058dba3
ci : add ubuntu cuda build, build with one arch on windows (#10456)
slaren Nov 26, 2024
4d773f4
ci : publish the docker images created during scheduled runs (#10515)
slaren Nov 26, 2024
e8a97f6
cmake : enable warnings in llama (#10474)
ggerganov Nov 26, 2024
8da1d9c
restore the condition to build & update package on merge (#10507)
NeoZhangJianyu Nov 26, 2024
7e88ce5
server : replace behave with pytest (#10416)
ngxson Nov 26, 2024
6649360
vulkan: fix group_norm (#10496)
jeffbolznv Nov 26, 2024
264339e
mtgpu: Add MUSA_DOCKER_ARCH in Dockerfiles && update cmake and make (…
yeahdongcn Nov 26, 2024
0f185c5
Fix HIP flag inconsistency & build docs (#10524)
tristandruyen Nov 26, 2024
142f48c
llama : disable warnings for 3rd party sha1 dependency (#10527)
slaren Nov 26, 2024
e44800d
ci : remove nix workflows (#10526)
slaren Nov 26, 2024
929cf88
Add OLMo 2 model in docs (#10530)
2015aroras Nov 26, 2024
1c1e8ba
ci : fix cuda releases (#10532)
slaren Nov 26, 2024
9bb00fe
vulkan: optimize Q2_K and Q3_K mul_mat_vec (#10459)
jeffbolznv Nov 27, 2024
16a8bca
vulkan: skip integer div/mod in get_offsets for batch_idx==0 (#10506)
jeffbolznv Nov 27, 2024
433e5ca
vulkan: further optimize q5_k mul_mat_vec (#10479)
jeffbolznv Nov 27, 2024
21ea961
vulkan: Handle GPUs with less shared memory (#10468)
jeffbolznv Nov 27, 2024
e197ca1
vulkan: define all quant data structures in types.comp (#10440)
jeffbolznv Nov 27, 2024
8c52880
Do not include arm_neon.h when compiling CUDA code (ggml/1028)
frankier Nov 26, 2024
2a52637
sync : ggml
ggerganov Nov 27, 2024
c2c2670
metal : fix group_norm support condition (#0)
ggerganov Nov 27, 2024
de6717e
ci : faster CUDA toolkit installation method and use ccache (#10537)
slaren Nov 27, 2024
3da2aeb
Add some minimal optimizations for CDNA (#10498)
IMbackK Nov 27, 2024
416110b
common : fix duplicated file name with hf_repo and hf_file (#10550)
ngxson Nov 27, 2024
1fdca2f
CANN: ROPE operator optimization (#10540)
noemotiovon Nov 28, 2024
a45d576
CANN: Fix SOC_TYPE compile bug (#10519)
leo-pony Nov 28, 2024
328a13d
CANN: Update cann.md to display correctly in CLion (#10538)
HRXWEB Nov 28, 2024
e6f1bc2
kompute : improve backend to pass test_backend_ops (#10542)
slp Nov 28, 2024
2016284
ggml-cpu: support IQ4_NL_4_4 by runtime repack (#10541)
FanShupei Nov 28, 2024
94014df
cmake : fix ARM feature detection (#10543)
ggerganov Nov 28, 2024
cd1741d
ggml : fix row condition for i8mm kernels (#10561)
ggerganov Nov 28, 2024
88f01a0
ci : fix tag name in cuda and hip releases (#10566)
slaren Nov 28, 2024
4f64e40
docs: fix outdated usage of llama-simple (#10565)
rand-fly Nov 28, 2024
7b5d38f
common: fix warning message when no GPU found (#10564)
JohannesGaessler Nov 28, 2024
250c873
server : (tests) don't use thread for capturing stdout/stderr, bump o…
ngxson Nov 28, 2024
5012af7
llama : add missing model types
ggerganov Nov 28, 2024
1cb83e0
ggml : remove redundant copyright notice + update authors
ggerganov Nov 28, 2024
94b4f06
llava: return false instead of exit (#10546)
tinglou Nov 29, 2024
a825e03
vulkan: get the first command buffer submitted sooner (#10499)
jeffbolznv Nov 29, 2024
9028651
CANN: RoPE operator optimization (#10563)
noemotiovon Nov 29, 2024
a64f340
sycl : Reroute permuted mul_mats through oneMKL (#10408)
Alcpz Nov 29, 2024
00bd891
sycl : offload of get_rows set to 0 (#10432)
Alcpz Nov 29, 2024
02888d4
ggml-cpu: fix typo in gemv/gemm iq4_nl_4_4 (#10580)
FanShupei Nov 29, 2024
f819bab
ggml : fix I8MM Q4_1 scaling factor conversion (#10562)
ggerganov Nov 29, 2024
1129dc8
cleanup UI link list (#10577)
slaren Nov 29, 2024
32519f3
imatrix : support combine-only (#10492)
robbiemu Nov 29, 2024
9ce227c
server : add more test cases (#10569)
ngxson Nov 29, 2024
df8e981
ggml : move AMX to the CPU backend (#10570)
slaren Nov 29, 2024
cc69e60
vulkan: Dynamic subgroup size support for Q6_K mat_vec (#10536)
netrunnereve Nov 30, 2024
09315ed
readme : refresh (#10587)
ggerganov Nov 30, 2024
238f9be
readme : remove old badge
ggerganov Nov 30, 2024
465e6b7
ggml-cpu: replace AArch64 NEON assembly with intrinsics in ggml_gemv_…
angt Nov 30, 2024
12a744d
build: update Makefile comments for C++ version change (#10598)
wangqin0 Dec 1, 2024
4537839
readme : update the usage section with examples (#10596)
ggerganov Dec 1, 2024
d7e94ac
server : bind to any port when specified (#10590)
alek3y Dec 1, 2024
59c8895
ggml : automatic selection of best CPU backend (#10606)
slaren Dec 1, 2024
783b21b
ci: add error handling for Python venv creation in run.sh (#10608)
wangqin0 Dec 1, 2024
29ade18
grammars : add English-only grammar (#10612)
ggerganov Dec 1, 2024
3d85b01
Add `mistral-v1`, `mistral-v3`, `mistral-v3-tekken` and `mistral-v7` …
jukofyork Dec 1, 2024
c986a92
contrib : refresh (#10593)
ggerganov Dec 2, 2024
1d6c85a
server: Add "tokens per second" information in the backend (#10548)
lhpqaq Dec 2, 2024
883e61a
make : deprecate (#10514)
ggerganov Dec 2, 2024
01acf48
llama : add enum for built-in chat templates (#10623)
ngxson Dec 2, 2024
180d8d0
server : fix default draft model parameters (#10586)
ggerganov Dec 3, 2024
e88df91
github : minify link [no ci]
ggerganov Dec 3, 2024
bb9ecac
github : minify link [no ci] (revert)
ggerganov Dec 3, 2024
2b5e903
metal : small-batch mat-mul kernels (#10581)
ggerganov Dec 3, 2024
923f4ab
readme : add option, update default value, fix formatting (#10271)
pothitos Dec 3, 2024
4e27070
llama : add missing LLAMA_API for llama_chat_builtin_templates (#10636)
ngxson Dec 3, 2024
b9dc0d2
metal : add `GGML_OP_CONV_TRANSPOSE_1D` kernels (ggml/1026)
PABannier Nov 28, 2024
ad84fc8
feat: add `GGML_UNARY_OP_ARGMAX` Metal kernel (ggml/1019)
PABannier Dec 2, 2024
b5f822f
CUDA: remove unnecessary warp reduce in FA (ggml/1032)
mahorozte Dec 3, 2024
5f1e4d4
sync : ggml
ggerganov Dec 3, 2024
e7bee28
scripts : remove amx sync
ggerganov Dec 3, 2024
a5320c6
server : (web ui) Various improvements, now use vite as bundler (#10599)
ngxson Dec 3, 2024
746a4e0
vulkan: optimize and reenable split_k (#10637)
jeffbolznv Dec 3, 2024
e480feb
clip : add sycl support (#10574)
piDack Dec 4, 2024
507aae4
Add docs for creating a static build (#10268) (#10630)
mostlygeek Dec 4, 2024
2d029b1
Avoid using __fp16 on ARM with old nvcc (#10616)
frankier Dec 4, 2024
7fd12a6
fix typo of README.md (#10605)
WrRan Dec 4, 2024
db892e1
SYCL : Move to compile time oneMKL interface backend selection for NV…
s-Nick Dec 4, 2024
3c1539a
vulkan: Implement "fast divide" (mul+shift) for unary ops like copy (…
jeffbolznv Dec 4, 2024
f0fd6a8
llama: Support MiniCPM-1B (with & w/o longrope) (#10559)
JFLFY2255 Dec 4, 2024
a9bb5b8
Fix HF repo commit to clone lora test models (#10649)
ltoniazzi Dec 4, 2024
9a840f8
ggml-cpu : fix HWCAP2_I8MM value (#10646)
slaren Dec 4, 2024
545316b
ggml : add predefined list of CPU backend variants to build (#10626)
slaren Dec 4, 2024
b8b403f
server : fix speculative decoding with context shift (#10641)
ggerganov Dec 4, 2024
94bb316
Update deprecation-warning.cpp (#10619)
aryantandon01 Dec 4, 2024
73d3d8c
py : update outdated copy-paste instructions [no ci] (#10667)
danbev Dec 5, 2024
63df14a
ggml : add `GGML_PAD_REFLECT_1D` operation (ggml/1034)
PABannier Dec 3, 2024
1daf35b
ggml: add `GGML_SET` Metal kernel + i32 CPU kernel (ggml/1037)
PABannier Dec 4, 2024
36447f9
sync : ggml
ggerganov Dec 5, 2024
d80a283
llama : add Minerva 7B model support (#10673)
Riccorl Dec 5, 2024
10512c3
vulkan: Add VK_NV_cooperative_matrix2 support for mul_mat and flash a…
jeffbolznv Dec 5, 2024
c28a202
fix(server) : not show alert when DONE is received (#10674)
pminev Dec 5, 2024
2070378
server : (refactoring) do not rely on JSON internally (#10643)
ngxson Dec 6, 2024
064e58a
common : bring back --no-warmup to server (#10686)
ngxson Dec 6, 2024
905344e
convert : add custom attention mapping
ggerganov Dec 6, 2024
a1b52c1
convert : add support for Roberta embeddings (#10695)
Ssukriti Dec 7, 2024
ebc1fad
metal : Extend how Llama.cpp locates metal resources (#10676)
ormandi Dec 7, 2024
a507932
Vulkan: VK_KHR_cooperative_matrix support to speed up prompt processi…
0cc4m Dec 7, 2024
2911db5
server : fix free of spec context and batch (#10651)
ggerganov Dec 7, 2024
d90c615
ggml : refactor online repacking (#10446)
Djip007 Dec 7, 2024
65b3db9
server : various fixes (#10704)
ggerganov Dec 7, 2024
a0b1dcc
ggml : disable iq4_nl interleave size 8 (#10709)
ggerganov Dec 7, 2024
c5ad0bf
server : (refactor) no more json in server_task input (#10691)
ngxson Dec 7, 2024
9651dc8
llama : add 128k yarn context for Qwen (#10698)
robbiemu Dec 7, 2024
14d4693
vulkan: compile a test shader in cmake to check for coopmat2 support …
jeffbolznv Dec 8, 2024
fd9cf67
llama : use cmake for swift build (#10525)
slaren Dec 8, 2024
e136690
Vulkan: fix NaN in tanh.comp with AMD proprietary driver on Windows (…
stduhpf Dec 8, 2024
5eb7358
server : bring back info of final chunk in stream mode (#10722)
ngxson Dec 8, 2024
efc7661
server : fix format_infill (#10724)
ngxson Dec 8, 2024
e2edfeb
cmake : simplify msvc charsets (#10672)
iboB Dec 9, 2024
24d7c8f
vulkan: fix compile warnings (#10731)
jeffbolznv Dec 9, 2024
458f369
Changes to CMakePresets.json to add ninja clang target on windows (#1…
Srihari-mcw Dec 9, 2024
44aee1c
CUDA: fix shared memory access condition for mmv (#10740)
JohannesGaessler Dec 9, 2024
43fdc54
vulkan: disable spirv-opt for coopmat shaders (#10763)
jeffbolznv Dec 10, 2024
99e9454
server : add flag to disable the web-ui (#10762) (#10751)
eugeniosegala Dec 10, 2024
3712295
CUDA: rename macros to avoid conflicts with WinAPI (#10736)
aendk Dec 10, 2024
8d07314
imatrix : Add imatrix to --no-context-shift (#10766)
bartowski1182 Dec 10, 2024
f4892ca
vulkan: dynamic subgroup size for the remaining k quants (#10745)
netrunnereve Dec 10, 2024
aeab6a7
vulkan: request round-to-even for fp16 in im2col/rope_head (#10767)
jeffbolznv Dec 10, 2024
6c52736
ggml: load all backends from a user-provided search path (#10699)
giladgd Dec 11, 2024
d3e0eee
docs: fix server documentation formatting (#10776)
CentricStorm Dec 11, 2024
c8d98b0
bug-fix: snprintf prints NULL in place of the last character (#10419)
kallewoof Dec 11, 2024
2c0b614
ci : pin nodejs to 22.11.0 (#10779)
ngxson Dec 11, 2024
1a47802
Update README.md (#10772)
Dec 11, 2024
367e86b
server : (UI) add tok/s, get rid of completion.js (#10786)
ngxson Dec 11, 2024
20c00f6
gguf-py : bump version to 0.11.0
ggerganov Dec 11, 2024
a51a27f
docs: update server streaming mode documentation (#9519)
CentricStorm Dec 11, 2024
d69fcae
common : add missing env var for speculative (#10801)
ngxson Dec 12, 2024
65d648f
Vulkan: Add VK_EXT_subgroup_size_control support to ensure full subgr…
0cc4m Dec 12, 2024
e4a6749
Vulkan: Use improved q4_k and q5_k dequant code in dequant shaders (#…
0cc4m Dec 12, 2024
2f0d559
remove CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS (#10797)
slaren Dec 12, 2024
2abbafa
CUDA: faster non-contiguous concat (#10760)
A3shTnT Dec 12, 2024
0baed04
contrib : add ngxson as codeowner (#10804)
ngxson Dec 12, 2024
069a999
common : improve -ctv -ctk CLI arguments (#10806)
ngxson Dec 12, 2024
f2f391c
ggml : Fix compilation issues on ARM platform when building without f…
kkontny Dec 13, 2024
a674ffb
SYCL: Reduce most of the compiler warnings (#10748)
qnixsynapse Dec 13, 2024
e76a5b7
vulkan: small mul_mat_vec optimizations (#10665)
netrunnereve Dec 13, 2024
f687631
Fix crash caused by ggml_backend_load_all when launching on Android A…
sienaiwun Dec 13, 2024
3e4b36c
gguf-py : numpy 2 newbyteorder fix (#9772)
jettjaniak Dec 13, 2024
45f8465
fix: graceful shutdown for Docker images (#10815)
co42 Dec 13, 2024
e7c0f17
Opt class for positional argument handling (#10508)
ericcurtin Dec 13, 2024
4b7ae34
Introducing experimental OpenCL backend with support for Qualcomm Adr…
lhez Dec 13, 2024
ea373cf
Removes spurious \r in output that causes logging in journalctl to tr…
cduk Dec 13, 2024
c12caff
llama : add Qwen2VL support + multimodal RoPE (#10361)
HimariO Dec 14, 2024
0ff002b
nix: allow to override rocm gpu targets (#10794)
kurnevsky Dec 14, 2024
4f47e21
server: Fix `has_next_line` in JSON response (#10818)
MichelleTanPY Dec 14, 2024
3dfaa4c
gguf-py : bump to v0.13.0
ggerganov Dec 15, 2024
fef4135
server: (UI) add syntax highlighting and latex math rendering (#10808)
VJHack Dec 15, 2024
1fa6dc2
scripts : change build path to "build-bench" for compare-commits.sh (…
ggerganov Dec 15, 2024
aa7ccec
llama : add Deepseek MoE v1 & GigaChat models (#10827)
Inf1delis Dec 15, 2024
5da384b
llava : Allow locally downloaded models for QwenVL (#10833)
bartowski1182 Dec 15, 2024
ef62166
sampling : refactor + optimize penalties sampler (#10803)
ggerganov Dec 16, 2024
8069dec
unicode : improve naming style (#10838)
ggerganov Dec 16, 2024
4e32aca
rwkv6: add wkv6 support for Vulkan backend (#10829)
zhiyuan1i Dec 16, 2024
3b63e55
vulkan: bugfixes for small subgroup size systems + llvmpipe test (#10…
netrunnereve Dec 17, 2024
838f91d
server : (UI) fix missing async generator on safari (#10857)
ngxson Dec 17, 2024
eb5e9a2
readme : update typos (#10863)
ruanych Dec 17, 2024
6a27965
llama : add Falcon3 support (#10864)
mokeddembillel Dec 17, 2024
e122ae2
server : fill usage info in embeddings and rerank responses (#10852)
krystiancha Dec 17, 2024
5b1759d
ggml : update ggml_backend_cpu_device_supports_op (#10867)
ggerganov Dec 17, 2024
d1e25cd
ggml : add check for grad_accs (ggml/1046)
danbev Dec 13, 2024
1e0a597
ggml : remove return from ggml_gallocr_allocate_node (ggml/1048)
danbev Dec 14, 2024
2c761cc
vulkan : fix soft_max.comp division by zero (whisper/2633)
gn64 Dec 16, 2024
b51d8a7
cmake : fix "amd64" processor string (whisper/2638)
ggerganov Dec 17, 2024
b3fbbbe
sync : ggml
ggerganov Dec 17, 2024
0cb43f2
tests: add tests for GGUF (#10830)
JohannesGaessler Dec 17, 2024
4852a5b
Use model->gguf_kv for loading the template instead of using the C AP…
dranger003 Dec 17, 2024
5b74740
Revert "llama : add Falcon3 support (#10864)" (#10876)
slaren Dec 18, 2024
49c08d5
docs: Fix HIP (née hipBLAS) in README (#10880)
brianredbeard Dec 18, 2024
ab97a8e
server : (embeddings) using same format for "input" and "content" (#1…
ngxson Dec 18, 2024
6d7f216
server : add "tokens" output (#10853)
ggerganov Dec 18, 2024
0b2b89e
server : output embeddings for all tokens when pooling = none (#10861)
ggerganov Dec 18, 2024
80b3fb9
server: avoid overwriting Authorization header (#10878)
vesath Dec 18, 2024
260e334
tts : add OuteTTS support (#10784)
ggerganov Dec 18, 2024
44792a2
ggml : fix arm build (#10890)
slaren Dec 18, 2024
73f11f6
llama-run : improve progress bar (#10821)
ericcurtin Dec 19, 2024
b26f071
tests: disable GGUF test for bad value size (#10886)
JohannesGaessler Dec 19, 2024
f27a7cb
convert : Add support for Microsoft Phi-4 model (#10817)
fairydreaming Dec 19, 2024
ac68d37
llama : fix Roberta embeddings (#10856)
Ssukriti Dec 19, 2024
784fa8a
ggml: fix arm build with gcc (#10895)
angt Dec 19, 2024
a20b502
server : fix logprobs, make it OAI-compatible (#10783)
ngxson Dec 19, 2024
b126125
tts : small QoL for easy model fetch (#10903)
ggerganov Dec 19, 2024
3a6752b
llama : minor grammar refactor (#10897)
ggerganov Dec 19, 2024
286f861
clip : disable GPU support (#10896)
ggerganov Dec 19, 2024
bcfe978
correct the device info format
arthw Dec 20, 2024
161 changes: 161 additions & 0 deletions .clang-format
@@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
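Note: with this .clang-format at the repository root, clang-format discovers it automatically for any file in the tree. A minimal usage sketch (the file path is illustrative, not part of this diff):

    # format a file in place using the repository's .clang-format
    clang-format -i src/llama.cpp

To restrict formatting to staged changes only, git clang-format can be used instead.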

2 changes: 2 additions & 0 deletions .clang-tidy
@@ -17,8 +17,10 @@ Checks: >
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
performance-*,
portability-*,
-portability-simd-intrinsics,
misc-*,
-misc-const-correctness,
-misc-non-private-member-variables-in-classes,
-misc-no-recursion,
-misc-use-anonymous-namespace,
FormatStyle: none
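Note: clang-tidy also picks this config up automatically from the source tree, but it needs compile flags, typically from a compile_commands.json. A hedged sketch (paths illustrative):

    # generate compile_commands.json, then point clang-tidy at it
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    clang-tidy -p build src/llama.cpp

With FormatStyle set to none, clang-tidy fix-its will not reformat the surrounding code.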
9 changes: 8 additions & 1 deletion .devops/full-musa.Dockerfile
@@ -6,6 +6,9 @@ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_V

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

+ # MUSA architecture to build for (defaults to all supported archs)
+ ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

@@ -19,7 +22,11 @@ WORKDIR /app

COPY . .

- RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ # Use the default MUSA archs if not specified
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+     fi && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
cp build/bin/* .
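Note: the new MUSA_DOCKER_ARCH build argument narrows the build to specific MUSA architectures instead of all supported ones. A hedged usage sketch (the arch value 21 is an assumption, not taken from this diff):

    docker build -f .devops/full-musa.Dockerfile --build-arg MUSA_DOCKER_ARCH=21 -t llama-musa-full .

Leaving the argument at its default keeps the previous behavior of building for all supported archs.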

31 changes: 22 additions & 9 deletions .devops/full.Dockerfile
@@ -3,23 +3,36 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
- apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

- COPY requirements.txt requirements.txt
- COPY requirements requirements
+ RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+     cmake --build build -j $(nproc) && \
+     mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib/ \;

- RUN pip install --upgrade pip setuptools wheel \
-     && pip install -r requirements.txt
+ FROM ubuntu:$UBUNTU_VERSION as runtime

+ WORKDIR /app

- COPY . .
+ RUN apt-get update && \
+     apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

- ENV LLAMA_CURL=1
+ COPY requirements.txt /app/requirements.txt
+ COPY requirements /app/requirements
+ COPY .devops/tools.sh /app/tools.sh

+ RUN pip install --upgrade pip setuptools wheel && \
+     pip install -r /app/requirements.txt

- RUN make -j$(nproc)
+ COPY --from=build /app/build/bin/ /app/
+ COPY --from=build /app/lib/ /app/
+ COPY --from=build /app/convert_hf_to_gguf.py /app/
+ COPY --from=build /app/gguf-py /app/gguf-py

ENV LC_ALL=C.utf8

- ENTRYPOINT ["/app/.devops/tools.sh"]
+ ENTRYPOINT ["/app/tools.sh"]
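Note: the entrypoint is now the tools.sh wrapper copied into /app rather than a path inside the source tree. A hedged run sketch, assuming tools.sh still accepts its usual mode flags such as --run (model path illustrative):

    docker build -f .devops/full.Dockerfile -t llama-full .
    docker run -v "$PWD/models":/models llama-full --run -m /models/model.gguf -p "Hello"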
9 changes: 8 additions & 1 deletion .devops/llama-cli-musa.Dockerfile
@@ -8,14 +8,21 @@ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

+ # MUSA architecture to build for (defaults to all supported archs)
+ ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y build-essential git cmake

WORKDIR /app

COPY . .

- RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ # Use the default MUSA archs if not specified
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+     fi && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
16 changes: 11 additions & 5 deletions .devops/llama-cli.Dockerfile
@@ -3,21 +3,27 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
- apt-get install -y build-essential git
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

- RUN make -j$(nproc) llama-cli
+ RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+     cmake --build build -j $(nproc) && \
+     mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib/ \;

FROM ubuntu:$UBUNTU_VERSION AS runtime

WORKDIR /app

RUN apt-get update && \
- apt-get install -y libgomp1
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl

- COPY --from=build /app/llama-cli /llama-cli
+ COPY --from=build /app/build/bin/llama-cli /app/
+ COPY --from=build /app/lib/ /app/

ENV LC_ALL=C.utf8

- ENTRYPOINT [ "/llama-cli" ]
+ ENTRYPOINT [ "/app/llama-cli" ]
9 changes: 8 additions & 1 deletion .devops/llama-server-musa.Dockerfile
@@ -8,14 +8,21 @@ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

+ # MUSA architecture to build for (defaults to all supported archs)
+ ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

- RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ # Use the default MUSA archs if not specified
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+     fi && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-server -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
Expand Down
16 changes: 10 additions & 6 deletions .devops/llama-server.Dockerfile
@@ -3,27 +3,31 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
- apt-get install -y build-essential git libcurl4-openssl-dev
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

- ENV LLAMA_CURL=1

- RUN make -j$(nproc) llama-server
+ RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+     cmake --build build -j $(nproc) && \
+     mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib/ \;

FROM ubuntu:$UBUNTU_VERSION AS runtime

WORKDIR /app

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl

- COPY --from=build /app/llama-server /llama-server
+ COPY --from=build /app/build/bin/llama-server /app/
+ COPY --from=build /app/lib/ /app/

ENV LC_ALL=C.utf8
# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

- ENTRYPOINT [ "/llama-server" ]
+ ENTRYPOINT [ "/app/llama-server" ]
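Note: because LLAMA_ARG_HOST is baked into the image as 0.0.0.0, the server listens on all interfaces inside the container and only a port mapping is needed. A hedged run sketch (model path illustrative):

    docker build -f .devops/llama-server.Dockerfile -t llama-server .
    docker run -p 8080:8080 -v "$PWD/models":/models llama-server -m /models/model.gguf

The HEALTHCHECK then polls http://localhost:8080/health inside the container.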
3 changes: 2 additions & 1 deletion .devops/nix/package.nix
@@ -31,6 +31,7 @@
# Increases the runtime closure size by ~700M
useMpi ? false,
useRocm ? config.rocmSupport,
+ rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
enableCurl ? true,
useVulkan ? false,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -188,7 +189,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
]
++ optionals useRocm [
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
- (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
]
++ optionals useMetalKit [
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
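Note: exposing rocmGpuTargets as a package argument lets users trim the HIP target list without patching the derivation. A hedged override sketch (the gfx1100 target and the rocm package attribute are assumptions, not from this diff):

    nix build --impure --expr '(builtins.getFlake (toString ./.)).packages.${builtins.currentSystem}.rocm.override { rocmGpuTargets = "gfx1100"; }'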
2 changes: 1 addition & 1 deletion .devops/nix/python-scripts.nix
@@ -34,7 +34,7 @@ let

# server tests
openai
- behave
+ pytest
prometheus-client
];
in
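Note: this tracks the server test suite's migration from behave to pytest (#10416). A hedged sketch of running the tests, assuming they live under examples/server/tests:

    cd examples/server/tests
    pytest -v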