32 commits
d30de24
feat: add streaming tool use
lsorber Dec 25, 2024
8ce5e4f
fix: remove strict=True to support Python 3.9
lsorber Dec 25, 2024
d7215f3
feat: improve tool use robustness
lsorber Jan 5, 2025
50accd4
test: skip if insufficient resources on macOS
lsorber Jan 12, 2025
9f8bd21
fix: apply missing _convert_text_completion_logprobs_to_chat
lsorber Mar 14, 2025
e41ae12
feat: Add Gemma3 chat handler (#1976)
kossum Mar 30, 2025
150a4a5
resolve the image embedding issue in gemma3
kossum Apr 2, 2025
60443dd
fix: added n_ctx check for prompt requirements when embedding images …
kossum Apr 3, 2025
5d52b03
fix: modify the gemma3 chat template to be compatible with openai api
kossum Apr 4, 2025
126a13d
fix: add compatibility with v0.3.9 for Gemma3ChatHandler
kossum Jun 4, 2025
f31ac2e
feat: abstract context creation and expose for recreation
okaris Jun 20, 2025
c3debdf
feat: add usage to streaming response
okaris Jun 23, 2025
3d776cd
switch to llama.cpp fork and llama : expose C API to get layer device…
okaris Jun 24, 2025
ceb2a7e
chore: empty commit to trigger rebuild downstream
okaris Jun 24, 2025
6d80d61
c definitions
okaris Jun 24, 2025
8d7001e
chore: bump empty commit
okaris Jun 24, 2025
3fc6b15
migrate llava to mtmd
okaris Jun 24, 2025
5d8583b
port kv_cache to new memory
okaris Jun 24, 2025
5dfb439
cleanup
okaris Jun 24, 2025
03ce53b
fixes
okaris Jun 24, 2025
ffff841
migrate clip to mtmd
okaris Jun 25, 2025
4cf4b15
migrate clip to mtmd
okaris Jun 25, 2025
22a16bd
add general purpose function calling handler
okaris Jun 26, 2025
b2ca084
add general purpose function calling handler
okaris Jun 26, 2025
3d7bc26
add general purpose function calling handler
okaris Jun 26, 2025
4f27cc3
fix recreate context
okaris Jul 4, 2025
8bbdc8b
bump llama.cpp
okaris Jul 4, 2025
36cb6a1
fix deprecated
okaris Jul 4, 2025
d930cfe
fixes
okaris Jul 4, 2025
f006860
fixes
okaris Jul 4, 2025
605998e
fixes
okaris Jul 4, 2025
3d30f0c
Update llama.py - Fix embedding generation error
MikeLP Jul 12, 2025
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,3 +1,3 @@
 [submodule "vendor/llama.cpp"]
 	path = vendor/llama.cpp
-	url = https://github.com/ggerganov/llama.cpp.git
+	url = http://github.com/inference-sh/llama.cpp
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.21)
 project(llama_cpp)

 option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
-option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
+option(MTMD_BUILD "Build multimodal (mtmd) shared library and install alongside python package" ON)

 function(llama_cpp_python_install_target target)
     if(NOT TARGET ${target})
@@ -143,7 +143,7 @@ if (LLAMA_BUILD)
         )
     endif()

-    if (LLAVA_BUILD)
+    if (MTMD_BUILD)
        if (LLAMA_CUBLAS OR LLAMA_CUDA)
            add_compile_definitions(GGML_USE_CUBLAS)
            add_compile_definitions(GGML_USE_CUDA)
@@ -153,7 +153,7 @@ if (LLAMA_BUILD)
            add_compile_definitions(GGML_USE_METAL)
        endif()

-        # Building llava
+        # Building multimodal support using mtmd
        add_subdirectory(vendor/llama.cpp/tools/mtmd)

        if (WIN32)
2 changes: 1 addition & 1 deletion examples/notebooks/Batching.ipynb
@@ -230,7 +230,7 @@
 "outputs": [],
 "source": [
 "for i in range(n_parallel):\n",
-"    llama_cpp.llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens)"
+"    llama_cpp.llama_kv_self_seq_cp(ctx, 0, i, 0, batch.n_tokens)"
 ]
 },
 {
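This one-line change tracks llama.cpp's rename of the KV-cache sequence-copy API from llama_kv_cache_seq_cp to llama_kv_self_seq_cp. A minimal sketch of the pattern the notebook relies on, with broadcast_prompt_cache as a hypothetical helper name and ctx/batch assumed to come from the usual low-level context and batch setup:

    import llama_cpp

    def broadcast_prompt_cache(ctx, batch, n_parallel: int) -> None:
        """Copy sequence 0's cached prompt to sequences 1..n_parallel-1.

        After the shared prompt has been decoded once into sequence 0,
        every parallel sequence can continue from the same prefix
        without re-evaluating the prompt.
        """
        for i in range(1, n_parallel):
            # Arguments: (ctx, src_seq, dst_seq, p0, p1) -- copies the
            # cached positions [p0, p1) from src_seq to dst_seq.
            llama_cpp.llama_kv_self_seq_cp(ctx, 0, i, 0, batch.n_tokens)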
2 changes: 1 addition & 1 deletion llama_cpp/_ctypes_extensions.py
@@ -128,4 +128,4 @@ def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCD
     ...


-byref = _byref if TYPE_CHECKING else ctypes.byref
+byref = _byref if TYPE_CHECKING else ctypes.byref
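The old and new lines here are identical, so this hunk likely just normalizes the end-of-file newline. The assignment itself is the standard typing.TYPE_CHECKING shim: type checkers see a precisely typed stub, while the runtime binds the real ctypes.byref. A generic sketch of the same pattern, with illustrative names rather than the repo's actual CtypesCData/CtypesRef aliases:

    import ctypes
    from typing import TYPE_CHECKING, Any, Optional

    if TYPE_CHECKING:
        # Seen only by static type checkers, never executed at runtime:
        # a stub whose signature carries the precise types.
        def _byref(obj: Any, offset: Optional[int] = None) -> Any: ...

    # TYPE_CHECKING is False at runtime, so the real ctypes.byref is bound;
    # type checkers instead resolve byref to the typed _byref stub above.
    byref = _byref if TYPE_CHECKING else ctypes.byref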
17 changes: 15 additions & 2 deletions llama_cpp/_internals.py
@@ -2,6 +2,7 @@

 import os
 import ctypes
+from enum import Enum

 from typing import (
     Dict,
@@ -26,7 +27,13 @@


 # Python wrappers over llama.h structs
-
+class LlamaBackendDev(Enum):
+    # CPU device using system memory
+    CPU = 0
+    # GPU device using dedicated memory
+    GPU = 1
+    # accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
+    ACCEL = 2

 class LlamaModel:
     """Intermediate Python wrapper for a llama.cpp llama_model.
@@ -95,7 +102,13 @@ def n_ctx_train(self) -> int:
         return llama_cpp.llama_model_n_ctx_train(self.model)

     def n_embd(self) -> int:
-        return llama_cpp.llama_model_n_embd(self.model)
+        return llama_cpp.llama_n_embd(self.model)
+
+    def n_layer(self) -> int:
+        return llama_cpp.llama_n_layer(self.model)
+
+    def dev_layer(self, il: int) -> LlamaBackendDev:
+        return LlamaBackendDev(llama_cpp.llama_model_dev_layer(self.model, il))

     def rope_freq_scale_train(self) -> float:
         return llama_cpp.llama_model_rope_freq_scale_train(self.model)
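Together with the LlamaBackendDev enum above, the new n_layer and dev_layer accessors surface the per-layer device placement that the forked llama.cpp exposes through llama_model_dev_layer (see the "expose C API to get layer device" commit). A hedged sketch of how a caller might use them to check how many layers were actually offloaded; summarize_layer_devices is a hypothetical helper and model is assumed to be an already-constructed LlamaModel:

    from collections import Counter

    def summarize_layer_devices(model) -> Counter:
        """Count how many of the model's layers sit on each device type."""
        counts: Counter = Counter()
        for il in range(model.n_layer()):
            # dev_layer returns a LlamaBackendDev member (CPU, GPU, or ACCEL)
            counts[model.dev_layer(il).name] += 1
        return counts

    # e.g. Counter({'GPU': 30, 'CPU': 2}) when most layers are offloaded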