Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
042b51a
Applied port patches
jpgaribotti Aug 13, 2025
4b22bce
Export mtmd target
jpgaribotti Aug 13, 2025
45f6198
Rebase temp-load-from-buffer and merge into master (#7)
jesusmb1995 Aug 28, 2025
f218f1d
Add option to build only mtmd library (#8)
jpgaribotti Aug 28, 2025
3b0e639
Add approval-check-worker workflow
kapilsingh421 Aug 29, 2025
e2a5a4a
Add CODEOWNERS file (#9)
chetasr Sep 4, 2025
00a367e
Create merging_strategy.md
olyasir Sep 11, 2025
1473dce
Merge pull request #15 from tetherto/temp-olyasir-merging_strategy
olyasir Sep 11, 2025
68cc760
Tune python scripts
jesusmb1995 Sep 17, 2025
ae1d001
remove_unneeded_script
jesusmb1995 Sep 17, 2025
646fdc5
Merge pull request #16 from jesusmb1995/jmb/tune_scripts2
jpgaribotti Sep 22, 2025
c88d653
Fix CMakeLists to support building with LLAMA_MTMD on or off
jpgaribotti Sep 23, 2025
d961fab
Merge pull request #19 from jpgaribotti/QVAC-6114
jpgaribotti Sep 23, 2025
f1d7a5a
char_buff_stream
jesusmb1995 Sep 19, 2025
26519c7
Corrected build interface for libmtmd
jpgaribotti Sep 23, 2025
285713d
Merge pull request #20 from jpgaribotti/QVAC-6114
jpgaribotti Sep 23, 2025
ff75661
Make LLAMA_MTMD dependent on LLAMA_BUILD_TOOLS if not specified
jpgaribotti Sep 23, 2025
dc3dd18
Merge pull request #17 from jesusmb1995/jmb/char_stream
jpgaribotti Sep 25, 2025
cb421f2
fix_include
jesusmb1995 Sep 30, 2025
9009b2b
Merge pull request #25 from jesusmb1995/jmb/fixup_include
jpgaribotti Sep 30, 2025
29361f9
remove_check
jesusmb1995 Sep 30, 2025
55bf86c
Merge pull request #24 from jesusmb1995/jmb/update_to_char_streams2
jpgaribotti Sep 30, 2025
80d71b1
fix_ci_cmake_pkg
jesusmb1995 Oct 1, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ exclude =
build,
# This contains builds that we don't want to check
dist # This is generated with `python build .` for package releases
scripts/tune
# max-complexity = 10
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @tetherto/ai-runtime-bk-models
19 changes: 19 additions & 0 deletions .github/workflows/approval-check-worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Re-evaluates pull-request approvals every time a review is submitted or
# dismissed by delegating to a shared reusable workflow.
name: Approval Check Worker

on:
  pull_request_review:
    types: [submitted, dismissed]

jobs:
  check-approvals:
    # Token scopes handed to the called workflow.
    permissions:
      contents: write
      pull-requests: write
      statuses: write
      issues: write

    # Reusable workflow from tetherto/qvac-devops, referenced by the
    # production-workflows-tag ref.
    uses: tetherto/qvac-devops/.github/workflows/approval-check-worker.yml@production-workflows-tag
    secrets: inherit
    with:
      # Pull request that received the review, and its current head commit.
      pr_number: ${{ github.event.pull_request.number }}
      pr_sha: ${{ github.event.pull_request.head.sha }}
4 changes: 2 additions & 2 deletions .github/workflows/build-cmake-pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
cmake --build build --config Release
cmake --install build --prefix "$PREFIX" --config Release

export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
export LLAMA_CONFIG="$PREFIX"/share/cmake/llama/llama-config.cmake
tclsh <<'EOF'
set build(commit) [string trim [exec git rev-parse --short HEAD]]
set build(number) [string trim [exec git rev-list --count HEAD]]
Expand All @@ -47,5 +47,5 @@ jobs:
EOF

cd examples/simple-cmake-pkg
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/share/cmake
cmake --build build
58 changes: 49 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})

# specific extras
option(LLAMA_MTMD "llama: multimodal support" ${LLAMA_BUILD_TOOLS})

# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
Expand Down Expand Up @@ -194,6 +197,10 @@ if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
endif()

if(LLAMA_BUILD_EXAMPLES OR LLAMA_BUILD_TESTS)
add_subdirectory(common_test)
endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
include(CTest)
add_subdirectory(tests)
Expand All @@ -206,6 +213,8 @@ endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
add_subdirectory(tools)
elseif (LLAMA_MTMD)
add_subdirectory(tools/mtmd)
endif()

#
Expand All @@ -215,7 +224,7 @@ endif()
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}/llama CACHE PATH "Location of header files")
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

Expand All @@ -227,15 +236,46 @@ set_target_properties(llama
PROPERTIES
PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")

install(TARGETS llama LIBRARY PUBLIC_HEADER)
install(
TARGETS llama
EXPORT llama-targets
PUBLIC_HEADER
DESTINATION ${LLAMA_INCLUDE_INSTALL_DIR})

if (LLAMA_BUILD_COMMON)

install(
TARGETS common build_info
EXPORT llama-targets
PUBLIC_HEADER
DESTINATION ${LLAMA_INCLUDE_INSTALL_DIR}/common)

endif()

if (LLAMA_MTMD)

install(
TARGETS mtmd
EXPORT llama-targets
PUBLIC_HEADER
DESTINATION ${LLAMA_INCLUDE_INSTALL_DIR}/mtmd)

endif()

install(
EXPORT llama-targets
FILE llama-targets.cmake
NAMESPACE llama::
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/llama)

install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/llama)

configure_package_config_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama
PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
LLAMA_LIB_INSTALL_DIR
LLAMA_BIN_INSTALL_DIR )
${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/llama)

write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
Expand All @@ -244,7 +284,7 @@ write_basic_package_version_file(

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/llama)

install(
FILES convert_hf_to_gguf.py
Expand Down
25 changes: 4 additions & 21 deletions cmake/llama-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,9 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)

@PACKAGE_INIT@

set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
include(CMakeFindDependencyMacro)
find_dependency(ggml CONFIG REQUIRED)

find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
include("${CMAKE_CURRENT_LIST_DIR}/llama-targets.cmake")

find_library(llama_LIBRARY llama
REQUIRED
HINTS ${LLAMA_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)

add_library(llama UNKNOWN IMPORTED)
set_target_properties(llama
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${llama_LIBRARY}"
INTERFACE_COMPILE_FEATURES c_std_90
POSITION_INDEPENDENT_CODE ON)

check_required_components(Llama)
check_required_components(llama)
40 changes: 27 additions & 13 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,34 +44,43 @@ endif()

set(TARGET common)

# Headers of the ${TARGET} library. The list is passed to add_library() and
# also published through the target's PUBLIC_HEADER property further down in
# this file.
set(${TARGET}_HEADERS
    arg.h
    base64.hpp
    chat-parser.h
    chat.h
    common.h
    console.h
    json-partial.h
    json-schema-to-grammar.h
    log.h
    ngram-cache.h
    regex-partial.h
    sampling.h
    speculative.h
)

# Turn the names into absolute paths. The prefix is this directory
# (CMAKE_CURRENT_SOURCE_DIR) rather than CMAKE_SOURCE_DIR/common: the latter
# points at the outermost project and therefore at the wrong location when
# this tree is pulled into another build with add_subdirectory().
list(TRANSFORM ${TARGET}_HEADERS PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")

add_library(${TARGET} STATIC
arg.cpp
arg.h
base64.hpp
chat-parser.cpp
chat-parser.h
chat.cpp
chat.h
common.cpp
common.h
console.cpp
console.h
json-partial.cpp
json-partial.h
json-schema-to-grammar.cpp
llguidance.cpp
log.cpp
log.h
ngram-cache.cpp
ngram-cache.h
regex-partial.cpp
regex-partial.h
sampling.cpp
sampling.h
speculative.cpp
speculative.h
${${TARGET}_HEADERS}
)

set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "${${TARGET}_HEADERS}")

if (BUILD_SHARED_LIBS)
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
Expand Down Expand Up @@ -133,7 +142,12 @@ if (LLAMA_LLGUIDANCE)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()

target_include_directories(${TARGET} PUBLIC . ../vendor)
# Include paths exported by ${TARGET}.
#  - Build tree: this directory and the sibling vendor/ directory. Both are
#    anchored to CMAKE_CURRENT_SOURCE_DIR so they stay correct when the
#    project is embedded in a larger build (CMAKE_SOURCE_DIR would then refer
#    to the embedding project instead).
#  - Install tree: the installation's include directory.
target_include_directories(
    ${TARGET}
    PUBLIC
        "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
        "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../vendor>"
        "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>")

target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

Expand Down
24 changes: 14 additions & 10 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,16 +895,7 @@ std::string fs_get_cache_file(const std::string & filename) {
// Model utils
//

struct common_init_result common_init_from_params(common_params & params) {
common_init_result iparams;
auto mparams = common_model_params_to_llama(params);

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
if (model == NULL) {
LOG_ERR("%s: failed to load model '%s', try reducing --n-gpu-layers if you're running out of VRAM\n",
__func__, params.model.path.c_str());
return iparams;
}
struct common_init_result common_init_from_model_and_params(llama_model* model, common_init_result iparams, common_params & params) {

const llama_vocab * vocab = llama_model_get_vocab(model);

Expand Down Expand Up @@ -1077,6 +1068,19 @@ struct common_init_result common_init_from_params(common_params & params) {
return iparams;
}

// Loads the model at params.model.path using model parameters derived from
// `params`, then forwards the loaded model to
// common_init_from_model_and_params() to finish initialization.
// If loading fails, an error is logged and a default-constructed
// common_init_result is returned.
struct common_init_result common_init_from_params(common_params & params) {
    common_init_result result;

    auto model_params = common_model_params_to_llama(params);
    llama_model * loaded = llama_model_load_from_file(params.model.path.c_str(), model_params);

    if (loaded != NULL) {
        // Success: hand the model and the (still empty) result over.
        return common_init_from_model_and_params(loaded, std::move(result), params);
    }

    LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
    return result;
}

std::string get_model_endpoint() {
const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
// We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
Expand Down
2 changes: 2 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,8 @@ struct common_init_result {
};

struct common_init_result common_init_from_params(common_params & params);
struct common_init_result common_init_from_model_and_params(llama_model * model, common_init_result iparams,
common_params & params);

struct llama_model_params common_model_params_to_llama ( common_params & params);
struct llama_context_params common_context_params_to_llama(const common_params & params);
Expand Down
15 changes: 15 additions & 0 deletions common_test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Interface (header-only) target for the common_test helpers
# load_into_memory.h and uint8-buff-stream.h.

set(TARGET llama-common-test)

add_library(${TARGET} INTERFACE)

# Everything below is a usage requirement: it applies to targets that link
# against ${TARGET}, not to ${TARGET} itself.

# Consumers are compiled as C++17 and with LLAMA_COMMON_TEST_HEADERS defined.
target_compile_features(${TARGET} INTERFACE cxx_std_17)
target_compile_definitions(${TARGET} INTERFACE LLAMA_COMMON_TEST_HEADERS)

# Consumers get this directory on their include path.
target_include_directories(${TARGET} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

# Consumers also link against the common library.
target_link_libraries(${TARGET} INTERFACE common)
Loading