12 changes: 10 additions & 2 deletions examples/CMakeLists.txt
@@ -20,7 +20,12 @@ else()
add_subdirectory(batched)
add_subdirectory(embedding)
add_subdirectory(eval-callback)
add_subdirectory(gbnf-validator)

if (NOT WIN32)
# disabled on Windows because it uses internal functions not exported with LLAMA_API
add_subdirectory(gbnf-validator)
endif()

add_subdirectory(gguf-hash)
add_subdirectory(gguf-split)
add_subdirectory(gguf)
@@ -51,7 +56,10 @@ else()
add_subdirectory(convert-llama2c-to-ggml)
add_subdirectory(cvector-generator)
add_subdirectory(export-lora)
add_subdirectory(quantize-stats)
if (NOT WIN32)
# disabled on Windows because it uses internal functions not exported with LLAMA_API
add_subdirectory(quantize-stats)
endif()
add_subdirectory(llava)
if (GGML_RPC)
add_subdirectory(rpc)
4 changes: 2 additions & 2 deletions examples/gguf-split/gguf-split.cpp
@@ -287,7 +287,7 @@ struct split_strategy {
}

void print_info() {
printf("n_split: %ld\n", ctx_outs.size());
printf("n_split: %zu\n", ctx_outs.size());
int i_split = 0;
for (auto & ctx_out : ctx_outs) {
// re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
@@ -297,7 +297,7 @@ struct split_strategy {
total_size += ggml_nbytes(t);
}
total_size = total_size / 1000 / 1000; // convert to megabytes
printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
i_split++;
}
}
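The format-string fixes here and in llama-bench, retrieval, and tokenize below all follow one pattern: `size_t` values were being printed with `%ld`, which only works where `long` is as wide as `size_t`. On 64-bit Windows (LLP64) `long` is 32 bits, so the mismatch is undefined behavior and trips MinGW's format warnings; `%zu` is the portable specifier. A minimal illustration (not part of the PR):

```c
#include <stdio.h>
#include <stddef.h>

int main(void) {
    size_t n = sizeof(long);           // 4 on 64-bit Windows (LLP64), 8 on 64-bit Linux (LP64)
    printf("sizeof(long) = %zu\n", n); // %zu matches size_t on every platform
    // printf("%ld\n", n);             // mismatched where long is narrower than size_t
    return 0;
}
```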
10 changes: 5 additions & 5 deletions examples/llama-bench/llama-bench.cpp
@@ -1521,7 +1521,7 @@ int main(int argc, char ** argv) {
for (const auto & inst : params_instances) {
params_idx++;
if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
fprintf(stderr, "llama-bench: benchmark %d/%zu: starting\n", params_idx, params_count);
}
// keep the same model between tests when possible
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
@@ -1573,14 +1573,14 @@
// warmup run
if (t.n_prompt > 0) {
if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count);
}
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
}
if (t.n_gen > 0) {
if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count);
}
test_gen(ctx, 1, t.n_threads);
}
@@ -1592,14 +1592,14 @@

if (t.n_prompt > 0) {
if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count,
fprintf(stderr, "llama-bench: benchmark %d/%zu: prompt run %d/%d\n", params_idx, params_count,
i + 1, params.reps);
}
test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
}
if (t.n_gen > 0) {
if (params.progress) {
fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count,
fprintf(stderr, "llama-bench: benchmark %d/%zu: generation run %d/%d\n", params_idx, params_count,
i + 1, params.reps);
}
test_gen(ctx, t.n_gen, t.n_threads);
2 changes: 1 addition & 1 deletion examples/retrieval/retrieval.cpp
@@ -143,7 +143,7 @@ int main(int argc, char ** argv) {
std::vector<chunk> file_chunk = chunk_file(context_file, params.chunk_size, params.chunk_separator);
chunks.insert(chunks.end(), file_chunk.begin(), file_chunk.end());
}
LOG_INF("Number of chunks: %ld\n", chunks.size());
LOG_INF("Number of chunks: %zu\n", chunks.size());

llama_backend_init();
llama_numa_init(params.numa);
2 changes: 1 addition & 1 deletion examples/tokenize/tokenize.cpp
@@ -394,7 +394,7 @@ int main(int raw_argc, char ** raw_argv) {
}

if (show_token_count) {
printf("Total number of tokens: %ld\n", tokens.size());
printf("Total number of tokens: %zu\n", tokens.size());
}
// silence valgrind
llama_free(ctx);
7 changes: 7 additions & 0 deletions ggml/CMakeLists.txt
@@ -32,6 +32,13 @@ else()
endif()
endif()

# remove the lib prefix on win32 mingw
if (WIN32)
set(CMAKE_STATIC_LIBRARY_PREFIX "")
set(CMAKE_SHARED_LIBRARY_PREFIX "")
set(CMAKE_SHARED_MODULE_PREFIX "")
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)

5 changes: 0 additions & 5 deletions ggml/src/CMakeLists.txt
@@ -194,11 +194,6 @@ endif()

if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)

if (BUILD_SHARED_LIBS)
# TODO: should not use this
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
endif()

# ggml
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/amx/amx.cpp
@@ -122,7 +122,7 @@ static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_ty
}

static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
void * data = aligned_alloc(TENSOR_ALIGNMENT, size);
void * data = ggml_aligned_malloc(size);
if (data == NULL) {
fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
return NULL;
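For context: `aligned_alloc` comes from C11 and is not provided by every Windows C runtime (MSVC's CRT offers `_aligned_malloc` instead), so the AMX buffer now allocates through ggml's own `ggml_aligned_malloc`, which the ggml-impl.h change below also marks with `GGML_API`. A rough sketch of how such a wrapper is commonly written — the alignment value and error handling here are assumptions, not ggml's actual code:

```c
#include <stdlib.h>
#ifdef _WIN32
#include <malloc.h>   // _aligned_malloc / _aligned_free
#endif

#define SKETCH_ALIGNMENT 64   // hypothetical; ggml uses its own tensor alignment constant

static void * sketch_aligned_malloc(size_t size) {
#ifdef _WIN32
    return _aligned_malloc(size, SKETCH_ALIGNMENT);     // release with _aligned_free()
#else
    void * ptr = NULL;
    if (posix_memalign(&ptr, SKETCH_ALIGNMENT, size) != 0) {
        return NULL;                                     // allocation failed
    }
    return ptr;                                          // release with free()
#endif
}
```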
5 changes: 2 additions & 3 deletions ggml/src/ggml-cpu/ggml-cpu.c
@@ -126,8 +126,7 @@ struct ggml_arm_arch_features_type {
#endif
#include <windows.h>


#if !defined(__clang__)
#if defined(_MSC_VER) && !defined(__clang__)
#define GGML_CACHE_ALIGN __declspec(align(GGML_CACHE_LINE))

typedef volatile LONG atomic_int;
@@ -12945,7 +12944,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
#include "windows.h"

// TODO: support > 64 CPUs
bool ggml_thread_apply_affinity(bool * mask) {
static bool ggml_thread_apply_affinity(bool * mask) {
HANDLE h = GetCurrentThread();
uint64_t bitmask = 0ULL;

8 changes: 4 additions & 4 deletions ggml/src/ggml-impl.h
@@ -74,8 +74,8 @@ static inline int ggml_up(int n, int m) {
//

GGML_ATTRIBUTE_FORMAT(2, 3)
void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);

#define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
#define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
@@ -304,8 +304,8 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);

// Memory allocation

void * ggml_aligned_malloc(size_t size);
void ggml_aligned_free(void * ptr, size_t size);
GGML_API void * ggml_aligned_malloc(size_t size);
GGML_API void ggml_aligned_free(void * ptr, size_t size);

// FP16 to FP32 conversion

6 changes: 4 additions & 2 deletions ggml/src/ggml-threading.h
@@ -1,11 +1,13 @@
#pragma once

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

void ggml_critical_section_start(void);
void ggml_critical_section_end(void);
GGML_API void ggml_critical_section_start(void);
GGML_API void ggml_critical_section_end(void);

#ifdef __cplusplus
}
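The header changes above are the heart of the PR: once `CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS` is gone (removed from ggml/src/CMakeLists.txt above and from src/CMakeLists.txt just below), a Windows DLL exports only the symbols that are explicitly annotated. Internal helpers that other ggml/llama binaries still call therefore gain `GGML_API`, while examples and tests that reach into unexported internals are disabled on Windows instead. The macro itself is defined in ggml.h and typically follows the pattern below; the `GGML_SHARED`/`GGML_BUILD` names are the conventional switches and may not match the current header exactly:

```c
// Illustrative export-macro pattern, not copied verbatim from ggml.h.
#ifndef GGML_API
#    if defined(GGML_SHARED) && defined(_WIN32)
#        ifdef GGML_BUILD
#            define GGML_API __declspec(dllexport)   // building the DLL: publish the symbol
#        else
#            define GGML_API __declspec(dllimport)   // consuming the DLL: import the symbol
#        endif
#    elif defined(__GNUC__)
#        define GGML_API __attribute__((visibility("default")))
#    else
#        define GGML_API
#    endif
#endif
```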
7 changes: 0 additions & 7 deletions src/CMakeLists.txt
@@ -1,10 +1,3 @@
# TODO: should not use this
if (WIN32)
if (BUILD_SHARED_LIBS)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
endif()

llama_add_compile_flags()

#
6 changes: 3 additions & 3 deletions src/llama.cpp
@@ -1794,7 +1794,7 @@ struct llama_file {
DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
if (!bufLen) {
ret = format("Win32 error code: %s", error_code);
ret = format("Win32 error code: %lx", error_code);
} else {
ret = lpMsgBuf;
LocalFree(lpMsgBuf);
@@ -2132,7 +2132,7 @@ struct llama_mmap {
HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");

// may fail on pre-Windows 8 systems
pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");

if (pPrefetchVirtualMemory) {
// advise the kernel to preload the mapped memory
@@ -21577,7 +21577,7 @@ float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) {
throw std::runtime_error(format("negative index out of range [0, %d)", ctx->n_outputs));
}
} else if ((size_t) i >= ctx->output_ids.size()) {
throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
throw std::runtime_error(format("out of range [0, %zu)", ctx->output_ids.size()));
} else {
j = ctx->output_ids[i];
}
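The llama.cpp hunks above are small Windows correctness fixes: the FormatMessageA fallback now formats the DWORD error code with `%lx` instead of passing it to `%s`, the embeddings bounds message uses `%zu` for a `size_t`, and the PrefetchVirtualMemory lookup casts the `FARPROC` from GetProcAddress through `(void *)` — the usual way to silence GCC/MinGW's -Wcast-function-type warning (presumed intent; the PR does not state it). A Windows-only sketch of that cast pattern:

```c
#define _WIN32_WINNT 0x0602   // Windows 8+, where PrefetchVirtualMemory and its types are declared
#include <windows.h>

// Matches the documented PrefetchVirtualMemory signature.
typedef BOOL (WINAPI * PrefetchVirtualMemory_t)(HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);

static PrefetchVirtualMemory_t load_prefetch(void) {
    HMODULE kernel32 = GetModuleHandleW(L"kernel32.dll");
    // Casting through (void *) first avoids -Wcast-function-type on GCC/MinGW builds.
    return (PrefetchVirtualMemory_t)(void *) GetProcAddress(kernel32, "PrefetchVirtualMemory");
}
```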
73 changes: 40 additions & 33 deletions tests/CMakeLists.txt
@@ -84,38 +84,50 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

# build test-tokenizer-1-bpe target once and add many tests
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
install(TARGETS test-tokenizer-1-bpe RUNTIME)

# TODO: disabled due to slowness
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

# build test-tokenizer-1-spm target once and add many tests
add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
target_link_libraries(test-tokenizer-1-spm PRIVATE common)
install(TARGETS test-tokenizer-1-spm RUNTIME)

llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
#llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)

# llama_target_and_test(test-double-float.cpp) # SLOW

if (NOT WIN32)
# these tests are disabled on Windows because they use internal functions not exported with LLAMA_API
llama_target_and_test(test-sampling.cpp)
llama_target_and_test(test-grammar-parser.cpp)
llama_target_and_test(test-grammar-integration.cpp)
llama_target_and_test(test-llama-grammar.cpp)
# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
endif()


# build test-tokenizer-1-bpe target once and add many tests
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
install(TARGETS test-tokenizer-1-bpe RUNTIME)

# TODO: disabled due to slowness
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)

# build test-tokenizer-1-spm target once and add many tests
add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
target_link_libraries(test-tokenizer-1-spm PRIVATE common)
install(TARGETS test-tokenizer-1-spm RUNTIME)

llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
#llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)

# llama_target_and_test(test-double-float.cpp) # SLOW
endif()

llama_target_and_test(test-log.cpp)
llama_target_and_test(test-arg-parser.cpp)
llama_target_and_test(test-sampling.cpp)
llama_target_and_test(test-chat-template.cpp)

llama_target_and_test(test-grammar-parser.cpp)
llama_target_and_test(test-grammar-integration.cpp)
llama_target_and_test(test-llama-grammar.cpp)
# llama_target_and_test(test-opt.cpp) # SLOW
llama_target_and_test(test-backend-ops.cpp)

@@ -130,11 +142,6 @@ if (NOT GGML_BACKEND_DL)
llama_target_and_test(test-rope.cpp)
endif()

# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
endif()

# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)