Skip to content

Commit 378986d

Browse files
authored
Merge branch 'ikawrakow:main' into main
2 parents 27125b1 + b94f3af commit 378986d

File tree

3 files changed

+22
-0
lines changed

3 files changed

+22
-0
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ endif()
104104

105105
# ggml core
106106
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
107+
# Optional build-time override for the maximum number of ggml contexts.
# Empty by default, meaning "no override": per the help text, the code then uses its own default of 64.
set(GGML_MAX_CONTEXTS "" CACHE STRING "ggml: max model contexts (override only; defaults to 64 in the code)")
107108

108109
# 3rd party libs / backends
109110
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)

ggml/src/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ include(CheckCXXCompilerFlag)
33
unset(GGML_CDEF_PUBLIC)
44

55
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
6+
# Forward the optional GGML_MAX_CONTEXTS override to the compiler.
# An empty (or otherwise false) value means "no override": no definition is added.
if (GGML_MAX_CONTEXTS)
    # The value is consumed by preprocessor arithmetic (#if GGML_MAX_CONTEXTS > N),
    # so reject anything that is not a plain positive integer at configure time
    # instead of letting it surface later as an obscure compile error.
    if (NOT "${GGML_MAX_CONTEXTS}" MATCHES "^[1-9][0-9]*$")
        message(FATAL_ERROR "GGML_MAX_CONTEXTS must be a positive integer, got '${GGML_MAX_CONTEXTS}'")
    endif()
    add_compile_definitions(GGML_MAX_CONTEXTS=${GGML_MAX_CONTEXTS})
endif()
69

710
# enable libstdc++ assertions for debug builds
811
if (CMAKE_SYSTEM_NAME MATCHES "Linux")

src/llama.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4265,6 +4265,24 @@ struct llama_model_loader {
42654265
trace = atoi(getenv("LLAMA_TRACE"));
42664266
}
42674267

4268+
#ifdef _WIN32
        // Raise the C runtime's limit on simultaneously open stdio streams, but only
        // when the build was configured for more than 512 contexts.
#if defined(GGML_MAX_CONTEXTS) && (GGML_MAX_CONTEXTS > 512)
        // Cap at MSVC's hard limit of 8192 - https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160
        // (The helper macro avoids a leading underscore + uppercase letter, which is a reserved identifier.)
#if (GGML_MAX_CONTEXTS > 8192)
#define LLAMA_STDIO_TARGET 8192
#else
#define LLAMA_STDIO_TARGET GGML_MAX_CONTEXTS
#endif
        {
            // Never lower a limit that is already high enough (e.g. raised earlier by the
            // host application or by a previously constructed loader).
            const int cur_max_stdio = _getmaxstdio();
            if (cur_max_stdio < LLAMA_STDIO_TARGET) {
                const int new_max_stdio = _setmaxstdio(LLAMA_STDIO_TARGET);
                if (new_max_stdio == -1) {
                    // Not fatal: loading continues with the current limit, but it deserves a warning.
                    LLAMA_LOG_WARN("%s: failed to set max stdio to %d. (setmaxstdio returned -1)\n", __func__, LLAMA_STDIO_TARGET);
                } else {
                    LLAMA_LOG_INFO("%s: max stdio successfully set to %d\n", __func__, new_max_stdio);
                }
            }
        }
#undef LLAMA_STDIO_TARGET
#endif // GGML_MAX_CONTEXTS > 512
#endif // _WIN32
4285+
42684286
if (param_overrides_p != nullptr) {
42694287
for (const struct llama_model_kv_override * p = param_overrides_p; p->key[0] != 0; p++) {
42704288
kv_overrides.insert({std::string(p->key), *p});

0 commit comments

Comments
 (0)