File tree Expand file tree Collapse file tree 3 files changed +22
-0
lines changed Expand file tree Collapse file tree 3 files changed +22
-0
lines changed Original file line number Diff line number Diff line change @@ -104,6 +104,7 @@ endif()
104104
105105# ggml core
106106set (GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism" )
107+ set (GGML_MAX_CONTEXTS "" CACHE STRING "ggml: max model contexts (override only; defaults to 64 in the code)" )
107108
108109# 3rd party libs / backends
109110option (GGML_ACCELERATE "ggml: enable Accelerate framework" ON )
Original file line number Diff line number Diff line change @@ -3,6 +3,9 @@ include(CheckCXXCompilerFlag)
33unset (GGML_CDEF_PUBLIC)
44
55add_compile_definitions (GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES} )
6+ if (GGML_MAX_CONTEXTS)
7+ add_compile_definitions (GGML_MAX_CONTEXTS=${GGML_MAX_CONTEXTS} )
8+ endif ()
69
710# enable libstdc++ assertions for debug builds
811if (CMAKE_SYSTEM_NAME MATCHES "Linux" )
Original file line number Diff line number Diff line change @@ -4265,6 +4265,24 @@ struct llama_model_loader {
42654265 trace = atoi(getenv("LLAMA_TRACE"));
42664266 }
42674267
#ifdef _WIN32
        // The MSVC CRT caps the number of simultaneously open stdio streams (512 by default per
        // the _setmaxstdio documentation). Only raise that cap when the build explicitly asked
        // for more contexts than that via GGML_MAX_CONTEXTS; otherwise leave the CRT default alone.
#if defined(GGML_MAX_CONTEXTS) && (GGML_MAX_CONTEXTS > 512)
        {
            // _setmaxstdio() does not accept more than 8192, so clamp the request:
            // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160
            // A scoped constant is used instead of a macro so that nothing leaks past this block
            // and no reserved identifier (leading underscore + uppercase letter) is introduced.
#if (GGML_MAX_CONTEXTS > 8192)
            const int max_stdio_target = 8192;
#else
            const int max_stdio_target = GGML_MAX_CONTEXTS;
#endif
            // Returns the new limit on success, -1 on failure.
            const int max_stdio_ret = _setmaxstdio(max_stdio_target);
            if (max_stdio_ret == -1) {
                LLAMA_LOG_INFO("%s: failed to set max stdio to %d. (setmaxstdio returned -1)\n", __func__, max_stdio_target);
            } else {
                LLAMA_LOG_INFO("%s: max stdio successfully set to %d\n", __func__, max_stdio_ret);
            }
        }
#endif // GGML_MAX_CONTEXTS > 512
#endif // _WIN32
4285+
42684286 if (param_overrides_p != nullptr) {
42694287 for (const struct llama_model_kv_override * p = param_overrides_p; p->key[0] != 0; p++) {
42704288 kv_overrides.insert({std::string(p->key), *p});
You can’t perform that action at this time.
0 commit comments