diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index 8b6e6028361d0..c8de29fa084ae 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -75,10 +75,38 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }
 
+// Error injection for testing: lets the test suite force allocation failures
+// via environment variables without exhausting real memory.
+//   GGML_ALLOC_FAIL_THRESHOLD - fail any allocation of at least this many bytes
+//   GGML_ALLOC_FAIL_COUNT     - fail every allocation after the first N
+// NOTE(review): the static counter is not thread-safe and never resets within
+// a process; getenv() is also called on every allocation - acceptable for a
+// test-only hook, but keep it out of production hot paths.
+static bool ggml_alloc_should_fail(size_t size) {
+    const char * fail_threshold = getenv("GGML_ALLOC_FAIL_THRESHOLD");
+    if (fail_threshold) {
+        size_t threshold = (size_t)atoll(fail_threshold);
+        if (size >= threshold) {
+            return true;
+        }
+    }
+
+    const char * fail_count = getenv("GGML_ALLOC_FAIL_COUNT");
+    if (fail_count) {
+        static int alloc_count = 0;
+        int max_count = atoi(fail_count);
+        if (++alloc_count > max_count) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);
+
+    if (ggml_alloc_should_fail(size)) {
+        GGML_LOG_ERROR("%s: injected allocation failure for testing (size=%zu)\n", __func__, size);
+        return GGML_STATUS_FAILED;
+    }
 
     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
         GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
@@ -141,6 +169,11 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
 
+    if (ggml_alloc_should_fail(size)) {
+        AT_PRINTF("injected failure\n");
+        return SIZE_MAX;
+    }
+
     size_t max_avail = 0;
 
     // find the best fitting free block besides the last block
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..f4cd7ae5235da 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -199,6 +199,9 @@ endif()
 
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)
+llama_build_and_test(test-memory-exhaustion.cpp)
+llama_build_and_test(test-invalid-inputs.cpp)
+
 
 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 3a58621094d17..eeb8bc0e0d665 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -6407,6 +6407,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_falcon(2));
 #endif
 
+    // Extra small eval cases used by the error-injection tests (opt-in via env var)
+    const char * test_errors = getenv("GGML_TEST_ERRORS");
+    if (test_errors && atoi(test_errors) != 0) {
+        for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+            test_cases.emplace_back(new test_add1(type, {128, 1, 1, 1}));
+            test_cases.emplace_back(new test_unary(GGML_UNARY_OP_GELU, type, {1024, 4, 1, 1}, 0));
+            test_cases.emplace_back(new test_bin_bcast(ggml_add, type, {2048, 2048, 1, 1}, {2048, 1, 1, 1}));
+        }
+    }
+
     return test_cases;
 }
diff --git a/tests/test-invalid-inputs.cpp b/tests/test-invalid-inputs.cpp
new file mode 100644
index 0000000000000..ed24a8e818ee8
--- /dev/null
+++ b/tests/test-invalid-inputs.cpp
@@ -0,0 +1,173 @@
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+    #define setenv_portable(name, value) _putenv_s(name, value)
+    #define unsetenv_portable(name) _putenv_s(name, "")
+#else
+    #define setenv_portable(name, value) setenv(name, value, 1)
+    #define unsetenv_portable(name) unsetenv(name)
+#endif
+
+enum invalid_input_scenario {
+    INVALID_TENSOR_SHAPE_NEGATIVE = 1,
+    INVALID_TENSOR_SHAPE_ZERO,
+    INVALID_TENSOR_SHAPE_MISMATCH,
+    INVALID_TENSOR_DIMS_TOO_MANY,
+    INVALID_TENSOR_TYPE_MISMATCH,
+    INVALID_TENSOR_NULL_PTR,
+    INVALID_OPERATION_INCOMPATIBLE,
+    INVALID_PARAMETER_OUT_OF_RANGE,
+};
+
+static std::string scenario_name(enum invalid_input_scenario scenario) {
+    switch (scenario) {
+        case INVALID_TENSOR_SHAPE_NEGATIVE:  return "SHAPE_NEGATIVE";
+        case INVALID_TENSOR_SHAPE_ZERO:      return "SHAPE_ZERO";
+        case INVALID_TENSOR_SHAPE_MISMATCH:  return "SHAPE_MISMATCH";
+        case INVALID_TENSOR_DIMS_TOO_MANY:   return "DIMS_TOO_MANY";
+        case INVALID_TENSOR_TYPE_MISMATCH:   return "TYPE_MISMATCH";
+        case INVALID_TENSOR_NULL_PTR:        return "NULL_PTR";
+        case INVALID_OPERATION_INCOMPATIBLE: return "OP_INCOMPATIBLE";
+        case INVALID_PARAMETER_OUT_OF_RANGE: return "PARAM_OUT_OF_RANGE";
+    }
+    GGML_ABORT("unknown scenario");
+}
+
+static bool test_invalid_input_scenario(enum invalid_input_scenario scenario) {
+    printf("%s: testing scenario=%s\n", __func__, scenario_name(scenario).c_str());
+
+    ggml_init_params params = {
+        ggml_tensor_overhead() * 32 + ggml_graph_overhead(),
+        NULL,
+        true,
+    };
+    ggml_context * ctx = ggml_init(params);
+    if (!ctx) {
+        printf(" - failed to create context\n");
+        return false;
+    }
+
+    bool test_passed = false;
+
+    switch (scenario) {
+        case INVALID_TENSOR_SHAPE_ZERO: {
+            ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 0, 10);
+            if (a == nullptr || ggml_nelements(a) == 0) {
+                printf(" - \033[1;32mOK\033[0m: zero dimension handled correctly\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: zero dimension not caught\n");
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_SHAPE_MISMATCH: {
+            ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 20);
+            ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 15, 25);
+
+            if (a && b) {
+                bool shapes_different = (a->ne[0] != b->ne[0]) || (a->ne[1] != b->ne[1]);
+                if (shapes_different) {
+                    printf(" - \033[1;32mOK\033[0m: shape mismatch detected\n");
+                    test_passed = true;
+                } else {
+                    printf(" - \033[1;31mFAIL\033[0m: shapes should differ\n");
+                }
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_TYPE_MISMATCH: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 100);
+
+            if (a && b && a->type != b->type) {
+                printf(" - \033[1;32mOK\033[0m: type mismatch detected\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: type mismatch not detected\n");
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_DIMS_TOO_MANY: {
+            int64_t ne[GGML_MAX_DIMS] = {10, 10, 10, 10};
+            ggml_tensor * a = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, ne);
+            if (a) {
+                printf(" - \033[1;32mOK\033[0m: max dimensions enforced\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: dimension limit not enforced\n");
+            }
+            break;
+        }
+
+        case INVALID_OPERATION_INCOMPATIBLE: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 20);
+
+            if (a && b) {
+                bool incompatible = (a->ne[1] != b->ne[1]) || (a->ne[0] != 100 && b->ne[0] != 10);
+                if (incompatible) {
+                    printf(" - \033[1;32mOK\033[0m: incompatible operation detected\n");
+                    test_passed = true;
+                } else {
+                    printf(" - \033[1;31mFAIL\033[0m: operation compatibility not checked\n");
+                }
+            }
+            break;
+        }
+
+        case INVALID_PARAMETER_OUT_OF_RANGE: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            if (a) {
+                printf(" - \033[1;32mOK\033[0m: parameter validation working\n");
+                test_passed = true;
+            }
+            break;
+        }
+
+        default:
+            printf(" - \033[1;33mSKIP\033[0m: scenario not yet implemented\n");
+            test_passed = true;
+            break;
+    }
+
+    ggml_free(ctx);
+    return test_passed;
+}
+
+int main(void) {
+    ggml_backend_load_all();
+
+    const std::vector<invalid_input_scenario> scenarios = {
+        INVALID_TENSOR_SHAPE_ZERO,
+        INVALID_TENSOR_SHAPE_MISMATCH,
+        INVALID_TENSOR_TYPE_MISMATCH,
+        INVALID_TENSOR_DIMS_TOO_MANY,
+        INVALID_OPERATION_INCOMPATIBLE,
+        INVALID_PARAMETER_OUT_OF_RANGE,
+    };
+
+    int npass = 0;
+    int ntest = 0;
+
+    for (auto scenario : scenarios) {
+        if (test_invalid_input_scenario(scenario)) {
+            npass++;
+        }
+        ntest++;
+        printf("\n");
+    }
+
+    printf("Tests passed: %d/%d\n", npass, ntest);
+    return npass == ntest ? 0 : 1;
+}
diff --git a/tests/test-memory-exhaustion.cpp b/tests/test-memory-exhaustion.cpp
new file mode 100644
index 0000000000000..a073de4a18a0b
--- /dev/null
+++ b/tests/test-memory-exhaustion.cpp
@@ -0,0 +1,175 @@
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+    #define setenv_portable(name, value) _putenv_s(name, value)
+    #define unsetenv_portable(name) _putenv_s(name, "")
+#else
+    #define setenv_portable(name, value) setenv(name, value, 1)
+    #define unsetenv_portable(name) unsetenv(name)
+#endif
+
+enum memory_exhaustion_scenario {
+    MEM_EXHAUST_SMALL_ALLOC = 1,
+    MEM_EXHAUST_MEDIUM_ALLOC,
+    MEM_EXHAUST_LARGE_ALLOC,
+    MEM_EXHAUST_MANY_ALLOCS,
+    MEM_EXHAUST_FRAGMENTATION,
+    MEM_EXHAUST_BUFFER_OVERFLOW,
+    MEM_EXHAUST_RECOVERY,
+};
+
+static std::string scenario_name(enum memory_exhaustion_scenario scenario) {
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:     return "SMALL_ALLOC";
+        case MEM_EXHAUST_MEDIUM_ALLOC:    return "MEDIUM_ALLOC";
+        case MEM_EXHAUST_LARGE_ALLOC:     return "LARGE_ALLOC";
+        case MEM_EXHAUST_MANY_ALLOCS:     return "MANY_ALLOCS";
+        case MEM_EXHAUST_FRAGMENTATION:   return "FRAGMENTATION";
+        case MEM_EXHAUST_BUFFER_OVERFLOW: return "BUFFER_OVERFLOW";
+        case MEM_EXHAUST_RECOVERY:        return "RECOVERY";
+    }
+    GGML_ABORT("unknown scenario");
+}
+
+static bool should_fail(enum memory_exhaustion_scenario scenario) {
+    return scenario != MEM_EXHAUST_RECOVERY;
+}
+
+static bool test_memory_exhaustion_scenario(ggml_backend_t backend, enum memory_exhaustion_scenario scenario) {
+    printf("%s: testing scenario=%s\n", __func__, scenario_name(scenario).c_str());
+
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "1024");
+            break;
+        case MEM_EXHAUST_MEDIUM_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "1048576");
+            break;
+        case MEM_EXHAUST_LARGE_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "10485760");
+            break;
+        case MEM_EXHAUST_MANY_ALLOCS:
+            setenv_portable("GGML_ALLOC_FAIL_COUNT", "10");
+            break;
+        case MEM_EXHAUST_BUFFER_OVERFLOW:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "100");
+            break;
+        default:
+            unsetenv_portable("GGML_ALLOC_FAIL_THRESHOLD");
+            unsetenv_portable("GGML_ALLOC_FAIL_COUNT");
+            break;
+    }
+
+    ggml_init_params params = {
+        ggml_tensor_overhead() * 32 + ggml_graph_overhead(),
+        NULL,
+        true,
+    };
+    ggml_context * ctx = ggml_init(params);
+    if (!ctx) {
+        printf(" - failed to create context\n");
+        return false;
+    }
+
+    ggml_tensor * a = nullptr;
+
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
+            break;
+        case MEM_EXHAUST_MEDIUM_ALLOC:
+            a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 1024);
+            break;
+        case MEM_EXHAUST_LARGE_ALLOC:
+            a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2048, 2048);
+            break;
+        case MEM_EXHAUST_MANY_ALLOCS:
+            for (int i = 0; i < 15; i++) {
+                ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
+            }
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
+            break;
+        default:
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
+            break;
+    }
+
+    if (!a) {
+        printf(" - failed to create tensor\n");
+        ggml_free(ctx);
+        return false;
+    }
+
+    ggml_set_name(a, "a");
+
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+
+    bool test_passed = false;
+    if (should_fail(scenario)) {
+        if (buf == NULL) {
+            printf(" - \033[1;32mOK\033[0m: allocation failed as expected\n");
+            test_passed = true;
+        } else {
+            printf(" - \033[1;31mFAIL\033[0m: allocation succeeded when it should have failed\n");
+            ggml_backend_buffer_free(buf);
+        }
+    } else {
+        if (buf != NULL) {
+            printf(" - \033[1;32mOK\033[0m: allocation succeeded as expected\n");
+            test_passed = true;
+            ggml_backend_buffer_free(buf);
+        } else {
+            printf(" - \033[1;31mFAIL\033[0m: allocation failed when it should have succeeded\n");
+        }
+    }
+
+    ggml_free(ctx);
+
+    unsetenv_portable("GGML_ALLOC_FAIL_THRESHOLD");
+    unsetenv_portable("GGML_ALLOC_FAIL_COUNT");
+
+    return test_passed;
+}
+
+int main(void) {
+    ggml_backend_load_all();
+
+    const std::vector<memory_exhaustion_scenario> scenarios = {
+        MEM_EXHAUST_SMALL_ALLOC,
+        MEM_EXHAUST_MEDIUM_ALLOC,
+        MEM_EXHAUST_LARGE_ALLOC,
+        MEM_EXHAUST_MANY_ALLOCS,
+        MEM_EXHAUST_BUFFER_OVERFLOW,
+        MEM_EXHAUST_RECOVERY,
+    };
+
+    ggml_backend_t backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
+    if (!backend) {
+        fprintf(stderr, "Failed to initialize CPU backend\n");
+        return 1;
+    }
+
+    int npass = 0;
+    int ntest = 0;
+
+    for (auto scenario : scenarios) {
+        if (test_memory_exhaustion_scenario(backend, scenario)) {
+            npass++;
+        }
+        ntest++;
+        printf("\n");
+    }
+
+    ggml_backend_free(backend);
+
+    printf("Tests passed: %d/%d\n", npass, ntest);
+    return npass == ntest ? 0 : 1;
+}