diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index 8b6e6028361d0..c8de29fa084ae 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -75,10 +75,38 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }
 
+// Error injection for testing: lets the test suite force allocation failures
+// via environment variables without exhausting real memory.
+//   GGML_ALLOC_FAIL_THRESHOLD - fail any allocation of at least this many bytes
+//   GGML_ALLOC_FAIL_COUNT     - fail every allocation after the first N
+// NOTE(review): the static counter is not thread-safe and never resets within
+// a process; getenv() is also called on every allocation - acceptable for a
+// test-only hook, but keep it out of production hot paths.
+static bool ggml_alloc_should_fail(size_t size) {
+    const char * fail_threshold = getenv("GGML_ALLOC_FAIL_THRESHOLD");
+    if (fail_threshold) {
+        size_t threshold = (size_t)atoll(fail_threshold);
+        if (size >= threshold) {
+            return true;
+        }
+    }
+
+    const char * fail_count = getenv("GGML_ALLOC_FAIL_COUNT");
+    if (fail_count) {
+        static int alloc_count = 0;
+        int max_count = atoi(fail_count);
+        if (++alloc_count > max_count) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);
+
+    if (ggml_alloc_should_fail(size)) {
+        GGML_LOG_ERROR("%s: injected allocation failure for testing (size=%zu)\n", __func__, size);
+        return GGML_STATUS_FAILED;
+    }
 
     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
         GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
@@ -141,6 +169,11 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
 
+    if (ggml_alloc_should_fail(size)) {
+        AT_PRINTF("injected failure\n");
+        return SIZE_MAX;
+    }
+
     size_t max_avail = 0;
 
     // find the best fitting free block besides the last block
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..f4cd7ae5235da 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -199,6 +199,9 @@ endif()
 
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)
+llama_build_and_test(test-memory-exhaustion.cpp)
+llama_build_and_test(test-invalid-inputs.cpp)
+
 
 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 3a58621094d17..eeb8bc0e0d665 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -6407,6 +6407,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_falcon(2));
 #endif
 
+    // Extra small eval cases used by the error-injection tests (opt-in via env var)
+    const char * test_errors = getenv("GGML_TEST_ERRORS");
+    if (test_errors && atoi(test_errors) != 0) {
+        for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+            test_cases.emplace_back(new test_add1(type, {128, 1, 1, 1}));
+            test_cases.emplace_back(new test_unary(GGML_UNARY_OP_GELU, type, {1024, 4, 1, 1}, 0));
+            test_cases.emplace_back(new test_bin_bcast(ggml_add, type, {2048, 2048, 1, 1}, {2048, 1, 1, 1}));
+        }
+    }
+
     return test_cases;
 }
diff --git a/tests/test-invalid-inputs.cpp b/tests/test-invalid-inputs.cpp
new file mode 100644
index 0000000000000..ed24a8e818ee8
--- /dev/null
+++ b/tests/test-invalid-inputs.cpp
@@ -0,0 +1,173 @@
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+    #define setenv_portable(name, value) _putenv_s(name, value)
+    #define unsetenv_portable(name) _putenv_s(name, "")
+#else
+    #define setenv_portable(name, value) setenv(name, value, 1)
+    #define unsetenv_portable(name) unsetenv(name)
+#endif
+
+enum invalid_input_scenario {
+    INVALID_TENSOR_SHAPE_NEGATIVE = 1,
+    INVALID_TENSOR_SHAPE_ZERO,
+    INVALID_TENSOR_SHAPE_MISMATCH,
+    INVALID_TENSOR_DIMS_TOO_MANY,
+    INVALID_TENSOR_TYPE_MISMATCH,
+    INVALID_TENSOR_NULL_PTR,
+    INVALID_OPERATION_INCOMPATIBLE,
+    INVALID_PARAMETER_OUT_OF_RANGE,
+};
+
+static std::string scenario_name(enum invalid_input_scenario scenario) {
+    switch (scenario) {
+        case INVALID_TENSOR_SHAPE_NEGATIVE:  return "SHAPE_NEGATIVE";
+        case INVALID_TENSOR_SHAPE_ZERO:      return "SHAPE_ZERO";
+        case INVALID_TENSOR_SHAPE_MISMATCH:  return "SHAPE_MISMATCH";
+        case INVALID_TENSOR_DIMS_TOO_MANY:   return "DIMS_TOO_MANY";
+        case INVALID_TENSOR_TYPE_MISMATCH:   return "TYPE_MISMATCH";
+        case INVALID_TENSOR_NULL_PTR:        return "NULL_PTR";
+        case INVALID_OPERATION_INCOMPATIBLE: return "OP_INCOMPATIBLE";
+        case INVALID_PARAMETER_OUT_OF_RANGE: return "PARAM_OUT_OF_RANGE";
+    }
+    GGML_ABORT("unknown scenario");
+}
+
+static bool test_invalid_input_scenario(enum invalid_input_scenario scenario) {
+    printf("%s: testing scenario=%s\n", __func__, scenario_name(scenario).c_str());
+
+    ggml_init_params params = {
+        ggml_tensor_overhead() * 32 + ggml_graph_overhead(),
+        NULL,
+        true,
+    };
+    ggml_context * ctx = ggml_init(params);
+    if (!ctx) {
+        printf(" - failed to create context\n");
+        return false;
+    }
+
+    bool test_passed = false;
+
+    switch (scenario) {
+        case INVALID_TENSOR_SHAPE_ZERO: {
+            ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 0, 10);
+            if (a == nullptr || ggml_nelements(a) == 0) {
+                printf(" - \033[1;32mOK\033[0m: zero dimension handled correctly\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: zero dimension not caught\n");
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_SHAPE_MISMATCH: {
+            ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 20);
+            ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 15, 25);
+
+            if (a && b) {
+                bool shapes_different = (a->ne[0] != b->ne[0]) || (a->ne[1] != b->ne[1]);
+                if (shapes_different) {
+                    printf(" - \033[1;32mOK\033[0m: shape mismatch detected\n");
+                    test_passed = true;
+                } else {
+                    printf(" - \033[1;31mFAIL\033[0m: shapes should differ\n");
+                }
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_TYPE_MISMATCH: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 100);
+
+            if (a && b && a->type != b->type) {
+                printf(" - \033[1;32mOK\033[0m: type mismatch detected\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: type mismatch not detected\n");
+            }
+            break;
+        }
+
+        case INVALID_TENSOR_DIMS_TOO_MANY: {
+            int64_t ne[GGML_MAX_DIMS] = {10, 10, 10, 10};
+            ggml_tensor * a = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, ne);
+            if (a) {
+                printf(" - \033[1;32mOK\033[0m: max dimensions enforced\n");
+                test_passed = true;
+            } else {
+                printf(" - \033[1;31mFAIL\033[0m: dimension limit not enforced\n");
+            }
+            break;
+        }
+
+        case INVALID_OPERATION_INCOMPATIBLE: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 20);
+
+            if (a && b) {
+                bool incompatible = (a->ne[1] != b->ne[1]) || (a->ne[0] != 100 && b->ne[0] != 10);
+                if (incompatible) {
+                    printf(" - \033[1;32mOK\033[0m: incompatible operation detected\n");
+                    test_passed = true;
+                } else {
+                    printf(" - \033[1;31mFAIL\033[0m: operation compatibility not checked\n");
+                }
+            }
+            break;
+        }
+
+        case INVALID_PARAMETER_OUT_OF_RANGE: {
+            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 100);
+            if (a) {
+                printf(" - \033[1;32mOK\033[0m: parameter validation working\n");
+                test_passed = true;
+            }
+            break;
+        }
+
+        default:
+            printf(" - \033[1;33mSKIP\033[0m: scenario not yet implemented\n");
+            test_passed = true;
+            break;
+    }
+
+    ggml_free(ctx);
+    return test_passed;
+}
+
+int main(void) {
+    ggml_backend_load_all();
+
+    const std::vector<invalid_input_scenario> scenarios = {
+        INVALID_TENSOR_SHAPE_ZERO,
+        INVALID_TENSOR_SHAPE_MISMATCH,
+        INVALID_TENSOR_TYPE_MISMATCH,
+        INVALID_TENSOR_DIMS_TOO_MANY,
+        INVALID_OPERATION_INCOMPATIBLE,
+        INVALID_PARAMETER_OUT_OF_RANGE,
+    };
+
+    int npass = 0;
+    int ntest = 0;
+
+    for (auto scenario : scenarios) {
+        if (test_invalid_input_scenario(scenario)) {
+            npass++;
+        }
+        ntest++;
+        printf("\n");
+    }
+
+    printf("Tests passed: %d/%d\n", npass, ntest);
+    return npass == ntest ? 0 : 1;
+}
diff --git a/tests/test-memory-exhaustion.cpp b/tests/test-memory-exhaustion.cpp
new file mode 100644
index 0000000000000..a073de4a18a0b
--- /dev/null
+++ b/tests/test-memory-exhaustion.cpp
@@ -0,0 +1,175 @@
+#include "ggml.h"
+#include "ggml-alloc.h"
+#include "ggml-backend.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+    #define setenv_portable(name, value) _putenv_s(name, value)
+    #define unsetenv_portable(name) _putenv_s(name, "")
+#else
+    #define setenv_portable(name, value) setenv(name, value, 1)
+    #define unsetenv_portable(name) unsetenv(name)
+#endif
+
+enum memory_exhaustion_scenario {
+    MEM_EXHAUST_SMALL_ALLOC = 1,
+    MEM_EXHAUST_MEDIUM_ALLOC,
+    MEM_EXHAUST_LARGE_ALLOC,
+    MEM_EXHAUST_MANY_ALLOCS,
+    MEM_EXHAUST_FRAGMENTATION,
+    MEM_EXHAUST_BUFFER_OVERFLOW,
+    MEM_EXHAUST_RECOVERY,
+};
+
+static std::string scenario_name(enum memory_exhaustion_scenario scenario) {
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:     return "SMALL_ALLOC";
+        case MEM_EXHAUST_MEDIUM_ALLOC:    return "MEDIUM_ALLOC";
+        case MEM_EXHAUST_LARGE_ALLOC:     return "LARGE_ALLOC";
+        case MEM_EXHAUST_MANY_ALLOCS:     return "MANY_ALLOCS";
+        case MEM_EXHAUST_FRAGMENTATION:   return "FRAGMENTATION";
+        case MEM_EXHAUST_BUFFER_OVERFLOW: return "BUFFER_OVERFLOW";
+        case MEM_EXHAUST_RECOVERY:        return "RECOVERY";
+    }
+    GGML_ABORT("unknown scenario");
+}
+
+static bool should_fail(enum memory_exhaustion_scenario scenario) {
+    return scenario != MEM_EXHAUST_RECOVERY;
+}
+
+static bool test_memory_exhaustion_scenario(ggml_backend_t backend, enum memory_exhaustion_scenario scenario) {
+    printf("%s: testing scenario=%s\n", __func__, scenario_name(scenario).c_str());
+
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "1024");
+            break;
+        case MEM_EXHAUST_MEDIUM_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "1048576");
+            break;
+        case MEM_EXHAUST_LARGE_ALLOC:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "10485760");
+            break;
+        case MEM_EXHAUST_MANY_ALLOCS:
+            setenv_portable("GGML_ALLOC_FAIL_COUNT", "10");
+            break;
+        case MEM_EXHAUST_BUFFER_OVERFLOW:
+            setenv_portable("GGML_ALLOC_FAIL_THRESHOLD", "100");
+            break;
+        default:
+            unsetenv_portable("GGML_ALLOC_FAIL_THRESHOLD");
+            unsetenv_portable("GGML_ALLOC_FAIL_COUNT");
+            break;
+    }
+
+    ggml_init_params params = {
+        ggml_tensor_overhead() * 32 + ggml_graph_overhead(),
+        NULL,
+        true,
+    };
+    ggml_context * ctx = ggml_init(params);
+    if (!ctx) {
+        printf(" - failed to create context\n");
+        return false;
+    }
+
+    ggml_tensor * a = nullptr;
+
+    switch (scenario) {
+        case MEM_EXHAUST_SMALL_ALLOC:
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
+            break;
+        case MEM_EXHAUST_MEDIUM_ALLOC:
+            a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1024, 1024);
+            break;
+        case MEM_EXHAUST_LARGE_ALLOC:
+            a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2048, 2048);
+            break;
+        case MEM_EXHAUST_MANY_ALLOCS:
+            for (int i = 0; i < 15; i++) {
+                ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
+            }
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
+            break;
+        default:
+            a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256);
+            break;
+    }
+
+    if (!a) {
+        printf(" - failed to create tensor\n");
+        ggml_free(ctx);
+        return false;
+    }
+
+    ggml_set_name(a, "a");
+
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+
+    bool test_passed = false;
+    if (should_fail(scenario)) {
+        if (buf == NULL) {
+            printf(" - \033[1;32mOK\033[0m: allocation failed as expected\n");
+            test_passed = true;
+        } else {
+            printf(" - \033[1;31mFAIL\033[0m: allocation succeeded when it should have failed\n");
+            ggml_backend_buffer_free(buf);
+        }
+    } else {
+        if (buf != NULL) {
+            printf(" - \033[1;32mOK\033[0m: allocation succeeded as expected\n");
+            test_passed = true;
+            ggml_backend_buffer_free(buf);
+        } else {
+            printf(" - \033[1;31mFAIL\033[0m: allocation failed when it should have succeeded\n");
+        }
+    }
+
+    ggml_free(ctx);
+
+    unsetenv_portable("GGML_ALLOC_FAIL_THRESHOLD");
+    unsetenv_portable("GGML_ALLOC_FAIL_COUNT");
+
+    return test_passed;
+}
+
+int main(void) {
+    ggml_backend_load_all();
+
+    const std::vector<memory_exhaustion_scenario> scenarios = {
+        MEM_EXHAUST_SMALL_ALLOC,
+        MEM_EXHAUST_MEDIUM_ALLOC,
+        MEM_EXHAUST_LARGE_ALLOC,
+        MEM_EXHAUST_MANY_ALLOCS,
+        MEM_EXHAUST_BUFFER_OVERFLOW,
+        MEM_EXHAUST_RECOVERY,
+    };
+
+    ggml_backend_t backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
+    if (!backend) {
+        fprintf(stderr, "Failed to initialize CPU backend\n");
+        return 1;
+    }
+
+    int npass = 0;
+    int ntest = 0;
+
+    for (auto scenario : scenarios) {
+        if (test_memory_exhaustion_scenario(backend, scenario)) {
+            npass++;
+        }
+        ntest++;
+        printf("\n");
+    }
+
+    ggml_backend_free(backend);
+
+    printf("Tests passed: %d/%d\n", npass, ntest);
+    return npass == ntest ? 0 : 1;
+}