Merge branch 'ggerganov:master' into master

MaggotHATE · web-flow · commit 094caea359c4 · 2024-10-06T16:06:32.000+05:00
diff --git a/examples/main/README.md b/examples/main/README.md
@@ -69,7 +69,7 @@ In this section, we cover the most commonly used options for running the `llama-
 -   `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
 -   `-mli, --multiline-input`: Allows you to write or paste multiple lines without ending each in '\'
 -   `-t N, --threads N`: Set the number of threads to use during generation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has.
--   -   `-ngl N, --n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
+-   `-ngl N, --n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
 
 ## Input Prompts
 
diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp
@@ -1070,10 +1070,25 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
     try {
         buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
     } catch (const vk::SystemError& e) {
-        // Out of Host/Device memory, clean up buffer
-        device->device.destroyBuffer(buf->buffer);
-        buf->size = 0;
-        throw e;
+        if (buf->memory_property_flags != fallback_flags) {
+            // Try again with fallback flags
+            memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
+            buf->memory_property_flags = fallback_flags;
+
+            try {
+                buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
+            }
+            catch (const vk::SystemError& e) {
+                device->device.destroyBuffer(buf->buffer);
+                buf->size = 0;
+                throw e;
+            }
+        } else {
+            // Out of Host/Device memory, clean up buffer
+            device->device.destroyBuffer(buf->buffer);
+            buf->size = 0;
+            throw e;
+        }
     }
     buf->ptr = nullptr;
 
diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
@@ -1 +1 @@
-0d7ecbbe536dc84240f646e0ec0a712251377f34
+564f42082f858f9674b2a2e06e9e779d9ed2c754

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-0d7ecbbe536dc84240f646e0ec0a712251377f34`
	`1`	`+564f42082f858f9674b2a2e06e9e779d9ed2c754`