Skip to content

Commit 34ba782

Browse files
committed
vulkan: implement deferred_memset on UMA
Signed-off-by: Giuseppe Scrivano <[email protected]>
1 parent 78c0d5e commit 34ba782

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1185,6 +1185,14 @@ struct vk_staging_memcpy {
11851185
size_t n;
11861186
};
11871187

1188+
// One queued host-side fill request: destination, fill value and byte count.
// Records are appended to a context's `memsets` list and later replayed as
// memset(dst, val, n). Because memset() converts its value argument to
// unsigned char, only the low 8 bits of `val` are actually written.
struct vk_staging_memset {
    void * dst;    // first byte of the region to fill
    uint32_t val;  // fill value exactly as supplied by the caller
    size_t n;      // number of bytes to fill

    // Stores the three arguments unchanged; no validation is performed.
    vk_staging_memset(void * _dst, uint32_t _val, size_t _n)
        : dst(_dst),
          val(_val),
          n(_n) {}
};
1195+
11881196
struct vk_context_struct {
11891197
vk_submission * s;
11901198
std::vector<vk_sequence> seqs;
@@ -1193,6 +1201,7 @@ struct vk_context_struct {
11931201

11941202
std::vector<vk_staging_memcpy> in_memcpys;
11951203
std::vector<vk_staging_memcpy> out_memcpys;
1204+
std::vector<vk_staging_memset> memsets;
11961205

11971206
vk_command_pool * p {};
11981207
};
@@ -5194,6 +5203,14 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect
51945203
}
51955204
}
51965205

5206+
// Fill `size` bytes starting at `dst` with `val`, either immediately or later.
//
//   memsets == nullptr : the fill is performed right away with memset().
//   memsets != nullptr : a {dst, val, size} record is appended to *memsets and
//                        nothing is written here; replaying the list is left
//                        to whoever owns it.
//
// `val` is forwarded to memset() unchanged, so only its low byte is stored.
static void deferred_memset(void * dst, uint32_t val, size_t size, std::vector<vk_staging_memset>* memsets = nullptr) {
    if (memsets != nullptr) {
        // Deferred mode: just remember the request.
        memsets->emplace_back(dst, val, size);
        return;
    }

    // Immediate mode.
    memset(dst, val, size);
}
5213+
51975214
static void ggml_vk_ensure_sync_staging_buffer(vk_device& device, size_t size) {
51985215
if (device->sync_staging == nullptr || device->sync_staging->size < size) {
51995216
VK_LOG_MEMORY("ggml_vk_ensure_sync_staging_buffer(" << size << ")");
@@ -5389,6 +5406,10 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void *
53895406
memcpy(cpy.dst, cpy.src, cpy.n);
53905407
}
53915408

5409+
for (auto& mset : subctx->memsets) {
5410+
memset(mset.dst, mset.val, mset.n);
5411+
}
5412+
53925413
ggml_vk_submit(subctx, dst->device->fence);
53935414
VK_CHECK(dst->device->device.waitForFences({ dst->device->fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences");
53945415
dst->device->device.resetFences({ dst->device->fence });
@@ -5528,6 +5549,13 @@ static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr
55285549
// Schedule a fill of `size` bytes of `dst`, starting at byte `offset`, with the
// 32-bit word `c`.
//
// Two paths exist:
//   * Host path: when the device reports UMA, the buffer is host-visible and
//     `c` is a single byte repeated four times, the fill is queued on
//     ctx->memsets as a host-side memset on the mapped pointer.
//   * GPU path: otherwise a fillBuffer command is recorded into the context's
//     command buffer.
static void ggml_vk_buffer_memset_async(vk_context& ctx, vk_buffer& dst, size_t offset, uint32_t c, size_t size) {
    VK_LOG_DEBUG("ggml_vk_buffer_memset_async(" << offset << ", " << c << ", " << size << ")");

    // The queued host fill is replayed with memset(), which stores only the low
    // byte of its value, while fillBuffer repeats the whole 32-bit word. The two
    // produce the same bytes only when all four bytes of `c` are identical, so
    // any other word must stay on the GPU path.
    const bool is_byte_pattern = c == (c & 0xFFu) * 0x01010101u;

    if (is_byte_pattern &&
        (dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) &&
        dst->device->uma) {
        // NOTE(review): the queued fill is executed by the host when the
        // context's memsets are replayed, not in command-buffer order. Confirm
        // that callers do not depend on this fill being ordered after GPU work
        // recorded earlier in the same context, and that dst->ptr is a valid
        // coherent mapping for every host-visible buffer.
        deferred_memset((uint8_t*)dst->ptr + offset, c, size, &ctx->memsets);
        return;
    }

    // Fall back to GPU fillBuffer for non-UMA devices, non-host-visible buffers,
    // or fill words that are not a single repeated byte.
    ctx->s->buffer.fillBuffer(dst->buffer, offset, size, c);
}
55335561

@@ -11174,6 +11202,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
1117411202
memcpy(cpy.dst, cpy.src, cpy.n);
1117511203
}
1117611204

11205+
for (auto& mset : subctx->memsets) {
11206+
memset(mset.dst, mset.val, mset.n);
11207+
}
11208+
1117711209
if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) {
1117811210
ggml_vk_submit(subctx, ctx->almost_ready_fence);
1117911211
ctx->almost_ready_fence_pending = true;
@@ -11196,6 +11228,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
1119611228
}
1119711229
subctx->in_memcpys.clear();
1119811230
subctx->out_memcpys.clear();
11231+
subctx->memsets.clear();
1119911232
}
1120011233

1120111234
return true;

0 commit comments

Comments
 (0)