Skip to content

Commit d347008

Browse files
authored
Adding skeleton AMDGPU allocator. (iree-org#21093)
This doesn't do anything but is enough to satisfy build requirements of future PRs. This was reviewed as iree-org#21045 but incorrectly merged. This PR just merges again on main now that the base commits have landed.
1 parent 616c960 commit d347008

File tree

4 files changed

+384
-3
lines changed

4 files changed

+384
-3
lines changed

runtime/src/iree/hal/drivers/amdgpu/BUILD.bazel

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ package(
1616
iree_runtime_cc_library(
1717
name = "amdgpu",
1818
srcs = [
19-
# "allocator.c",
20-
# "allocator.h",
19+
"allocator.c",
20+
"allocator.h",
2121
"buffer.c",
2222
"buffer.h",
2323
"buffer_pool.c",
@@ -79,7 +79,7 @@ iree_runtime_cc_library(
7979
iree_runtime_cc_library(
8080
name = "headers",
8181
hdrs = [
82-
# "allocator.h",
82+
"allocator.h",
8383
"buffer.h",
8484
"buffer_pool.h",
8585
"channel.h",

runtime/src/iree/hal/drivers/amdgpu/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ iree_cc_library(
1616
HDRS
1717
"api.h"
1818
SRCS
19+
"allocator.c"
20+
"allocator.h"
1921
"buffer.c"
2022
"buffer.h"
2123
"buffer_pool.c"
@@ -60,6 +62,7 @@ iree_cc_library(
6062
NAME
6163
headers
6264
HDRS
65+
"allocator.h"
6366
"buffer.h"
6467
"buffer_pool.h"
6568
"channel.h"
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
// Copyright 2025 The IREE Authors
2+
//
3+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "iree/hal/drivers/amdgpu/allocator.h"
8+
9+
#include "iree/hal/drivers/amdgpu/buffer.h"
10+
#include "iree/hal/drivers/amdgpu/util/topology.h"
11+
12+
//===----------------------------------------------------------------------===//
13+
// iree_hal_amdgpu_allocator_t
14+
//===----------------------------------------------------------------------===//
15+
16+
// Name passed to the IREE_TRACE_ALLOC_NAMED/IREE_TRACE_FREE_NAMED calls in this
// file. Only declared when allocation tracking is part of the enabled tracing
// feature set.
// TODO(benvanik): use one ID per address space or pool - each shows as a
// different track in tracing tools.
#if (IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING)
static const char* IREE_HAL_AMDGPU_ALLOCATOR_ID = "AMDGPU unpooled";
#endif  // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
21+
22+
typedef struct iree_hal_amdgpu_allocator_t {
23+
iree_hal_resource_t resource;
24+
iree_allocator_t host_allocator;
25+
26+
// Unowned libhsa handle. Must be retained by the owner.
27+
const iree_hal_amdgpu_libhsa_t* libhsa;
28+
// Topology with all CPU and GPU agents.
29+
const iree_hal_amdgpu_topology_t* topology;
30+
31+
IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;)
32+
} iree_hal_amdgpu_allocator_t;
33+
34+
// Forward declaration so that the functions below can reference the vtable
// (for type assertions and resource initialization) before it is defined after
// the functions it points to.
static const iree_hal_allocator_vtable_t iree_hal_amdgpu_allocator_vtable;
35+
36+
static iree_hal_amdgpu_allocator_t* iree_hal_amdgpu_allocator_cast(
37+
iree_hal_allocator_t* base_value) {
38+
IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_amdgpu_allocator_vtable);
39+
return (iree_hal_amdgpu_allocator_t*)base_value;
40+
}
41+
42+
iree_status_t iree_hal_amdgpu_allocator_create(
43+
const iree_hal_amdgpu_libhsa_t* libhsa,
44+
const iree_hal_amdgpu_topology_t* topology, iree_allocator_t host_allocator,
45+
iree_hal_allocator_t** out_allocator) {
46+
IREE_ASSERT_ARGUMENT(libhsa);
47+
IREE_ASSERT_ARGUMENT(topology);
48+
IREE_ASSERT_ARGUMENT(out_allocator);
49+
IREE_TRACE_ZONE_BEGIN(z0);
50+
51+
iree_hal_amdgpu_allocator_t* allocator = NULL;
52+
IREE_RETURN_AND_END_ZONE_IF_ERROR(
53+
z0, iree_allocator_malloc(host_allocator, sizeof(*allocator),
54+
(void**)&allocator));
55+
iree_hal_resource_initialize(&iree_hal_amdgpu_allocator_vtable,
56+
&allocator->resource);
57+
allocator->host_allocator = host_allocator;
58+
allocator->libhsa = libhsa;
59+
allocator->topology = topology;
60+
61+
// TODO(benvanik): query device heaps, supported features (concurrent
62+
// access/etc), and prepare any pools that will be used during allocation.
63+
// It's expected that most failures that occur after creation are allocation
64+
// request-specific so preparing here will help keep the errors more
65+
// localized.
66+
iree_status_t status = iree_ok_status();
67+
68+
if (iree_status_is_ok(status)) {
69+
*out_allocator = (iree_hal_allocator_t*)allocator;
70+
} else {
71+
iree_hal_allocator_release((iree_hal_allocator_t*)allocator);
72+
}
73+
IREE_TRACE_ZONE_END(z0);
74+
return status;
75+
}
76+
77+
static void iree_hal_amdgpu_allocator_destroy(
78+
iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
79+
IREE_ASSERT_ARGUMENT(base_allocator);
80+
iree_hal_amdgpu_allocator_t* allocator =
81+
iree_hal_amdgpu_allocator_cast(base_allocator);
82+
IREE_TRACE_ZONE_BEGIN(z0);
83+
84+
iree_allocator_free(allocator->host_allocator, allocator);
85+
86+
IREE_TRACE_ZONE_END(z0);
87+
}
88+
89+
static iree_allocator_t iree_hal_amdgpu_allocator_host_allocator(
90+
const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
91+
iree_hal_amdgpu_allocator_t* allocator =
92+
(iree_hal_amdgpu_allocator_t*)base_allocator;
93+
return allocator->host_allocator;
94+
}
95+
96+
static iree_status_t iree_hal_amdgpu_allocator_trim(
97+
iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
98+
iree_hal_amdgpu_allocator_t* allocator =
99+
(iree_hal_amdgpu_allocator_t*)base_allocator;
100+
101+
// TODO(benvanik): if the allocator is retaining any unused resources they
102+
// should be dropped here. If the underlying implementation has pools or
103+
// caches it should be notified that a trim is requested. This is called in
104+
// low-memory situations or when IREE is not going to be used for awhile (low
105+
// power modes or suspension).
106+
(void)allocator;
107+
108+
return iree_ok_status();
109+
}
110+
111+
static void iree_hal_amdgpu_allocator_query_statistics(
112+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
113+
iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
114+
IREE_STATISTICS({
115+
iree_hal_amdgpu_allocator_t* allocator =
116+
iree_hal_amdgpu_allocator_cast(base_allocator);
117+
memcpy(out_statistics, &allocator->statistics, sizeof(*out_statistics));
118+
// TODO(benvanik): update statistics (merge).
119+
});
120+
}
121+
122+
static iree_status_t iree_hal_amdgpu_allocator_query_memory_heaps(
123+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
124+
iree_host_size_t capacity,
125+
iree_hal_allocator_memory_heap_t* IREE_RESTRICT heaps,
126+
iree_host_size_t* IREE_RESTRICT out_count) {
127+
iree_hal_amdgpu_allocator_t* allocator =
128+
iree_hal_amdgpu_allocator_cast(base_allocator);
129+
130+
// TODO(benvanik): return heap information. This is called at least once with
131+
// a capacity that may be 0 (indicating a query for the total count) and the
132+
// heaps should only be populated if capacity is sufficient to store all of
133+
// them.
134+
(void)allocator;
135+
iree_status_t status =
136+
iree_make_status(IREE_STATUS_UNIMPLEMENTED, "heap query not implemented");
137+
138+
return status;
139+
}
140+
141+
static iree_hal_buffer_compatibility_t
142+
iree_hal_amdgpu_allocator_query_buffer_compatibility(
143+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
144+
iree_hal_buffer_params_t* IREE_RESTRICT params,
145+
iree_device_size_t* IREE_RESTRICT allocation_size) {
146+
iree_hal_amdgpu_allocator_t* allocator =
147+
iree_hal_amdgpu_allocator_cast(base_allocator);
148+
149+
// TODO(benvanik): set compatibility rules based on the implementation.
150+
// Note that the user may have requested that the allocator place the
151+
// allocation based on whatever is optimal for the indicated usage by
152+
// including the IREE_HAL_MEMORY_TYPE_OPTIMAL flag. It's still required that
153+
// the implementation meet all the requirements but it is free to place it in
154+
// either host or device memory so long as the appropriate bits are updated to
155+
// indicate where it landed.
156+
(void)allocator;
157+
iree_hal_buffer_compatibility_t compatibility =
158+
IREE_HAL_BUFFER_COMPATIBILITY_NONE;
159+
160+
// We are now optimal.
161+
params->type &= ~IREE_HAL_MEMORY_TYPE_OPTIMAL;
162+
163+
// Guard against the corner case where the requested buffer size is 0. The
164+
// application is unlikely to do anything when requesting a 0-byte buffer; but
165+
// it can happen in real world use cases. So we should at least not crash.
166+
if (*allocation_size == 0) *allocation_size = 4;
167+
168+
return compatibility;
169+
}
170+
171+
static iree_status_t iree_hal_amdgpu_allocator_allocate_buffer(
172+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
173+
const iree_hal_buffer_params_t* IREE_RESTRICT params,
174+
iree_device_size_t allocation_size,
175+
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
176+
iree_hal_amdgpu_allocator_t* allocator =
177+
iree_hal_amdgpu_allocator_cast(base_allocator);
178+
179+
// Coerce options into those required by the current device.
180+
iree_hal_buffer_params_t compat_params = *params;
181+
iree_hal_buffer_compatibility_t compatibility =
182+
iree_hal_amdgpu_allocator_query_buffer_compatibility(
183+
base_allocator, &compat_params, &allocation_size);
184+
if (!iree_all_bits_set(compatibility,
185+
IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE)) {
186+
// TODO(benvanik): make a helper for this.
187+
#if IREE_STATUS_MODE
188+
iree_bitfield_string_temp_t temp0, temp1, temp2;
189+
iree_string_view_t memory_type_str =
190+
iree_hal_memory_type_format(params->type, &temp0);
191+
iree_string_view_t usage_str =
192+
iree_hal_buffer_usage_format(params->usage, &temp1);
193+
iree_string_view_t compatibility_str =
194+
iree_hal_buffer_compatibility_format(compatibility, &temp2);
195+
return iree_make_status(
196+
IREE_STATUS_INVALID_ARGUMENT,
197+
"allocator cannot allocate a buffer with the given parameters; "
198+
"memory_type=%.*s, usage=%.*s, compatibility=%.*s",
199+
(int)memory_type_str.size, memory_type_str.data, (int)usage_str.size,
200+
usage_str.data, (int)compatibility_str.size, compatibility_str.data);
201+
#else
202+
return iree_make_status(
203+
IREE_STATUS_INVALID_ARGUMENT,
204+
"allocator cannot allocate a buffer with the given parameters");
205+
#endif // IREE_STATUS_MODE
206+
}
207+
208+
// TODO(benvanik): allocate the underlying device memory. The impl_ptr is just
209+
// used for accounting and can be an opaque value (handle/etc) so long as it
210+
// is consistent between the alloc and free and unique to the buffer while it
211+
// is live. An example iree_hal_amdgpu_external_buffer_wrap is provided that
212+
// can be used for implementations that are managing memory using underlying
213+
// allocators and just wrapping those device pointers in the HAL buffer type.
214+
// Other implementations that require more tracking can provide their own
215+
// buffer types that do such tracking for them.
216+
(void)allocator;
217+
void* impl_ptr = NULL;
218+
(void)impl_ptr;
219+
iree_hal_buffer_t* buffer = NULL;
220+
iree_status_t status = iree_make_status(IREE_STATUS_UNIMPLEMENTED,
221+
"buffer allocation not implemented");
222+
223+
if (iree_status_is_ok(status)) {
224+
// TODO(benvanik): ensure this accounting is balanced in deallocate_buffer.
225+
IREE_TRACE_ALLOC_NAMED(IREE_HAL_AMDGPU_ALLOCATOR_ID, impl_ptr,
226+
allocation_size);
227+
IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
228+
&allocator->statistics, compat_params.type, allocation_size));
229+
*out_buffer = buffer;
230+
} else {
231+
iree_hal_buffer_release(buffer);
232+
}
233+
return status;
234+
}
235+
236+
static void iree_hal_amdgpu_allocator_deallocate_buffer(
237+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
238+
iree_hal_buffer_t* IREE_RESTRICT base_buffer) {
239+
iree_hal_amdgpu_allocator_t* allocator =
240+
iree_hal_amdgpu_allocator_cast(base_allocator);
241+
242+
// TODO(benvanik): free the underlying device memory here. Buffers allocated
243+
// from this allocator will call this method to handle cleanup. Note that
244+
// because this method is responsible for doing the base
245+
// iree_hal_buffer_destroy and the caller assumes the memory has been freed an
246+
// implementation could pool the buffer handle and return it in the future.
247+
(void)allocator;
248+
void* impl_ptr = NULL;
249+
(void)impl_ptr;
250+
251+
// TODO(benvanik): if the buffer was imported then this accounting may need to
252+
// be conditional depending on the implementation.
253+
bool was_imported = false;
254+
if (!was_imported) {
255+
IREE_TRACE_FREE_NAMED(IREE_HAL_AMDGPU_ALLOCATOR_ID, impl_ptr);
256+
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
257+
&allocator->statistics, iree_hal_buffer_memory_type(base_buffer),
258+
iree_hal_buffer_allocation_size(base_buffer)));
259+
}
260+
261+
iree_hal_buffer_destroy(base_buffer);
262+
}
263+
264+
static iree_status_t iree_hal_amdgpu_allocator_import_buffer(
265+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
266+
const iree_hal_buffer_params_t* IREE_RESTRICT params,
267+
iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
268+
iree_hal_buffer_release_callback_t release_callback,
269+
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
270+
iree_hal_amdgpu_allocator_t* allocator =
271+
iree_hal_amdgpu_allocator_cast(base_allocator);
272+
273+
// Coerce options into those required by the current device.
274+
iree_hal_buffer_params_t compat_params = *params;
275+
iree_device_size_t allocation_size = external_buffer->size;
276+
iree_hal_buffer_compatibility_t compatibility =
277+
iree_hal_amdgpu_allocator_query_buffer_compatibility(
278+
base_allocator, &compat_params, &allocation_size);
279+
if (!iree_all_bits_set(compatibility,
280+
IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE)) {
281+
// TODO(benvanik): make a helper for this.
282+
#if IREE_STATUS_MODE
283+
iree_bitfield_string_temp_t temp0, temp1, temp2;
284+
iree_string_view_t memory_type_str =
285+
iree_hal_memory_type_format(params->type, &temp0);
286+
iree_string_view_t usage_str =
287+
iree_hal_buffer_usage_format(params->usage, &temp1);
288+
iree_string_view_t compatibility_str =
289+
iree_hal_buffer_compatibility_format(compatibility, &temp2);
290+
return iree_make_status(
291+
IREE_STATUS_INVALID_ARGUMENT,
292+
"allocator cannot import a buffer with the given parameters; "
293+
"memory_type=%.*s, usage=%.*s, compatibility=%.*s",
294+
(int)memory_type_str.size, memory_type_str.data, (int)usage_str.size,
295+
usage_str.data, (int)compatibility_str.size, compatibility_str.data);
296+
#else
297+
return iree_make_status(
298+
IREE_STATUS_INVALID_ARGUMENT,
299+
"allocator cannot import a buffer with the given parameters");
300+
#endif // IREE_STATUS_MODE
301+
}
302+
303+
// TODO(benvanik): switch on external_buffer->type and import the buffer. See
304+
// the headers for more information on semantics. Most implementations can
305+
// service IREE_HAL_EXTERNAL_BUFFER_TYPE_DEVICE_ALLOCATION by just wrapping
306+
// the underlying device pointer. Those that can service
307+
// IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION may be able to avoid a lot of
308+
// additional copies when moving data around between host and device or across
309+
// devices from different drivers.
310+
(void)allocator;
311+
iree_status_t status = iree_make_status(IREE_STATUS_UNIMPLEMENTED,
312+
"external buffer type not supported");
313+
314+
return status;
315+
}
316+
317+
static iree_status_t iree_hal_amdgpu_allocator_export_buffer(
318+
iree_hal_allocator_t* IREE_RESTRICT base_allocator,
319+
iree_hal_buffer_t* IREE_RESTRICT buffer,
320+
iree_hal_external_buffer_type_t requested_type,
321+
iree_hal_external_buffer_flags_t requested_flags,
322+
iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {
323+
iree_hal_amdgpu_allocator_t* allocator =
324+
iree_hal_amdgpu_allocator_cast(base_allocator);
325+
326+
// TODO(benvanik): switch on requested_type and export as appropriate. Most
327+
// implementations can service IREE_HAL_EXTERNAL_BUFFER_TYPE_DEVICE_ALLOCATION
328+
// by just exposing the underlying device pointer. Those that can service
329+
// IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION may be able to avoid a lot of
330+
// additional copies when moving data around between host and device or across
331+
// devices from different drivers.
332+
(void)allocator;
333+
return iree_make_status(IREE_STATUS_UNAVAILABLE,
334+
"external buffer type not supported");
335+
}
336+
337+
static const iree_hal_allocator_vtable_t iree_hal_amdgpu_allocator_vtable = {
338+
.destroy = iree_hal_amdgpu_allocator_destroy,
339+
.host_allocator = iree_hal_amdgpu_allocator_host_allocator,
340+
.trim = iree_hal_amdgpu_allocator_trim,
341+
.query_statistics = iree_hal_amdgpu_allocator_query_statistics,
342+
.query_memory_heaps = iree_hal_amdgpu_allocator_query_memory_heaps,
343+
.query_buffer_compatibility =
344+
iree_hal_amdgpu_allocator_query_buffer_compatibility,
345+
.allocate_buffer = iree_hal_amdgpu_allocator_allocate_buffer,
346+
.deallocate_buffer = iree_hal_amdgpu_allocator_deallocate_buffer,
347+
.import_buffer = iree_hal_amdgpu_allocator_import_buffer,
348+
.export_buffer = iree_hal_amdgpu_allocator_export_buffer,
349+
};

0 commit comments

Comments
 (0)