Skip to content

Commit 6c07258

Browse files
authored
[hip] Move calls to hipFree and similar to the cleanup thread. (#20020)
This keeps them off the main thread since they can block for an unexpectedly large amount of time.
---------
Signed-off-by: Andrew Woloszyn <[email protected]>
1 parent b85c180 commit 6c07258

File tree

4 files changed

+73
-16
lines changed

4 files changed

+73
-16
lines changed

runtime/src/iree/hal/drivers/hip/cleanup_thread.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ static int iree_hal_hip_cleanup_thread_main(void* param) {
6363
iree_hal_hip_callback_queue_pop_front(&thread->queue, 1);
6464
iree_slim_mutex_unlock(&thread->mutex);
6565

66-
if (iree_status_is_ok(status)) {
66+
// If we have a null event then we don't have to wait
67+
// on the GPU to synchronize.
68+
if (iree_status_is_ok(status) && callback.event) {
6769
status = IREE_HIP_CALL_TO_STATUS(
6870
thread->symbols,
6971
hipEventSynchronize(iree_hal_hip_event_handle(callback.event)));

runtime/src/iree/hal/drivers/hip/hip_allocator.c

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "iree/base/tracing.h"
1414
#include "iree/hal/drivers/hip/dynamic_symbols.h"
1515
#include "iree/hal/drivers/hip/hip_buffer.h"
16+
#include "iree/hal/drivers/hip/hip_device.h"
1617
#include "iree/hal/drivers/hip/per_device_information.h"
1718
#include "iree/hal/drivers/hip/status_util.h"
1819
#include "iree/hal/drivers/hip/util/queue.h"
@@ -569,31 +570,67 @@ static void iree_hal_hip_allocator_deallocate_buffer(
569570
iree_hal_buffer_destroy(base_buffer);
570571
}
571572

572-
static void iree_hal_hip_buffer_release_callback(void* user_data,
573-
iree_hal_buffer_t* buffer) {
574-
iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data;
575-
576-
const iree_hal_hip_buffer_type_t buffer_type =
577-
iree_hal_hip_buffer_type(buffer);
578-
579-
iree_hal_hip_buffer_free(allocator->symbols, buffer_type,
580-
iree_hal_hip_buffer_device_pointer(buffer),
581-
iree_hal_hip_buffer_host_pointer(buffer));
582-
583-
switch (buffer_type) {
573+
typedef struct iree_hal_hip_release_async_data_t {
574+
iree_hal_hip_allocator_t* allocator;
575+
iree_hal_hip_buffer_type_t buffer_type;
576+
hipDeviceptr_t device_pointer;
577+
void* host_pointer;
578+
IREE_STATISTICS(iree_hal_memory_type_t memory_type;
579+
iree_device_size_t allocation_size;)
580+
} iree_hal_hip_release_async_data_t;
581+
582+
static iree_status_t iree_hal_hip_buffer_release_callback_async(
583+
void* user_data, iree_hal_hip_event_t* event, iree_status_t status) {
584+
iree_hal_hip_release_async_data_t* async_data =
585+
(iree_hal_hip_release_async_data_t*)user_data;
586+
587+
iree_hal_hip_buffer_free(async_data->allocator->symbols,
588+
async_data->buffer_type, async_data->device_pointer,
589+
async_data->host_pointer);
590+
591+
switch (async_data->buffer_type) {
584592
case IREE_HAL_HIP_BUFFER_TYPE_DEVICE:
585593
case IREE_HAL_HIP_BUFFER_TYPE_HOST: {
586594
IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID,
587-
(void*)iree_hal_hip_buffer_device_pointer(buffer));
595+
(void*)async_data->device_pointer);
588596
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
589-
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
590-
iree_hal_buffer_allocation_size(buffer)));
597+
&async_data->allocator->statistics, async_data->memory_type,
598+
async_data->allocation_size));
591599
break;
592600
}
593601
default:
594602
// Buffer type not tracked.
595603
break;
596604
}
605+
iree_allocator_free(async_data->allocator->host_allocator, async_data);
606+
return status;
607+
}
608+
609+
static void iree_hal_hip_buffer_release_callback(void* user_data,
610+
iree_hal_buffer_t* buffer) {
611+
iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data;
612+
613+
iree_hal_hip_release_async_data_t* release_async_data = NULL;
614+
615+
iree_status_t status = iree_allocator_malloc(allocator->host_allocator,
616+
sizeof(*release_async_data),
617+
(void**)&release_async_data);
618+
if (iree_status_is_ok(status)) {
619+
release_async_data->allocator = allocator;
620+
release_async_data->device_pointer =
621+
iree_hal_hip_buffer_device_pointer(buffer);
622+
release_async_data->host_pointer = iree_hal_hip_buffer_host_pointer(buffer);
623+
release_async_data->buffer_type = iree_hal_hip_buffer_type(buffer);
624+
IREE_STATISTICS({
625+
release_async_data->memory_type = iree_hal_buffer_memory_type(buffer);
626+
release_async_data->allocation_size =
627+
iree_hal_buffer_allocation_size(buffer);
628+
})
629+
status = iree_hal_hip_device_add_asynchronous_cleanup(
630+
allocator->parent_device, &iree_hal_hip_buffer_release_callback_async,
631+
(void*)release_async_data);
632+
}
633+
iree_status_ignore(status);
597634
}
598635

599636
static iree_status_t iree_hal_hip_allocator_import_buffer(
@@ -766,6 +803,7 @@ iree_status_t iree_hal_hip_allocator_alloc_async(
766803
iree_hal_buffer_allocation_size(buffer),
767804
IREE_HOST_SIZE_MAX);
768805
}
806+
IREE_TRACE_ZONE_BEGIN(z0);
769807

770808
int device_ordinal = 0;
771809
device_ordinal =
@@ -837,6 +875,8 @@ iree_status_t iree_hal_hip_allocator_alloc_async(
837875
iree_hal_hip_buffer_set_allocation_empty(buffer);
838876
}
839877

878+
IREE_TRACE_ZONE_END(z0);
879+
840880
return status;
841881
}
842882

@@ -849,6 +889,7 @@ iree_status_t iree_hal_hip_allocator_free_async(
849889
return iree_ok_status();
850890
}
851891

892+
IREE_TRACE_ZONE_BEGIN(z0);
852893
IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
853894
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
854895
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
@@ -888,6 +929,7 @@ iree_status_t iree_hal_hip_allocator_free_async(
888929
if (iree_status_is_ok(status)) {
889930
iree_hal_hip_buffer_set_allocation_empty(buffer);
890931
}
932+
IREE_TRACE_ZONE_END(z0);
891933

892934
return status;
893935
}

runtime/src/iree/hal/drivers/hip/hip_device.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,14 @@ static iree_status_t iree_hal_hip_device_stream_add_cleanup(
11161116
return status;
11171117
}
11181118

1119+
iree_status_t iree_hal_hip_device_add_asynchronous_cleanup(
1120+
iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback,
1121+
void* user_data) {
1122+
iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
1123+
return iree_hal_hip_cleanup_thread_add_cleanup(device->cleanup_thread, NULL,
1124+
callback, user_data);
1125+
}
1126+
11191127
static iree_status_t
11201128
iree_hal_hip_device_stream_signal_semaphores_and_add_cleanup(
11211129
iree_hal_hip_device_t* device, hipStream_t stream,

runtime/src/iree/hal/drivers/hip/hip_device.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "iree/base/api.h"
1313
#include "iree/hal/api.h"
1414
#include "iree/hal/drivers/hip/api.h"
15+
#include "iree/hal/drivers/hip/cleanup_thread.h"
1516
#include "iree/hal/drivers/hip/dynamic_symbols.h"
1617
#include "iree/hal/drivers/hip/rccl_dynamic_symbols.h"
1718

@@ -49,4 +50,8 @@ static inline hipDeviceptr_t iree_hal_hip_device_size_to_hip_device_prt(
4950
return (hipDeviceptr_t)p;
5051
}
5152

53+
iree_status_t iree_hal_hip_device_add_asynchronous_cleanup(
54+
iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback,
55+
void* user_data);
56+
5257
#endif // IREE_HAL_DRIVERS_HIP_DEVICE_H_

0 commit comments

Comments
 (0)