55 * Copyright (c) Amazon.com, Inc. or its affiliates.
66 * All Rights reserved.
77 * Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+ * Copyright (c) 2024 The University of Tennessee and The University
9+ * of Tennessee Research Foundation. All rights
10+ * reserved.
811 *
912 * $COPYRIGHT$
1013 *
@@ -184,6 +187,16 @@ typedef int (*opal_accelerator_base_module_check_addr_fn_t)(
184187typedef int (* opal_accelerator_base_module_create_stream_fn_t )(
185188 int dev_id , opal_accelerator_stream_t * * stream );
186189
190+ /**
191+ * Wait for the completion of all operations inserted into the stream.
192+ *
193+ * @param[IN] stream The stream to wait for.
194+ *
195+ * @return OPAL_SUCCESS or error status on failure
196+ */
197+ typedef int (* opal_accelerator_base_module_sync_stream_fn_t )(
198+ opal_accelerator_stream_t * stream );
199+
187200/**
188201 * Creates an event. An event is a synchronization marker that can be
189202 * appended to a stream to monitor device progress or synchronize the
@@ -193,7 +206,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193206 * @param[IN] dev_id Associated device for the event or
194207 * MCA_ACCELERATOR_NO_DEVICE_ID
195208 * @param[OUT] event Event to create
196- * @param[IN] enable_ipc support inter-process tracking of the event
209+ * @param[IN] enable_ipc support inter-process tracking of the event
197210 *
198211 * @return OPAL_SUCCESS or error status on failure.
199212 */
@@ -310,6 +323,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310323 int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
311324 opal_accelerator_transfer_type_t type );
312325
326+
327+ /**
328+ * Copies memory asynchronously from src to dest. Memory of dest and src
329+ * may overlap. The caller may optionally specify the transfer type to
330+ * avoid pointer detection for performance. The operation is enqueued
331+ * into the provided stream but is not guaranteed to be complete upon return.
332+ *
333+ * @param[IN] dest_dev_id Associated device to copy to or
334+ * MCA_ACCELERATOR_NO_DEVICE_ID
335+ * @param[IN] src_dev_id Associated device to copy from or
336+ * MCA_ACCELERATOR_NO_DEVICE_ID
337+ * @param[IN] dest Destination to copy memory to
338+ * @param[IN] src Source to copy memory from
339+ * @param[IN] size Size of memory to copy
340+ * @param[IN] stream Stream to perform asynchronous move on
341+ * @param[IN] type Transfer type field for performance
342+ * Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
343+ * if caller is unsure of the transfer direction.
344+ *
345+ * @return OPAL_SUCCESS or error status on failure
346+ */
347+ typedef int (* opal_accelerator_base_module_memmove_async_fn_t )(
348+ int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
349+ opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
350+
313351/**
314352 * Allocates size bytes memory from the device and sets ptr to the
315353 * pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +378,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340378typedef int (* opal_accelerator_base_module_mem_release_fn_t )(
341379 int dev_id , void * ptr );
342380
381+
382+ /**
383+ * Allocates size bytes memory from the device and sets ptr to the
384+ * pointer of the allocated memory. The memory is not initialized.
385+ * The allocation request is placed into the stream object.
386+ * Any use of the memory must follow the completion of this
387+ * operation on the stream.
388+ *
389+ * @param[IN] dev_id Associated device for the allocation or
390+ * MCA_ACCELERATOR_NO_DEVICE_ID
391+ * @param[OUT] ptr Returns pointer to allocated memory
392+ * @param[IN] size Size of memory to allocate
393+ * @param[IN] stream Stream into which to insert the allocation request
394+ *
395+ * @return OPAL_SUCCESS or error status on failure
396+ */
397+ typedef int (* opal_accelerator_base_module_mem_alloc_stream_fn_t )(
398+ int dev_id , void * * ptr , size_t size , opal_accelerator_stream_t * stream );
399+
400+ /**
401+ * Frees the memory space pointed to by ptr which has been returned by
402+ * a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
403+ * If the function is called on a ptr that has already been freed,
404+ * undefined behavior occurs. If ptr is NULL, no operation is performed,
405+ * and the function returns OPAL_SUCCESS.
406+ * The release of the memory is inserted into the stream and takes place after
407+ * all previous operations have completed.
408+ *
409+ * @param[IN] dev_id Associated device for the allocation or
410+ * MCA_ACCELERATOR_NO_DEVICE_ID
411+ * @param[IN] ptr Pointer to free
412+ * @param[IN] stream Stream into which to insert the free operation
413+ *
414+ * @return OPAL_SUCCESS or error status on failure
415+ */
416+ typedef int (* opal_accelerator_base_module_mem_release_stream_fn_t )(
417+ int dev_id , void * ptr , opal_accelerator_stream_t * stream );
418+
419+
420+
343421/**
344422 * Retrieves the base address and/or size of a memory allocation of the
345423 * device.
@@ -557,6 +635,26 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557635typedef int (* opal_accelerator_base_module_get_buffer_id_fn_t )(
558636 int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
559637
638+ /**
639+ * Get the number of devices available.
640+ *
641+ * @param[OUT] num_devices Number of devices.
642+ *
643+ * @return OPAL_SUCCESS or error status on failure
644+ */
645+ typedef int (* opal_accelerator_base_module_get_num_devices_fn_t )(int * num_devices );
646+
647+ /**
648+ * Get the memory bandwidth of the device.
649+ *
650+ * @param[IN] device The device to query.
651+ * @param[OUT] bw The returned bandwidth for the device (units are not stated here; TODO confirm).
652+ *
653+ * @return OPAL_SUCCESS or error status on failure
654+ */
655+ typedef int (* opal_accelerator_base_module_get_mem_bw_fn_t )(int device , float * bw );
656+
657+
560658/*
561659 * the standard public API data structure
562660 */
@@ -565,17 +663,21 @@ typedef struct {
565663 opal_accelerator_base_module_check_addr_fn_t check_addr ;
566664
567665 opal_accelerator_base_module_create_stream_fn_t create_stream ;
666+ opal_accelerator_base_module_sync_stream_fn_t sync_stream ;
568667 opal_accelerator_base_module_create_event_fn_t create_event ;
569668 opal_accelerator_base_module_record_event_fn_t record_event ;
570669 opal_accelerator_base_module_query_event_fn_t query_event ;
571670 opal_accelerator_base_module_wait_event_fn_t wait_event ;
572671
573672 opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async ;
574673 opal_accelerator_base_module_memcpy_fn_t mem_copy ;
674+ opal_accelerator_base_module_memmove_async_fn_t mem_move_async ;
575675 opal_accelerator_base_module_memmove_fn_t mem_move ;
576676
577677 opal_accelerator_base_module_mem_alloc_fn_t mem_alloc ;
578678 opal_accelerator_base_module_mem_release_fn_t mem_release ;
679+ opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream ;
680+ opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream ;
579681 opal_accelerator_base_module_get_address_range_fn_t get_address_range ;
580682
581683 opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled ;
@@ -595,6 +697,9 @@ typedef struct {
595697 opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer ;
596698
597699 opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id ;
700+
701+ opal_accelerator_base_module_get_num_devices_fn_t num_devices ;
702+ opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw ;
598703} opal_accelerator_base_module_t ;
599704
600705/**
0 commit comments