@@ -855,6 +855,42 @@ public:
855855} // syclcompat
856856```
857857
858+ ### ptr_to_int
859+
860+ The following CUDA backend-specific function is introduced in order to
861+ translate local memory pointers into `uint32_t` or `size_t` variables that
862+ contain a byte address in the local (local refers to `.shared` in NVPTX) memory
863+ state space.
864+
865+ ``` c++
866+ namespace syclcompat {
867+ template <typename T>
868+ __syclcompat_inline__
869+ std::enable_if_t<std::is_same_v<T, uint32_t> || std::is_same_v<T, size_t>,
870+ T>
871+ ptr_to_int(void *ptr);
872+ } // syclcompat
873+ ```
874+
875+ These variables can be used in inline PTX instructions that take address
876+ operands. Such inline PTX instructions are commonly used in optimized
877+ libraries. A simplified example usage of the above function is as follows:
878+
879+ ``` c++
880+ half *data = syclcompat::local_mem<half[NUM_ELEMENTS]>();
881+ // ...
882+ // ...
883+ T addr =
884+ syclcompat::ptr_to_int<T>(reinterpret_cast<char *>(data) + (id % 8) * 16);
885+
886+ uint32_t fragment;
887+ #if defined(__NVPTX__)
888+ asm volatile("ldmatrix.sync.aligned.m8n8.x1.shared.b16 {%0}, [%1];\n"
889+ : "=r"(fragment)
890+ : "r"(addr));
891+ #endif
892+ ```
893+
858894### Device Information
859895
860896`sycl::device` properties are encapsulated using the `device_info` helper class.
@@ -947,42 +983,6 @@ public:
947983};
948984```
949985
950- ### ptr_to_int
951-
952- The following CUDA backend-specific function is introduced in order to
953- translate local memory pointers into `uint32_t` or `size_t` variables that
954- contain a byte address in the local (local refers to `.shared` in NVPTX) memory
955- state space.
956-
957- ``` c++
958- namespace syclcompat {
959- template <typename T>
960- __syclcompat_inline__
961- std::enable_if_t<std::is_same_v<T, uint32_t> || std::is_same_v<T, size_t>,
962- T>
963- ptr_to_int(void *ptr)
964- } // syclcompat
965- ```
966-
967- These variables can be used in inline PTX instructions that take address
968- operands. Such inline PTX instructions are commonly used in optimized
969- libraries. A simplified example usage of the above function is as follows:
970-
971- ``` c++
972- half *data = syclcompat::local_mem<half[NUM_ELEMENTS]>();
973- // ...
974- // ...
975- T addr =
976- syclcompat::ptr_to_int<T>(reinterpret_cast<char *>(data) + (id % 8) * 16);
977-
978- uint32_t fragment;
979- #if defined(__NVPTX__)
980- asm volatile("ldmatrix.sync.aligned.m8n8.x1.shared.b16 {%0}, [%1];\n"
981- : "=r"(fragment)
982- : "r"(addr));
983- #endif
984- ```
985-
986986### Device Management
987987
988988Multiple SYCL functionalities are exposed through utility functions to manage
0 commit comments