@@ -59,10 +59,13 @@ THE SOFTWARE.
5959
6060namespace cooperative_groups {
6161
62- /* * \ brief The base type of all cooperative group types
62+ /* * @ brief The base type of all cooperative group types
6363 *
6464 * \details Holds the key properties of a constructed cooperative group type
6565 * object, like the group type, its size, etc
66+ *
67+ * @note Cooperative groups feature is implemented on Linux, under development
68+ * on Windows.
6669 */
6770class thread_group {
6871 protected:
@@ -111,12 +114,28 @@ class thread_group {
111114 // synchronize the threads in the thread group
112115 __CG_QUALIFIER__ void sync () const ;
113116};
114-
117+ /* *
118+ *-------------------------------------------------------------------------------------------------
119+ *-------------------------------------------------------------------------------------------------
120+ * @defgroup CooperativeG Cooperative Groups
121+ * @ingroup API
122+ * @{
123+ * This section describes the cooperative groups functions of the HIP runtime API.
124+ *
125+ * Cooperative groups provide flexible thread-parallel programming algorithms in which threads
126+ * cooperate and share data to perform collective computations.
127+ *
128+ * @note Cooperative groups feature is implemented on Linux, under development
129+ * on Windows.
130+ *
131+ */
115132/* * \brief The multi-grid cooperative group type
116133 *
117134 * \details Represents an inter-device cooperative group type where the
118135 * participating threads within the group span across multiple
119136 * devices, running the (same) kernel on these devices
137+ * @note The multi-grid cooperative group type is implemented on Linux, under development
138+ * on Windows.
120139 */
121140class multi_grid_group : public thread_group {
122141 // Only these friend functions are allowed to construct an object of this class
@@ -140,22 +159,26 @@ class multi_grid_group : public thread_group {
140159 __CG_QUALIFIER__ void sync () const { internal::multi_grid::sync (); }
141160};
142161
143- /* * \ brief User exposed API interface to construct multi-grid cooperative
162+ /* * @ brief User exposed API interface to construct multi-grid cooperative
144163 * group type object - `multi_grid_group`
145164 *
146165 * \details User is not allowed to directly construct an object of type
147166 * `multi_grid_group`. Instead, he should construct it through this
148167 * API function
168+ * @note This multi-grid cooperative API type is implemented on Linux, under development
169+ * on Windows.
149170 */
150171__CG_QUALIFIER__ multi_grid_group this_multi_grid () {
151172 return multi_grid_group (internal::multi_grid::size ());
152173}
153174
154- /* * \ brief The grid cooperative group type
175+ /* * @ brief The grid cooperative group type
155176 *
156177 * \details Represents an inter-workgroup cooperative group type where the
157178 * participating threads within the group span across multiple
158179 * workgroups running the (same) kernel on the same device
180+ * @note This is implemented on Linux, under development
181+ * on Windows.
159182 */
160183class grid_group : public thread_group {
161184 // Only these friend functions are allowed to construct an object of this class
@@ -172,21 +195,25 @@ class grid_group : public thread_group {
172195 __CG_QUALIFIER__ void sync () const { internal::grid::sync (); }
173196};
174197
175- /* * \ brief User exposed API interface to construct grid cooperative group type
198+ /* * @ brief User exposed API interface to construct grid cooperative group type
176199 * object - `grid_group`
177200 *
178201 * \details User is not allowed to directly construct an object of type
179202 * `grid_group`. Instead, he should construct it through this
180203 * API function
204+ * @note This function is implemented on Linux, under development
205+ * on Windows.
181206 */
182207__CG_QUALIFIER__ grid_group this_grid () { return grid_group (internal::grid::size ()); }
183208
184- /* * \ brief The workgroup (thread-block in CUDA terminology) cooperative group
209+ /* * @ brief The workgroup (thread-block in CUDA terminology) cooperative group
185210 * type
186211 *
187212 * \details Represents an intra-workgroup cooperative group type where the
188213 * participating threads within the group are exactly the same threads
189214 * that participate in the currently executing `workgroup`
215+ * @note This is implemented on Linux, under development
216+ * on Windows.
190217 */
191218class thread_block : public thread_group {
192219 // Only these friend functions are allowed to construct an object of thi
@@ -231,6 +258,8 @@ class thread_block : public thread_group {
231258 * \details User is not allowed to directly construct an object of type
232259 * `thread_block`. Instead, he should construct it through this API
233260 * function.
261+ * @note This function is implemented on Linux, under development
262+ * on Windows.
234263 */
235264__CG_QUALIFIER__ thread_block this_thread_block () {
236265 return thread_block (internal::workgroup::size ());
@@ -240,6 +269,8 @@ __CG_QUALIFIER__ thread_block this_thread_block() {
240269 *
241270 * \details Represents one tiled thread group in a wavefront.
242271 * This group type also supports sub-wave level intrinsics.
272+ * @note This is implemented on Linux, under development
273+ * on Windows.
243274 */
244275
245276class tiled_group : public thread_group {
@@ -288,6 +319,8 @@ class tiled_group : public thread_group {
288319 *
289320 * \details Represents an active thread group in a wavefront.
290321 * This group type also supports sub-wave level intrinsics.
322+ * @note This is implemented on Linux, under development
323+ * on Windows.
291324 */
292325class coalesced_group : public thread_group {
293326 private:
@@ -431,6 +464,8 @@ class coalesced_group : public thread_group {
431464/* * \brief User exposed API to create coalesced groups.
432465 *
433466 * \details A collective operation that groups all active lanes into a new thread group.
467+ * @note This function is implemented on Linux, under development
468+ * on Windows.
434469 */
435470
436471__CG_QUALIFIER__ coalesced_group coalesced_threads () {
@@ -439,6 +474,8 @@ __CG_QUALIFIER__ coalesced_group coalesced_threads() {
439474
440475/* *
441476 * Implementation of all publicly exposed base class APIs
477+ * @note This function is implemented on Linux, under development
478+ * on Windows.
442479 */
443480__CG_QUALIFIER__ uint32_t thread_group::thread_rank () const {
444481 switch (this ->_type ) {
@@ -463,7 +500,11 @@ __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
463500 }
464501 }
465502}
466-
503+ /* *
504+ * Implementation of all publicly exposed thread group API
505+ * @note This function is implemented on Linux, under development
506+ * on Windows.
507+ */
467508__CG_QUALIFIER__ bool thread_group::is_valid () const {
468509 switch (this ->_type ) {
469510 case internal::cg_multi_grid: {
@@ -487,7 +528,11 @@ __CG_QUALIFIER__ bool thread_group::is_valid() const {
487528 }
488529 }
489530}
490-
531+ /* *
532+ * Implementation of all publicly exposed thread group sync API
533+ * @note This function is implemented on Linux, under development
534+ * on Windows.
535+ */
491536__CG_QUALIFIER__ void thread_group::sync () const {
492537 switch (this ->_type ) {
493538 case internal::cg_multi_grid: {
@@ -517,19 +562,40 @@ __CG_QUALIFIER__ void thread_group::sync() const {
517562}
518563
519564/* *
520- * Implemenation of publicly exposed `wrapper` APIs on top of basic cooperative
565+ * Implementation of publicly exposed `wrapper` API on top of basic cooperative
521566 * group type APIs
567+ * @note This function is implemented on Linux, under development
568+ * on Windows.
522569 */
523570template <class CGTy > __CG_QUALIFIER__ uint32_t group_size (CGTy const & g) { return g.size (); }
524-
571+ /* *
572+ * Implementation of publicly exposed `wrapper` API on top of basic cooperative
573+ * group type APIs
574+ * @note This function is implemented on Linux, under development
575+ * on Windows.
576+ */
525577template <class CGTy > __CG_QUALIFIER__ uint32_t thread_rank (CGTy const & g) {
526578 return g.thread_rank ();
527579}
528-
580+ /* *
581+ * Implementation of publicly exposed `wrapper` API on top of basic cooperative
582+ * group type APIs
583+ * @note This function is implemented on Linux, under development
584+ * on Windows.
585+ */
529586template <class CGTy > __CG_QUALIFIER__ bool is_valid (CGTy const & g) { return g.is_valid (); }
530-
587+ /* *
588+ * Implementation of publicly exposed `wrapper` API on top of basic cooperative
589+ * group type APIs
590+ * @note This function is implemented on Linux, under development
591+ * on Windows.
592+ */
531593template <class CGTy > __CG_QUALIFIER__ void sync (CGTy const & g) { g.sync (); }
532-
594+ /* *
595+ * template class tile_base
596+ * @note This class is implemented on Linux, under development
597+ * on Windows.
598+ */
533599template <unsigned int tileSize> class tile_base {
534600 protected:
535601 _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize;
@@ -543,7 +609,11 @@ template <unsigned int tileSize> class tile_base {
543609 // Number of threads within this tile
544610 __CG_STATIC_QUALIFIER__ unsigned int size () { return numThreads; }
545611};
546-
612+ /* *
613+ * template class thread_block_tile_base
614+ * @note This class is implemented on Linux, under development
615+ * on Windows.
616+ */
547617template <unsigned int size> class thread_block_tile_base : public tile_base <size> {
548618 static_assert (is_valid_tile_size<size>::value,
549619 " Tile size is either not a power of 2 or greater than the wavefront size" );
@@ -578,6 +648,8 @@ template <unsigned int size> class thread_block_tile_base : public tile_base<siz
578648/* * \brief Group type - thread_block_tile
579649 *
580650 * \details Represents one tile of thread group.
651+ * @note This type is implemented on Linux, under development
652+ * on Windows.
581653 */
582654
583655template <unsigned int tileSize, class ParentCGTy = void >
@@ -598,6 +670,10 @@ class thread_block_tile_type : public thread_block_tile_base<tileSize>, public t
598670 using tbtBase::size;
599671 using tbtBase::sync;
600672 using tbtBase::thread_rank;
673+ // end of the Cooperative Groups (CooperativeG) documentation group
674+ /* *
675+ * @}
676+ */
601677};
602678
603679
0 commit comments