Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit 0e37910

Browse files
committed
SWDEV-389033 - Update header for cooperate group
Change-Id: Ica8f99c644a32835bf480b52a6a2af861f1526c0
1 parent 7f33f56 commit 0e37910

File tree

2 files changed

+117
-20
lines changed

2 files changed

+117
-20
lines changed

include/hip/amd_detail/amd_hip_cooperative_groups.h

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,13 @@ THE SOFTWARE.
5959

6060
namespace cooperative_groups {
6161

62-
/** \brief The base type of all cooperative group types
62+
/** @brief The base type of all cooperative group types
6363
*
6464
* \details Holds the key properties of a constructed cooperative group type
6565
* object, like the group type, its size, etc
66+
*
67+
* @note Cooperative groups feature is implemented on Linux, under development
68+
* on Windows.
6669
*/
6770
class thread_group {
6871
protected:
@@ -111,12 +114,28 @@ class thread_group {
111114
// synchronize the threads in the thread group
112115
__CG_QUALIFIER__ void sync() const;
113116
};
114-
117+
/**
118+
*-------------------------------------------------------------------------------------------------
119+
*-------------------------------------------------------------------------------------------------
120+
* @defgroup CooperativeG Cooperative Groups
121+
* @ingroup API
122+
* @{
123+
* This section describes the cooperative groups functions of HIP runtime API.
124+
*
125+
* Cooperative groups provide flexible thread-parallel programming algorithms in which threads
126+
* cooperate and share data to perform collective computations.
127+
*
128+
* @note Cooperative groups feature is implemented on Linux, under development
129+
* on Windows.
130+
*
131+
*/
115132
/** \brief The multi-grid cooperative group type
116133
*
117134
* \details Represents an inter-device cooperative group type where the
118135
* participating threads within the group span across multiple
119136
* devices, running the (same) kernel on these devices
137+
* @note The multi-grid cooperative group type is implemented on Linux, under development
138+
* on Windows.
120139
*/
121140
class multi_grid_group : public thread_group {
122141
// Only these friend functions are allowed to construct an object of this class
@@ -140,22 +159,26 @@ class multi_grid_group : public thread_group {
140159
__CG_QUALIFIER__ void sync() const { internal::multi_grid::sync(); }
141160
};
142161

143-
/** \brief User exposed API interface to construct multi-grid cooperative
162+
/** @brief User exposed API interface to construct multi-grid cooperative
144163
* group type object - `multi_grid_group`
145164
*
146165
* \details User is not allowed to directly construct an object of type
147166
* `multi_grid_group`. Instead, he should construct it through this
148167
* API function
168+
* @note This multi-grid cooperative API type is implemented on Linux, under development
169+
* on Windows.
149170
*/
150171
__CG_QUALIFIER__ multi_grid_group this_multi_grid() {
151172
return multi_grid_group(internal::multi_grid::size());
152173
}
153174

154-
/** \brief The grid cooperative group type
175+
/** @brief The grid cooperative group type
155176
*
156177
* \details Represents an inter-workgroup cooperative group type where the
157178
* participating threads within the group span across multiple
158179
* workgroups running the (same) kernel on the same device
180+
* @note This is implemented on Linux, under development
181+
* on Windows.
159182
*/
160183
class grid_group : public thread_group {
161184
// Only these friend functions are allowed to construct an object of this class
@@ -172,21 +195,25 @@ class grid_group : public thread_group {
172195
__CG_QUALIFIER__ void sync() const { internal::grid::sync(); }
173196
};
174197

175-
/** \brief User exposed API interface to construct grid cooperative group type
198+
/** @brief User exposed API interface to construct grid cooperative group type
176199
* object - `grid_group`
177200
*
178201
* \details User is not allowed to directly construct an object of type
179202
* `grid_group`. Instead, he should construct it through this
180203
* API function
204+
* @note This function is implemented on Linux, under development
205+
* on Windows.
181206
*/
182207
__CG_QUALIFIER__ grid_group this_grid() { return grid_group(internal::grid::size()); }
183208

184-
/** \brief The workgroup (thread-block in CUDA terminology) cooperative group
209+
/** @brief The workgroup (thread-block in CUDA terminology) cooperative group
185210
* type
186211
*
187212
* \details Represents an intra-workgroup cooperative group type where the
188213
* participating threads within the group are exactly the same threads
189214
* which participate in the currently executing `workgroup`
215+
* @note This is implemented on Linux, under development
216+
* on Windows.
190217
*/
191218
class thread_block : public thread_group {
192219
// Only these friend functions are allowed to construct an object of thi
@@ -231,6 +258,8 @@ class thread_block : public thread_group {
231258
* \details User is not allowed to directly construct an object of type
232259
* `thread_block`. Instead, he should construct it through this API
233260
* function.
261+
* @note This function is implemented on Linux, under development
262+
* on Windows.
234263
*/
235264
__CG_QUALIFIER__ thread_block this_thread_block() {
236265
return thread_block(internal::workgroup::size());
@@ -240,6 +269,8 @@ __CG_QUALIFIER__ thread_block this_thread_block() {
240269
*
241270
* \details Represents one tiled thread group in a wavefront.
242271
* This group type also supports sub-wave level intrinsics.
272+
* @note This is implemented on Linux, under development
273+
* on Windows.
243274
*/
244275

245276
class tiled_group : public thread_group {
@@ -288,6 +319,8 @@ class tiled_group : public thread_group {
288319
*
289320
* \details Represents an active thread group in a wavefront.
290321
* This group type also supports sub-wave level intrinsics.
322+
* @note This is implemented on Linux, under development
323+
* on Windows.
291324
*/
292325
class coalesced_group : public thread_group {
293326
private:
@@ -431,6 +464,8 @@ class coalesced_group : public thread_group {
431464
/** \brief User exposed API to create coalesced groups.
432465
*
433466
* \details A collective operation that groups all active lanes into a new thread group.
467+
* @note This function is implemented on Linux, under development
468+
* on Windows.
434469
*/
435470

436471
__CG_QUALIFIER__ coalesced_group coalesced_threads() {
@@ -439,6 +474,8 @@ __CG_QUALIFIER__ coalesced_group coalesced_threads() {
439474

440475
/**
441476
* Implementation of all publicly exposed base class APIs
477+
* @note This function is implemented on Linux, under development
478+
* on Windows.
442479
*/
443480
__CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
444481
switch (this->_type) {
@@ -463,7 +500,11 @@ __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
463500
}
464501
}
465502
}
466-
503+
/**
504+
* Implementation of all publicly exposed thread group API
505+
* @note This function is implemented on Linux, under development
506+
* on Windows.
507+
*/
467508
__CG_QUALIFIER__ bool thread_group::is_valid() const {
468509
switch (this->_type) {
469510
case internal::cg_multi_grid: {
@@ -487,7 +528,11 @@ __CG_QUALIFIER__ bool thread_group::is_valid() const {
487528
}
488529
}
489530
}
490-
531+
/**
532+
* Implementation of all publicly exposed thread group sync API
533+
* @note This function is implemented on Linux, under development
534+
* on Windows.
535+
*/
491536
__CG_QUALIFIER__ void thread_group::sync() const {
492537
switch (this->_type) {
493538
case internal::cg_multi_grid: {
@@ -517,19 +562,40 @@ __CG_QUALIFIER__ void thread_group::sync() const {
517562
}
518563

519564
/**
520-
* Implemenation of publicly exposed `wrapper` APIs on top of basic cooperative
565+
* Implementation of publicly exposed `wrapper` API on top of basic cooperative
521566
* group type APIs
567+
* @note This function is implemented on Linux, under development
568+
* on Windows.
522569
*/
523570
template <class CGTy> __CG_QUALIFIER__ uint32_t group_size(CGTy const& g) { return g.size(); }
524-
571+
/**
572+
* Implementation of publicly exposed `wrapper` API on top of basic cooperative
573+
* group type APIs
574+
* @note This function is implemented on Linux, under development
575+
* on Windows.
576+
*/
525577
template <class CGTy> __CG_QUALIFIER__ uint32_t thread_rank(CGTy const& g) {
526578
return g.thread_rank();
527579
}
528-
580+
/**
581+
* Implementation of publicly exposed `wrapper` API on top of basic cooperative
582+
* group type APIs
583+
* @note This function is implemented on Linux, under development
584+
* on Windows.
585+
*/
529586
template <class CGTy> __CG_QUALIFIER__ bool is_valid(CGTy const& g) { return g.is_valid(); }
530-
587+
/**
588+
* Implementation of publicly exposed `wrapper` API on top of basic cooperative
589+
* group type APIs
590+
* @note This function is implemented on Linux, under development
591+
* on Windows.
592+
*/
531593
template <class CGTy> __CG_QUALIFIER__ void sync(CGTy const& g) { g.sync(); }
532-
594+
/**
595+
* template class tile_base
596+
* @note This class is implemented on Linux, under development
597+
* on Windows.
598+
*/
533599
template <unsigned int tileSize> class tile_base {
534600
protected:
535601
_CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize;
@@ -543,7 +609,11 @@ template <unsigned int tileSize> class tile_base {
543609
// Number of threads within this tile
544610
__CG_STATIC_QUALIFIER__ unsigned int size() { return numThreads; }
545611
};
546-
612+
/**
613+
* template class thread_block_tile_base
614+
* @note This class is implemented on Linux, under development
615+
* on Windows.
616+
*/
547617
template <unsigned int size> class thread_block_tile_base : public tile_base<size> {
548618
static_assert(is_valid_tile_size<size>::value,
549619
"Tile size is either not a power of 2 or greater than the wavefront size");
@@ -578,6 +648,8 @@ template <unsigned int size> class thread_block_tile_base : public tile_base<siz
578648
/** \brief Group type - thread_block_tile
579649
*
580650
* \details Represents one tile of thread group.
651+
* @note This type is implemented on Linux, under development
652+
* on Windows.
581653
*/
582654

583655
template <unsigned int tileSize, class ParentCGTy = void>
@@ -598,6 +670,10 @@ class thread_block_tile_type : public thread_block_tile_base<tileSize>, public t
598670
using tbtBase::size;
599671
using tbtBase::sync;
600672
using tbtBase::thread_rank;
673+
// end of cooperative groups
674+
/**
675+
* @}
676+
*/
601677
};
602678

603679

include/hip/amd_detail/hip_cooperative_groups_helper.h

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,10 @@ using is_valid_type =
8282

8383
namespace internal {
8484

85-
/** \brief Enums representing different cooperative group types
85+
/**
86+
* @brief Enums representing different cooperative group types
87+
* @note This enum is only applicable on Linux.
88+
*
8689
*/
8790
typedef enum {
8891
cg_invalid,
@@ -92,9 +95,23 @@ typedef enum {
9295
cg_tiled_group,
9396
cg_coalesced_group
9497
} group_type;
95-
9698
/**
97-
* Functionalities related to multi-grid cooperative group type
99+
* @ingroup CooperativeG
100+
* @{
101+
* This section describes the cooperative groups functions of HIP runtime API.
102+
*
103+
* Cooperative groups provide flexible thread-parallel programming algorithms in which threads
104+
* cooperate and share data to perform collective computations.
105+
*
106+
* @note Cooperative groups feature is implemented on Linux, under development
107+
* on Windows.
108+
*
109+
*/
110+
/**
111+
*
112+
* @brief Functionalities related to multi-grid cooperative group type
113+
* @note The following cooperative groups functions are only applicable on Linux.
114+
*
98115
*/
99116
namespace multi_grid {
100117

@@ -116,7 +133,8 @@ __CG_STATIC_QUALIFIER__ void sync() { __ockl_multi_grid_sync(); }
116133
} // namespace multi_grid
117134

118135
/**
119-
* Functionalities related to grid cooperative group type
136+
* @brief Functionalities related to grid cooperative group type
137+
* @note The following cooperative groups functions are only applicable on Linux.
120138
*/
121139
namespace grid {
122140

@@ -149,8 +167,9 @@ __CG_STATIC_QUALIFIER__ void sync() { __ockl_grid_sync(); }
149167
} // namespace grid
150168

151169
/**
152-
* Functionalities related to `workgroup` (thread_block in CUDA terminology)
170+
* @brief Functionalities related to `workgroup` (thread_block in CUDA terminology)
153171
* cooperative group type
172+
* @note The following cooperative groups functions are only applicable on Linux.
154173
*/
155174
namespace workgroup {
156175

@@ -216,7 +235,9 @@ __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int
216235
} // namespace internal
217236

218237
} // namespace cooperative_groups
219-
238+
/**
239+
* @}
240+
*/
220241
#pragma clang diagnostic pop
221242
#endif // __cplusplus
222243
#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H

0 commit comments

Comments
 (0)