Skip to content

Commit c80dd30

Browse files
authored
Merge branch 'develop' into users/omarin/matmul-ALMIOPEN-1014
2 parents 3af97a3 + 02f625e commit c80dd30

File tree

471 files changed: +19271 -15532 lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

471 files changed: +19271 -15532 lines changed

.github/CODEOWNERS

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,11 @@
179179
/projects/rocthrust/**/.readthedocs.yaml @ROCm/prims-rands-reviewers @ROCm/prims-docs-reviewers
180180
/projects/rocthrust/docs/ @ROCm/prims-rands-reviewers @ROCm/prims-docs-reviewers
181181

182+
/projects/composablekernel/**/*.md @ROCm/prims-rands-reviewers @ROCm/ck-docs-reviewers
183+
/projects/composablekernel/**/*.rst @ROCm/prims-rands-reviewers @ROCm/ck-docs-reviewers
184+
/projects/composablekernel/**/.readthedocs.yaml @ROCm/prims-rands-reviewers @ROCm/ck-docs-reviewers
185+
/projects/composablekernel/docs/ @ROCm/prims-rands-reviewers @ROCm/ck-docs-reviewers
186+
182187
/shared/tensile/**/*.md @ROCm/tensile-reviewers @ROCm/tensile-docs-reviewers
183188
/shared/tensile/**/*.rst @ROCm/tensile-reviewers @ROCm/tensile-docs-reviewers
184189
/shared/tensile/**/.readthedocs.yaml @ROCm/tensile-reviewers @ROCm/tensile-docs-reviewers

dnn-providers/miopen-provider/.clang-tidy

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,8 @@ Checks: >
3333
-portability-avoid-pragma-once,
3434
-modernize-use-scoped-lock,
3535
-readability-use-concise-preprocessor-directives,
36-
-readability-math-missing-parentheses
36+
-readability-math-missing-parentheses,
37+
-misc-*
3738
3839
WarningsAsErrors: "*"
3940
HeaderFileExtensions: ['h','hpp']

dnn-providers/miopen-provider/MiopenUtils.hpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
#define LOG_ON_MIOPEN_FAILURE(status) \
2121
do \
2222
{ \
23-
if(status != miopenStatusSuccess) \
23+
if((status) != miopenStatusSuccess) \
2424
{ \
2525
HIPDNN_PLUGIN_LOG_ERROR("MIOpen error occurred: " << miopenGetErrorString(status)); \
2626
} \
@@ -29,7 +29,7 @@
2929
#define THROW_ON_MIOPEN_FAILURE(status) \
3030
do \
3131
{ \
32-
if(status != miopenStatusSuccess) \
32+
if((status) != miopenStatusSuccess) \
3333
{ \
3434
throw hipdnn_plugin_sdk::HipdnnPluginException( \
3535
HIPDNN_PLUGIN_STATUS_INTERNAL_ERROR, \
@@ -116,18 +116,18 @@ class ScopedTuningPolicy
116116
miopenTuningPolicy_t _originalPolicy{miopenTuningPolicyNone};
117117
};
118118

119-
#define HIPDNN_PREPEND_MESSAGE_ON_THROW(statement, message) \
120-
do \
121-
{ \
122-
try \
123-
{ \
124-
statement; \
125-
} \
126-
catch(hipdnn_plugin_sdk::HipdnnPluginException error) \
127-
{ \
128-
throw hipdnn_plugin_sdk::HipdnnPluginException(error.getStatus(), \
129-
message + error.getMessage()); \
130-
} \
119+
#define HIPDNN_PREPEND_MESSAGE_ON_THROW(statement, message) \
120+
do \
121+
{ \
122+
try \
123+
{ \
124+
statement; \
125+
} \
126+
catch(hipdnn_plugin_sdk::HipdnnPluginException error) \
127+
{ \
128+
throw hipdnn_plugin_sdk::HipdnnPluginException(error.getStatus(), \
129+
(message) + error.getMessage()); \
130+
} \
131131
} while(0)
132132

133133
namespace miopen_plugin::miopen_utils

dnn-providers/miopen-provider/tests/engines/plans/TestMiopenConvPlanBuilder.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -687,7 +687,10 @@ TEST_F(TestGpuMiopenConvPlanBuilder, InitializeExecutionSettingsSetsWorkspaceSiz
687687
EXPECT_NO_THROW(_planBuilder.buildPlan(_handle, graph, mockEngineConfig, ctx));
688688
auto workspaceLimit = executionSettings.workspaceSizeLimit();
689689
ASSERT_TRUE(workspaceLimit.has_value());
690-
EXPECT_EQ(*workspaceLimit, testWorkspaceSize);
690+
if(workspaceLimit.has_value())
691+
{
692+
EXPECT_EQ(*workspaceLimit, testWorkspaceSize);
693+
}
691694
}
692695

693696
TEST_F(TestGpuMiopenConvPlanBuilder,

projects/composablekernel/include/ck/tensor_description/tensor_adaptor.hpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -350,16 +350,16 @@ __host__ __device__ constexpr auto chain_tensor_adaptors(const TensorAdaptor0& a
350350
});
351351

352352
// match hidden id
353-
static_for<0, ndim_low_1, 1>{}([&](auto idim_low_1) {
354-
static_for<0, ndim_bottom_1, 1>{}([&](auto idim_bottom_1) {
355-
// if this low dim is bottom dim, then do id matching
356-
if constexpr(low_dim_hidden_ids_1[idim_low_1] ==
357-
TensorAdaptor1::GetBottomDimensionHiddenIds()[idim_bottom_1])
358-
{
359-
low_dim_hidden_ids_1_mod_(idim_low_1) =
360-
TensorAdaptor0::GetTopDimensionHiddenIds()[idim_bottom_1];
361-
}
362-
});
353+
static_ford<Sequence<ndim_low_1, ndim_bottom_1>>{}([&](auto ii) {
354+
constexpr auto idim_low_1 = Number<ii[Number<0>{}]>{};
355+
constexpr auto idim_bottom_1 = Number<ii[Number<1>{}]>{};
356+
// if this low dim is bottom dim, then do id matching
357+
if constexpr(low_dim_hidden_ids_1[idim_low_1] ==
358+
TensorAdaptor1::GetBottomDimensionHiddenIds()[idim_bottom_1])
359+
{
360+
low_dim_hidden_ids_1_mod_(idim_low_1) =
361+
TensorAdaptor0::GetTopDimensionHiddenIds()[idim_bottom_1];
362+
}
363363
});
364364

365365
return low_dim_hidden_ids_1_mod_;

projects/composablekernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -202,22 +202,22 @@ struct BlockwiseGemmWmmaops_pipeline_base
202202
using AScaleThreadDesc = decltype(AScaleStruct::scale_thread_desc);
203203
using BScaleThreadDesc = decltype(BScaleStruct::scale_thread_desc);
204204

205-
static_for<0, num_scale_m_block, 1>{}([&](auto m0) {
206-
static_for<0, num_scale_n_block, 1>{}([&](auto n0) {
207-
static_for<0, num_scale_k_block, 1>{}([&](auto k0) {
208-
constexpr index_t c_offset =
209-
CScaleThreadDesc{}.CalculateOffset(make_tuple(k0, m0, n0));
210-
constexpr index_t a_offset =
211-
AScaleThreadDesc{}.CalculateOffset(make_tuple(m0, k0));
212-
constexpr index_t b_offset =
213-
BScaleThreadDesc{}.CalculateOffset(make_tuple(n0, k0));
214-
215-
c_scale_thread_bufs(I0)(Number<c_offset>{}) =
216-
a_scale_struct.scale_thread_bufs(I0)[Number<a_offset>{}] *
217-
b_scale_struct.scale_thread_bufs(I0)[Number<b_offset>{}];
218-
});
205+
static_ford<Sequence<num_scale_m_block, num_scale_n_block, num_scale_k_block>>{}(
206+
[&](auto mnk) {
207+
constexpr auto m0 = Number<mnk[Number<0>{}]>{};
208+
constexpr auto n0 = Number<mnk[Number<1>{}]>{};
209+
constexpr auto k0 = Number<mnk[Number<2>{}]>{};
210+
constexpr index_t c_offset =
211+
CScaleThreadDesc{}.CalculateOffset(make_tuple(k0, m0, n0));
212+
constexpr index_t a_offset =
213+
AScaleThreadDesc{}.CalculateOffset(make_tuple(m0, k0));
214+
constexpr index_t b_offset =
215+
BScaleThreadDesc{}.CalculateOffset(make_tuple(n0, k0));
216+
217+
c_scale_thread_bufs(I0)(Number<c_offset>{}) =
218+
a_scale_struct.scale_thread_bufs(I0)[Number<a_offset>{}] *
219+
b_scale_struct.scale_thread_bufs(I0)[Number<b_offset>{}];
219220
});
220-
});
221221
}
222222

223223
__device__ void Clear()

0 commit comments

Comments
 (0)