Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions projects/rocsparse/clients/tests/test_csric0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Definitions:
- &M_N_range_extra
- { M: 10, N: 10 }
- { M: 235, N: 235 }
- { M: 600, N: 600 }
- { M: 1200, N: 1200 }

Tests:
Expand Down
2 changes: 2 additions & 0 deletions projects/rocsparse/library/src/level2/csrsv_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,8 @@ namespace rocsparse
// Store the rows result in y
rocsparse::nontemporal_store(local_sum, &y[row * y_inc]);

__builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");

// Mark row as done
__hip_atomic_store(&done_array[row], 1, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
}
Expand Down
2 changes: 1 addition & 1 deletion projects/rocsparse/library/src/level3/csrsm_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

namespace rocsparse
{
template <uint32_t BLOCKSIZE, uint32_t WF_SIZE, bool SLEEP, typename I, typename J, typename T>
template <uint32_t BLOCKSIZE, bool SLEEP, typename I, typename J, typename T>
ROCSPARSE_DEVICE_ILF void csrsm_device(rocsparse_operation transB,
J m,
J nrhs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include "csrsm_device.h"
namespace rocsparse
{
template <uint32_t BLOCKSIZE, uint32_t WFSIZE, bool SLEEP, typename I, typename J, typename T>
template <uint32_t BLOCKSIZE, bool SLEEP, typename I, typename J, typename T>
ROCSPARSE_KERNEL(BLOCKSIZE)
void csrsm(rocsparse_operation transB,
J m,
Expand All @@ -57,21 +57,21 @@ namespace rocsparse
{
ROCSPARSE_DEVICE_HOST_SCALAR_GET(alpha);

rocsparse::csrsm_device<BLOCKSIZE, WFSIZE, SLEEP>(transB,
m,
nrhs,
alpha,
csr_row_ptr,
csr_col_ind,
csr_val,
B,
ldb,
done_array,
map,
zero_pivot,
idx_base,
fill_mode,
diag_type);
rocsparse::csrsm_device<BLOCKSIZE, SLEEP>(transB,
m,
nrhs,
alpha,
csr_row_ptr,
csr_col_ind,
csr_val,
B,
ldb,
done_array,
map,
zero_pivot,
idx_base,
fill_mode,
diag_type);
}

template <typename I, typename J, typename T>
Expand Down Expand Up @@ -211,11 +211,10 @@ namespace rocsparse

if(blockdim == 64)
{

if(gcn_arch_name == rocpsarse_arch_names::gfx908 && asicRev < 2)
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<64, 64, true>),
(rocsparse::csrsm<64, true>),
csrsm_blocks,
csrsm_threads,
0,
Expand All @@ -240,7 +239,7 @@ namespace rocsparse
else
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<64, 64, false>),
(rocsparse::csrsm<64, false>),
csrsm_blocks,
csrsm_threads,
0,
Expand Down Expand Up @@ -268,7 +267,7 @@ namespace rocsparse
if(gcn_arch_name == rocpsarse_arch_names::gfx908 && asicRev < 2)
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<128, 64, true>),
(rocsparse::csrsm<128, true>),
csrsm_blocks,
csrsm_threads,
0,
Expand All @@ -293,7 +292,7 @@ namespace rocsparse
else
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<128, 64, false>),
(rocsparse::csrsm<128, false>),
csrsm_blocks,
csrsm_threads,
0,
Expand Down Expand Up @@ -321,7 +320,7 @@ namespace rocsparse
if(gcn_arch_name == rocpsarse_arch_names::gfx908 && asicRev < 2)
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<256, 64, true>),
(rocsparse::csrsm<256, true>),
csrsm_blocks,
csrsm_threads,
0,
Expand All @@ -346,7 +345,7 @@ namespace rocsparse
else
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<256, 64, false>),
(rocsparse::csrsm<256, false>),
csrsm_blocks,
csrsm_threads,
0,
Expand Down Expand Up @@ -374,7 +373,7 @@ namespace rocsparse
if(gcn_arch_name == rocpsarse_arch_names::gfx908 && asicRev < 2)
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<512, 64, true>),
(rocsparse::csrsm<512, true>),
csrsm_blocks,
csrsm_threads,
0,
Expand All @@ -399,7 +398,7 @@ namespace rocsparse
else
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<512, 64, false>),
(rocsparse::csrsm<512, false>),
csrsm_blocks,
csrsm_threads,
0,
Expand Down Expand Up @@ -427,7 +426,7 @@ namespace rocsparse
if(gcn_arch_name == rocpsarse_arch_names::gfx908 && asicRev < 2)
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<1024, 64, true>),
(rocsparse::csrsm<1024, true>),
csrsm_blocks,
csrsm_threads,
0,
Expand All @@ -452,7 +451,7 @@ namespace rocsparse
else
{
RETURN_IF_HIPLAUNCHKERNELGGL_ERROR(
(rocsparse::csrsm<1024, 64, false>),
(rocsparse::csrsm<1024, false>),
csrsm_blocks,
csrsm_threads,
0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ namespace rocsparse
}
}

__builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");

if(lid == WF_SIZE - 1)
{
// Last lane writes "we are done" flag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ namespace rocsparse
}
}

__builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");

if(lid == WFSIZE - 1)
{
// Last lane writes "we are done" flag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ rocsparse_status rocsparse::csric0_kernel_launch(rocsparse_handle handle,

rocsparse::csric0_kernel_launch_t launch{};

if(sleep || (trm_info->get_max_nnz() > 1024))
if(sleep || (trm_info->get_max_nnz() > 512))
{
launch = rocsparse::find_csric0_kernel_binsearch_launch(handle, csric0_info, A);
}
Expand Down
Loading