Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions backends/cuda-ref/ceed-cuda-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -1909,9 +1909,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C
if (!is_active) continue;

// Update unit vector
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0));
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0));
{
// Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size)
CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size;
CeedSize start = node * 1 + comp * (elem_size * num_elem);
CeedSize stop = (comp + 1) * (elem_size * num_elem);

if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0));

node = s % elem_size, comp = s / elem_size;
start = node * 1 + comp * (elem_size * num_elem);
stop = (comp + 1) * (elem_size * num_elem);
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0));
}

// Basis action
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
Expand Down
30 changes: 16 additions & 14 deletions backends/cuda-ref/ceed-cuda-ref-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,20 +223,20 @@ static int CeedVectorSetArray_Cuda(const CeedVector vec, const CeedMemType mem_t
//------------------------------------------------------------------------------
// Copy host array to value strided
//------------------------------------------------------------------------------
static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) {
for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i];
static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Copy device array to value strided (impl in .cu file)
//------------------------------------------------------------------------------
int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array);
int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);

//------------------------------------------------------------------------------
// Copy a vector to a value strided
//------------------------------------------------------------------------------
static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) {
static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
CeedSize length;
CeedVector_Cuda *impl;

Expand All @@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s
CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
length = length_vec < length_copy ? length_vec : length_copy;
}
if (stop == -1) stop = length;
// Set value for synced device/host array
if (impl->d_array) {
CeedScalar *copy_array;
Expand All @@ -260,21 +261,21 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s
CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
CeedCallBackend(CeedGetCublasHandle_Cuda(ceed, &handle));
#if defined(CEED_SCALAR_IS_FP32)
CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
#else /* CEED_SCALAR */
CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
#endif /* CEED_SCALAR */
CeedCallBackend(CeedDestroy(&ceed));
#else /* CUDA_VERSION */
CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, step, length, copy_array));
CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, stop, step, copy_array));
#endif /* CUDA_VERSION */
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
impl->h_array = NULL;
} else if (impl->h_array) {
CeedScalar *copy_array;

CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, step, length, copy_array));
CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, stop, step, copy_array));
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
impl->d_array = NULL;
} else {
Expand Down Expand Up @@ -336,31 +337,32 @@ static int CeedVectorSetValue_Cuda(CeedVector vec, CeedScalar val) {
//------------------------------------------------------------------------------
// Set host array to value strided
//------------------------------------------------------------------------------
static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
for (CeedSize i = start; i < length; i += step) h_array[i] = val;
static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Set device array to value strided (impl in .cu file)
//------------------------------------------------------------------------------
int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val);
int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);

//------------------------------------------------------------------------------
// Set a vector to a value strided
//------------------------------------------------------------------------------
static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) {
static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
CeedSize length;
CeedVector_Cuda *impl;

CeedCallBackend(CeedVectorGetData(vec, &impl));
CeedCallBackend(CeedVectorGetLength(vec, &length));
// Set value for synced device/host array
if (stop == -1) stop = length;
if (impl->d_array) {
CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, step, length, val));
CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, stop, step, val));
impl->h_array = NULL;
} else if (impl->h_array) {
CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, step, length, val));
CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, stop, step, val));
impl->d_array = NULL;
} else {
return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
Expand Down
32 changes: 16 additions & 16 deletions backends/cuda-ref/kernels/cuda-ref-vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@
//------------------------------------------------------------------------------
// Kernel for copy strided on device
//------------------------------------------------------------------------------
__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar *__restrict__ vec_copy) {
__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *__restrict__ vec_copy) {
const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;

if (index < size) {
if ((index - start) % step == 0) vec_copy[index] = vec[index];
if (index < stop - start) {
if (index % step == 0) vec_copy[start + index] = vec[start + index];
}
}

//------------------------------------------------------------------------------
// Copy strided on device memory
//------------------------------------------------------------------------------
extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array) {
extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array) {
const int block_size = 512;
const CeedSize vec_size = length;
int grid_size = vec_size / block_size;
const CeedSize copy_size = stop - start;
int grid_size = copy_size / block_size;

if (block_size * grid_size < vec_size) grid_size += 1;
copyStridedK<<<grid_size, block_size>>>(d_array, start, step, length, d_copy_array);
if (block_size * grid_size < copy_size) grid_size += 1;
copyStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, d_copy_array);
return 0;
}

Expand Down Expand Up @@ -57,24 +57,24 @@ extern "C" int CeedDeviceSetValue_Cuda(CeedScalar *d_array, CeedSize length, Cee
//------------------------------------------------------------------------------
// Kernel for set value strided on device
//------------------------------------------------------------------------------
__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar val) {
__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x;

if (index < size) {
if ((index - start) % step == 0) vec[index] = val;
if (index < stop - start) {
if (index % step == 0) vec[start + index] = val;
}
}

//------------------------------------------------------------------------------
// Set value strided on device memory
//------------------------------------------------------------------------------
extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
const int block_size = 512;
const CeedSize vec_size = length;
int grid_size = vec_size / block_size;
const CeedSize set_size = stop - start;
int grid_size = set_size / block_size;

if (block_size * grid_size < vec_size) grid_size += 1;
setValueStridedK<<<grid_size, block_size>>>(d_array, start, step, length, val);
if (block_size * grid_size < set_size) grid_size += 1;
setValueStridedK<<<grid_size, block_size>>>(d_array, start, stop, step, val);
return 0;
}

Expand Down
17 changes: 14 additions & 3 deletions backends/hip-ref/ceed-hip-ref-operator.c
Original file line number Diff line number Diff line change
Expand Up @@ -1906,9 +1906,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce
if (!is_active) continue;

// Update unit vector
if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0));
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0));
{
// Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size)
CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size;
CeedSize start = node * 1 + comp * (elem_size * num_elem);
CeedSize stop = (comp + 1) * (elem_size * num_elem);

if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0));
else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0));

node = s % elem_size, comp = s / elem_size;
start = node * 1 + comp * (elem_size * num_elem);
stop = (comp + 1) * (elem_size * num_elem);
CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0));
}

// Basis action
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
Expand Down
30 changes: 16 additions & 14 deletions backends/hip-ref/ceed-hip-ref-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,20 +223,20 @@ static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_ty
//------------------------------------------------------------------------------
// Copy host array to value strided
//------------------------------------------------------------------------------
static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) {
for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i];
static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Copy device array to value strided (impl in .hip.cpp file)
//------------------------------------------------------------------------------
int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array);
int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);

//------------------------------------------------------------------------------
// Copy a vector to a value strided
//------------------------------------------------------------------------------
static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) {
static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
CeedSize length;
CeedVector_Hip *impl;

Expand All @@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
length = length_vec < length_copy ? length_vec : length_copy;
}
if (stop == -1) stop = length;
// Set value for synced device/host array
if (impl->d_array) {
CeedScalar *copy_array;
Expand All @@ -260,12 +261,12 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
#if defined(CEED_SCALAR_IS_FP32)
CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
#else /* CEED_SCALAR */
CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
#endif /* CEED_SCALAR */
#else /* HIP_VERSION */
CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, step, length, copy_array));
CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array));
#endif /* HIP_VERSION */
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
impl->h_array = NULL;
Expand All @@ -274,7 +275,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st
CeedScalar *copy_array;

CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, step, length, copy_array));
CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array));
CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
impl->d_array = NULL;
} else {
Expand Down Expand Up @@ -336,31 +337,32 @@ static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) {
//------------------------------------------------------------------------------
// Set host array to value strided
//------------------------------------------------------------------------------
static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
for (CeedSize i = start; i < length; i += step) h_array[i] = val;
static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Set device array to value strided (impl in .hip.cpp file)
//------------------------------------------------------------------------------
int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val);
int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);

//------------------------------------------------------------------------------
// Set a vector to a value strided
//------------------------------------------------------------------------------
static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) {
static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
CeedSize length;
CeedVector_Hip *impl;

CeedCallBackend(CeedVectorGetData(vec, &impl));
CeedCallBackend(CeedVectorGetLength(vec, &length));
// Set value for synced device/host array
if (stop == -1) stop = length;
if (impl->d_array) {
CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, step, length, val));
CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val));
impl->h_array = NULL;
} else if (impl->h_array) {
CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, step, length, val));
CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val));
impl->d_array = NULL;
} else {
return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
Expand Down
Loading