diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 52d1797e49..808816986f 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -1909,9 +1909,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C if (!is_active) continue; // Update unit vector - if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); - else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0)); - CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0)); + { + // Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size) + CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size; + CeedSize start = node * 1 + comp * (elem_size * num_elem); + CeedSize stop = (comp + 1) * (elem_size * num_elem); + + if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); + else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); + + node = s % elem_size, comp = s / elem_size; + start = node * 1 + comp * (elem_size * num_elem); + stop = (comp + 1) * (elem_size * num_elem); + CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0)); + } // Basis action CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); diff --git a/backends/cuda-ref/ceed-cuda-ref-vector.c b/backends/cuda-ref/ceed-cuda-ref-vector.c index 59815605d7..58999fc73a 100644 --- a/backends/cuda-ref/ceed-cuda-ref-vector.c +++ b/backends/cuda-ref/ceed-cuda-ref-vector.c @@ -223,20 +223,20 @@ static int CeedVectorSetArray_Cuda(const CeedVector vec, const CeedMemType mem_t //------------------------------------------------------------------------------ // Copy host array to value strided //------------------------------------------------------------------------------ -static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) { - for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i]; +static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) { + for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i]; return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Copy device array to value strided (impl in .cu file) //------------------------------------------------------------------------------ -int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array); +int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array); //------------------------------------------------------------------------------ // Copy a vector to a value strided //------------------------------------------------------------------------------ -static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) { +static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) { CeedSize length; CeedVector_Cuda *impl; @@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy)); length = length_vec < length_copy ? length_vec : length_copy; } + if (stop == -1) stop = length; // Set value for synced device/host array if (impl->d_array) { CeedScalar *copy_array; @@ -260,13 +261,13 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); CeedCallBackend(CeedGetCublasHandle_Cuda(ceed, &handle)); #if defined(CEED_SCALAR_IS_FP32) - CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); + CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); #else /* CEED_SCALAR */ - CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); + CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); #endif /* CEED_SCALAR */ CeedCallBackend(CeedDestroy(&ceed)); #else /* CUDA_VERSION */ - CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, step, length, copy_array)); + CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, stop, step, copy_array)); #endif /* CUDA_VERSION */ CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); impl->h_array = NULL; @@ -274,7 +275,7 @@ static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize s CeedScalar *copy_array; CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, ©_array)); - CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, step, length, copy_array)); + CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, stop, step, copy_array)); CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); impl->d_array = NULL; } else { @@ -336,31 +337,32 @@ static int CeedVectorSetValue_Cuda(CeedVector vec, CeedScalar val) { //------------------------------------------------------------------------------ // Set host array to value strided //------------------------------------------------------------------------------ -static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) { - for (CeedSize i = start; i < length; i += step) h_array[i] = val; +static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { + for (CeedSize i = start; i < stop; i += step) h_array[i] = val; return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Set device array to value strided (impl in .cu file) //------------------------------------------------------------------------------ -int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val); +int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val); //------------------------------------------------------------------------------ // Set a vector to a value strided //------------------------------------------------------------------------------ -static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) { +static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { CeedSize length; CeedVector_Cuda *impl; CeedCallBackend(CeedVectorGetData(vec, &impl)); CeedCallBackend(CeedVectorGetLength(vec, &length)); // Set value for synced device/host array + if (stop == -1) stop = length; if (impl->d_array) { - CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, step, length, val)); + CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, stop, step, val)); impl->h_array = NULL; } else if (impl->h_array) { - CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, step, length, val)); + CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, stop, step, val)); impl->d_array = NULL; } else { return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set"); diff --git a/backends/cuda-ref/kernels/cuda-ref-vector.cu b/backends/cuda-ref/kernels/cuda-ref-vector.cu index e325629587..6f83efaa1e 100644 --- a/backends/cuda-ref/kernels/cuda-ref-vector.cu +++ b/backends/cuda-ref/kernels/cuda-ref-vector.cu @@ -11,24 +11,24 @@ //------------------------------------------------------------------------------ // Kernel for copy strided on device //------------------------------------------------------------------------------ -__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar *__restrict__ vec_copy) { +__global__ static void copyStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *__restrict__ vec_copy) { const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; - if (index < size) { - if ((index - start) % step == 0) vec_copy[index] = vec[index]; + if (index < stop - start) { + if (index % step == 0) vec_copy[start + index] = vec[start + index]; } } //------------------------------------------------------------------------------ // Copy strided on device memory //------------------------------------------------------------------------------ -extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array) { +extern "C" int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array) { const int block_size = 512; - const CeedSize vec_size = length; - int grid_size = vec_size / block_size; + const CeedSize copy_size = stop - start; + int grid_size = copy_size / block_size; - if (block_size * grid_size < vec_size) grid_size += 1; - copyStridedK<<>>(d_array, start, step, length, d_copy_array); + if (block_size * grid_size < copy_size) grid_size += 1; + copyStridedK<<>>(d_array, start, stop, step, d_copy_array); return 0; } @@ -57,24 +57,24 @@ extern "C" int CeedDeviceSetValue_Cuda(CeedScalar *d_array, CeedSize length, Cee //------------------------------------------------------------------------------ // Kernel for set value strided on device //------------------------------------------------------------------------------ -__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar val) { +__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; - if (index < size) { - if ((index - start) % step == 0) vec[index] = val; + if (index < stop - start) { + if (index % step == 0) vec[start + index] = val; } } //------------------------------------------------------------------------------ // Set value strided on device memory //------------------------------------------------------------------------------ -extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) { +extern "C" int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { const int block_size = 512; - const CeedSize vec_size = length; - int grid_size = vec_size / block_size; + const CeedSize set_size = stop - start; + int grid_size = set_size / block_size; - if (block_size * grid_size < vec_size) grid_size += 1; - setValueStridedK<<>>(d_array, start, step, length, val); + if (block_size * grid_size < set_size) grid_size += 1; + setValueStridedK<<>>(d_array, start, stop, step, val); return 0; } diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 3124a48eb9..2d28627a72 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -1906,9 +1906,20 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce if (!is_active) continue; // Update unit vector - if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); - else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s - 1, e_vec_size, 0.0)); - CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, s, e_vec_size, 1.0)); + { + // Note: E-vec strides are node * (1) + comp * (elem_size * num_elem) + elem * (elem_size) + CeedInt node = (s - 1) % elem_size, comp = (s - 1) / elem_size; + CeedSize start = node * 1 + comp * (elem_size * num_elem); + CeedSize stop = (comp + 1) * (elem_size * num_elem); + + if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); + else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); + + node = s % elem_size, comp = s / elem_size; + start = node * 1 + comp * (elem_size * num_elem); + stop = (comp + 1) * (elem_size * num_elem); + CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 1.0)); + } // Basis action CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); diff --git a/backends/hip-ref/ceed-hip-ref-vector.c b/backends/hip-ref/ceed-hip-ref-vector.c index 238f4b5625..50e7064551 100644 --- a/backends/hip-ref/ceed-hip-ref-vector.c +++ b/backends/hip-ref/ceed-hip-ref-vector.c @@ -223,20 +223,20 @@ static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_ty //------------------------------------------------------------------------------ // Copy host array to value strided //------------------------------------------------------------------------------ -static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) { - for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i]; +static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) { + for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i]; return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Copy device array to value strided (impl in .hip.cpp file) //------------------------------------------------------------------------------ -int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array); +int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array); //------------------------------------------------------------------------------ // Copy a vector to a value strided //------------------------------------------------------------------------------ -static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) { +static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) { CeedSize length; CeedVector_Hip *impl; @@ -248,6 +248,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy)); length = length_vec < length_copy ? length_vec : length_copy; } + if (stop == -1) stop = length; // Set value for synced device/host array if (impl->d_array) { CeedScalar *copy_array; @@ -260,12 +261,12 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle)); #if defined(CEED_SCALAR_IS_FP32) - CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); + CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); #else /* CEED_SCALAR */ - CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); + CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); #endif /* CEED_SCALAR */ #else /* HIP_VERSION */ - CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, step, length, copy_array)); + CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array)); #endif /* HIP_VERSION */ CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); impl->h_array = NULL; @@ -274,7 +275,7 @@ static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize st CeedScalar *copy_array; CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, ©_array)); - CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, step, length, copy_array)); + CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array)); CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); impl->d_array = NULL; } else { @@ -336,31 +337,32 @@ static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) { //------------------------------------------------------------------------------ // Set host array to value strided //------------------------------------------------------------------------------ -static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) { - for (CeedSize i = start; i < length; i += step) h_array[i] = val; +static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { + for (CeedSize i = start; i < stop; i += step) h_array[i] = val; return CEED_ERROR_SUCCESS; } //------------------------------------------------------------------------------ // Set device array to value strided (impl in .hip.cpp file) //------------------------------------------------------------------------------ -int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val); +int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val); //------------------------------------------------------------------------------ // Set a vector to a value strided //------------------------------------------------------------------------------ -static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) { +static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { CeedSize length; CeedVector_Hip *impl; CeedCallBackend(CeedVectorGetData(vec, &impl)); CeedCallBackend(CeedVectorGetLength(vec, &length)); // Set value for synced device/host array + if (stop == -1) stop = length; if (impl->d_array) { - CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, step, length, val)); + CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val)); impl->h_array = NULL; } else if (impl->h_array) { - CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, step, length, val)); + CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val)); impl->d_array = NULL; } else { return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set"); diff --git a/backends/hip-ref/kernels/hip-ref-vector.hip.cpp b/backends/hip-ref/kernels/hip-ref-vector.hip.cpp index a45118a6df..0db492fc80 100644 --- a/backends/hip-ref/kernels/hip-ref-vector.hip.cpp +++ b/backends/hip-ref/kernels/hip-ref-vector.hip.cpp @@ -57,24 +57,24 @@ extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, Ceed //------------------------------------------------------------------------------ // Kernel for set value strided on device //------------------------------------------------------------------------------ -__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize step, CeedSize size, CeedScalar val) { +__global__ static void setValueStridedK(CeedScalar *__restrict__ vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { const CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; - if (index < size) { - if ((index - start) % step == 0) vec[index] = val; + if (index < stop - start) { + if (index % step == 0) vec[start + index] = val; } } //------------------------------------------------------------------------------ // Set value strided on device memory //------------------------------------------------------------------------------ -extern "C" int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) { +extern "C" int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedInt stop, CeedSize step, CeedSize length, CeedScalar val) { const int block_size = 512; - const CeedSize vec_size = length; - int grid_size = vec_size / block_size; + const CeedSize set_size = stop - start; + int grid_size = set_size / block_size; - if (block_size * grid_size < vec_size) grid_size += 1; - hipLaunchKernelGGL(setValueStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, step, length, val); + if (block_size * grid_size < set_size) grid_size += 1; + hipLaunchKernelGGL(setValueStridedK, dim3(grid_size), dim3(block_size), 0, 0, d_array, start, stop, step, val); return 0; } diff --git a/backends/memcheck/ceed-memcheck-vector.c b/backends/memcheck/ceed-memcheck-vector.c index 01187d06e9..1483ac9841 100644 --- a/backends/memcheck/ceed-memcheck-vector.c +++ b/backends/memcheck/ceed-memcheck-vector.c @@ -115,7 +115,7 @@ static int CeedVectorSetValue_Memcheck(CeedVector vec, CeedScalar value) { //------------------------------------------------------------------------------ // Set internal array to value strided //------------------------------------------------------------------------------ -static int CeedVectorSetValueStrided_Memcheck(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) { +static int CeedVectorSetValueStrided_Memcheck(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { CeedSize length; CeedVector_Memcheck *impl; @@ -124,7 +124,8 @@ static int CeedVectorSetValueStrided_Memcheck(CeedVector vec, CeedSize start, Ce if (!impl->array_allocated) CeedCallBackend(CeedVectorSetArray_Memcheck(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL)); assert(impl->array_allocated); - for (CeedSize i = start; i < length; i += step) impl->array_allocated[i] = val; + if (stop == -1) stop = length; + for (CeedSize i = start; i < stop; i += step) impl->array_allocated[i] = val; return CEED_ERROR_SUCCESS; } diff --git a/include/ceed-impl.h b/include/ceed-impl.h index 4e7941a350..95c920604d 100644 --- a/include/ceed-impl.h +++ b/include/ceed-impl.h @@ -137,10 +137,10 @@ struct CeedVector_private { Ceed ceed; int (*HasValidArray)(CeedVector, bool *); int (*HasBorrowedArrayOfType)(CeedVector, CeedMemType, bool *); - int (*CopyStrided)(CeedVector, CeedSize, CeedSize, CeedVector); + int (*CopyStrided)(CeedVector, CeedSize, CeedSize, CeedSize, CeedVector); int (*SetArray)(CeedVector, CeedMemType, CeedCopyMode, CeedScalar *); int (*SetValue)(CeedVector, CeedScalar); - int (*SetValueStrided)(CeedVector, CeedSize, CeedSize, CeedScalar); + int (*SetValueStrided)(CeedVector, CeedSize, CeedSize, CeedSize, CeedScalar); int (*SyncArray)(CeedVector, CeedMemType); int (*TakeArray)(CeedVector, CeedMemType, CeedScalar **); int (*GetArray)(CeedVector, CeedMemType, CeedScalar **); diff --git a/include/ceed/ceed.h b/include/ceed/ceed.h index 26911b0a67..b1851d6a27 100644 --- a/include/ceed/ceed.h +++ b/include/ceed/ceed.h @@ -181,10 +181,10 @@ CEED_EXTERN int CeedGetPreferredMemType(Ceed ceed, CeedMemType *type); CEED_EXTERN int CeedVectorCreate(Ceed ceed, CeedSize len, CeedVector *vec); CEED_EXTERN int CeedVectorReferenceCopy(CeedVector vec, CeedVector *vec_copy); CEED_EXTERN int CeedVectorCopy(CeedVector vec, CeedVector vec_copy); -CEED_EXTERN int CeedVectorCopyStrided(CeedVector vec, CeedSize start, CeedInt step, CeedVector vec_copy); +CEED_EXTERN int CeedVectorCopyStrided(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy); CEED_EXTERN int CeedVectorSetArray(CeedVector vec, CeedMemType mem_type, CeedCopyMode copy_mode, CeedScalar *array); CEED_EXTERN int CeedVectorSetValue(CeedVector vec, CeedScalar value); -CEED_EXTERN int CeedVectorSetValueStrided(CeedVector vec, CeedSize start, CeedInt step, CeedScalar value); +CEED_EXTERN int CeedVectorSetValueStrided(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar value); CEED_EXTERN int CeedVectorSyncArray(CeedVector vec, CeedMemType mem_type); CEED_EXTERN int CeedVectorTakeArray(CeedVector vec, CeedMemType mem_type, CeedScalar **array); CEED_EXTERN int CeedVectorGetArray(CeedVector vec, CeedMemType mem_type, CeedScalar **array); diff --git a/interface/ceed-vector.c b/interface/ceed-vector.c index 70c7b5464f..3d76acb95d 100644 --- a/interface/ceed-vector.c +++ b/interface/ceed-vector.c @@ -251,7 +251,8 @@ int CeedVectorCopy(CeedVector vec, CeedVector vec_copy) { @brief Copy a strided portion of `CeedVector` contents into a different `CeedVector` @param[in] vec `CeedVector` to copy - @param[in] start First index to copy + @param[in] start First index to copy in the range `[start, stop)` + @param[in] stop One past the last element to copy in the range, or `-1` for `length` @param[in] step Stride between indices to copy @param[in,out] vec_copy `CeedVector` to copy values to @@ -259,19 +260,12 @@ int CeedVectorCopy(CeedVector vec, CeedVector vec_copy) { @ref User **/ -int CeedVectorCopyStrided(CeedVector vec, CeedSize start, CeedInt step, CeedVector vec_copy) { +int CeedVectorCopyStrided(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) { CeedSize length; const CeedScalar *array = NULL; CeedScalar *array_copy = NULL; - // Backend version - if (vec->CopyStrided && vec_copy->CopyStrided) { - CeedCall(vec->CopyStrided(vec, start, step, vec_copy)); - vec_copy->state += 2; - return CEED_ERROR_SUCCESS; - } - - // Get length + // Check length { CeedSize length_vec, length_copy; @@ -280,11 +274,23 @@ int CeedVectorCopyStrided(CeedVector vec, CeedSize start, CeedInt step, CeedVect if (length_vec <= 0 || length_copy <= 0) return CEED_ERROR_SUCCESS; length = length_vec < length_copy ? length_vec : length_copy; } + CeedCheck(stop >= -1 && stop <= length, CeedVectorReturnCeed(vec), CEED_ERROR_ACCESS, + "Invalid value for stop %" CeedSize_FMT ", must be in the range [-1, length]", stop); + CeedCheck(start >= 0 && start <= length && (start <= stop || stop == -1), CeedVectorReturnCeed(vec), CEED_ERROR_ACCESS, + "Invalid value for start %" CeedSize_FMT ", must be in the range [0, stop]", start); + + // Backend version + if (vec->CopyStrided && vec_copy->CopyStrided) { + CeedCall(vec->CopyStrided(vec, start, stop, step, vec_copy)); + vec_copy->state += 2; + return CEED_ERROR_SUCCESS; + } // Copy CeedCall(CeedVectorGetArrayRead(vec, CEED_MEM_HOST, &array)); CeedCall(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &array_copy)); - for (CeedSize i = start; i < length; i += step) array_copy[i] = array[i]; + if (stop == -1) stop = length; + for (CeedSize i = start; i < stop; i += step) array_copy[i] = array[i]; // Cleanup CeedCall(CeedVectorRestoreArrayRead(vec, &array)); @@ -357,7 +363,8 @@ int CeedVectorSetValue(CeedVector vec, CeedScalar value) { Note: The `CeedVector` must already have valid data set via @ref CeedVectorSetArray() or similar. @param[in,out] vec `CeedVector` - @param[in] start First index to set + @param[in] start First index to set in range `[start, stop)` + @param[in] stop One past the last element to set in the range, or `-1` for `length` @param[in] step Stride between indices to set @param[in] value Value to be used @@ -365,22 +372,26 @@ int CeedVectorSetValue(CeedVector vec, CeedScalar value) { @ref User **/ -int CeedVectorSetValueStrided(CeedVector vec, CeedSize start, CeedInt step, CeedScalar value) { +int CeedVectorSetValueStrided(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar value) { + CeedSize length; + CeedCheck(vec->state % 2 == 0, CeedVectorReturnCeed(vec), CEED_ERROR_ACCESS, "Cannot grant CeedVector array access, the access lock is already in use"); CeedCheck(vec->num_readers == 0, CeedVectorReturnCeed(vec), CEED_ERROR_ACCESS, "Cannot grant CeedVector array access, a process has read access"); + CeedCall(CeedVectorGetLength(vec, &length)); + CeedCheck(stop >= -1 && stop <= length, CeedVectorReturnCeed(vec), CEED_ERROR_ACCESS, + "Invalid value for stop %" CeedSize_FMT ", must be in the range [-1, length]", stop); if (vec->SetValueStrided) { - CeedCall(vec->SetValueStrided(vec, start, step, value)); + CeedCall(vec->SetValueStrided(vec, start, stop, step, value)); vec->state += 2; } else { - CeedSize length; CeedScalar *array; - CeedCall(CeedVectorGetLength(vec, &length)); if (length <= 0) return CEED_ERROR_SUCCESS; + if (stop == -1) stop = length; CeedCall(CeedVectorGetArray(vec, CEED_MEM_HOST, &array)); - for (CeedSize i = start; i < length; i += step) array[i] = value; + for (CeedSize i = start; i < stop; i += step) array[i] = value; CeedCall(CeedVectorRestoreArray(vec, &array)); } return CEED_ERROR_SUCCESS; @@ -989,8 +1000,8 @@ int CeedVectorReciprocal(CeedVector vec) { Any portion of the provided range that is outside the range of valid indices for the `CeedVector` will be ignored. @param[in] vec `CeedVector` to view - @param[in] start Index of first `CeedVector` entry to view - @param[in] stop Index of last `CeedVector` entry to view + @param[in] start Index of first `CeedVector` entry to view in the range `[start, stop)` + @param[in] stop One past the last element to view in the range, or `-1` for `length` @param[in] step Step between `CeedVector` entries to view @param[in] fp_fmt Printing format @param[in] stream Filestream to write to @@ -1012,7 +1023,7 @@ int CeedVectorViewRange(CeedVector vec, CeedSize start, CeedSize stop, CeedInt s fprintf(stream, " start: %" CeedSize_FMT "\n stop: %" CeedSize_FMT "\n step: %" CeedInt_FMT "\n", start, stop, step); } if (start > length) start = length; - if (stop > length) stop = length; + if (stop == -1 || stop > length) stop = length; snprintf(fmt, sizeof fmt, " %s\n", fp_fmt ? fp_fmt : "%g"); CeedCall(CeedVectorGetArrayRead(vec, CEED_MEM_HOST, &x)); diff --git a/tests/t127-vector.c b/tests/t127-vector.c index e9fb578d65..e13bf15d1b 100644 --- a/tests/t127-vector.c +++ b/tests/t127-vector.c @@ -17,7 +17,7 @@ int main(int argc, char **argv) { // Set strided CeedVectorSetValue(x, 1.0); - CeedVectorSetValueStrided(x, start, step, 42.0); + CeedVectorSetValueStrided(x, start, -1, step, 42.0); { const CeedScalar *read_array; @@ -36,7 +36,7 @@ int main(int argc, char **argv) { // Copy strided CeedVectorSetValue(y, 0.0); - CeedVectorCopyStrided(x, start, step, y); + CeedVectorCopyStrided(x, start, -1, step, y); { const CeedScalar *read_array;