Skip to content

Commit 19b0eb9

Browse files
Merge pull request #33 from NumPower/feat/update_package_1
NDArray_Append, slicing bug fix, AVX2 image support and AVX2 CUDA support
2 parents 7c2184a + 86efdd2 commit 19b0eb9

File tree

8 files changed

+208
-11
lines changed

8 files changed

+208
-11
lines changed

Makefile.frag

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,19 @@ COMMON_FLAGS = $(DEFS) $(INCLUDES) $(EXTRA_INCLUDES) $(CPPFLAGS) $(PHP_FRAMEWORK
6767
install-cuda:
6868
rm ./.libs -rf
6969
mkdir ./.libs
70-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./numpower.c -shared -Xcompiler -fPIC -o .libs/numpower.o
70+
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./numpower.c -shared -Xcompiler -fPIC -o .libs/numpower.o
7171
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/buffer.c -shared -Xcompiler -fPIC -o .libs/buffer.o
7272
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/debug.c -shared -Xcompiler -fPIC -o .libs/debug.o
7373
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/indexing.c -shared -Xcompiler -fPIC -o .libs/indexing.o
74-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/initializers.c -shared -Xcompiler -fPIC -o .libs/initializers.o
74+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/initializers.c -shared -fPIC -o .libs/initializers.o
7575
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/iterators.c -shared -Xcompiler -fPIC -o .libs/iterators.o
76-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/logic.c -shared -Xcompiler -fPIC -o .libs/logic.o
77-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/manipulation.c -shared -Xcompiler -fPIC -o .libs/manipulation.o
78-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndarray.c -shared -Xcompiler -fPIC -o .libs/ndarray.o
76+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/logic.c -shared -fPIC -o .libs/logic.o
77+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/manipulation.c -shared -fPIC -o .libs/manipulation.o
78+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndarray.c -shared -fPIC -o .libs/ndarray.o
7979
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/types.c -shared -Xcompiler -fPIC -o .libs/types.o
80-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/arithmetics.c -shared -Xcompiler -fPIC -o .libs/arithmetics.o
80+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/arithmetics.c -shared -fPIC -o .libs/arithmetics.o
8181
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/double_math.c -shared -Xcompiler -fPIC -o .libs/double_math.o
82-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/linalg.c -shared -Xcompiler -fPIC -o .libs/linalg.o
82+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/linalg.c -shared -fPIC -o .libs/linalg.o
8383
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/gpu_alloc.c -shared -Xcompiler -fPIC -o .libs/gpu_alloc.o
8484
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/cuda/cuda_math.cu -shared -Xcompiler -fPIC -o .libs/cuda_math.o
8585
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/statistics.c -shared -Xcompiler -fPIC -o .libs/statistics.o

config.m4

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@ if test "$PHP_CUDA" != "no"; then
1515
AC_MSG_RESULT([CUBLAS detected ])
1616
PHP_ADD_MAKEFILE_FRAGMENT($abs_srcdir/Makefile.frag, $abs_builddir)
1717
CFLAGS+=" -lcublas -lcudart "
18+
AC_CHECK_HEADER([immintrin.h],
19+
[
20+
AC_DEFINE(HAVE_AVX2,1,[Have AVX2/SSE support])
21+
AC_MSG_RESULT([AVX2/SSE detected ])
22+
CXX+=" -mavx2 -march=native "
23+
],[
24+
AC_DEFINE(HAVE_AVX2,0,[Have AVX2/SSE support])
25+
AC_MSG_RESULT([AVX2/SSE not found ])
26+
], [
27+
28+
]
29+
)
1830
],[
1931
AC_MSG_RESULT([wrong cublas version or library not found.])
2032
AC_CHECK_HEADER([immintrin.h],

numpower.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3017,6 +3017,37 @@ PHP_METHOD(NDArray, add) {
30173017
RETURN_NDARRAY(rtn, return_value);
30183018
}
30193019

3020+
/**
3021+
* NDArray::append
3022+
*/
3023+
ZEND_BEGIN_ARG_INFO(arginfo_ndarray_append, 0)
3024+
ZEND_ARG_INFO(0, a)
3025+
ZEND_ARG_INFO(0, b)
3026+
ZEND_END_ARG_INFO()
3027+
PHP_METHOD(NDArray, append) {
3028+
NDArray *rtn = NULL;
3029+
zval *a, *b;
3030+
long axis;
3031+
ZEND_PARSE_PARAMETERS_START(2, 2)
3032+
Z_PARAM_ZVAL(a)
3033+
Z_PARAM_ZVAL(b)
3034+
ZEND_PARSE_PARAMETERS_END();
3035+
NDArray *nda = ZVAL_TO_NDARRAY(a);
3036+
NDArray *ndb = ZVAL_TO_NDARRAY(b);
3037+
if (nda == NULL) {
3038+
return;
3039+
}
3040+
if (ndb == NULL) {
3041+
CHECK_INPUT_AND_FREE(a, nda);
3042+
return;
3043+
}
3044+
rtn = NDArray_Append(nda, ndb);
3045+
3046+
CHECK_INPUT_AND_FREE(a, nda);
3047+
CHECK_INPUT_AND_FREE(b, ndb);
3048+
RETURN_NDARRAY(rtn, return_value);
3049+
}
3050+
30203051
/**
30213052
* NDArray::matmul
30223053
*/
@@ -3927,6 +3958,7 @@ static const zend_function_entry class_NDArray_methods[] = {
39273958
ZEND_ME(NDArray, atleast_3d, arginfo_ndarray_atleast_3d, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
39283959
ZEND_ME(NDArray, transpose, arginfo_ndarray_transpose, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
39293960
ZEND_ME(NDArray, slice, arginfo_ndarray_slice, ZEND_ACC_PUBLIC)
3961+
ZEND_ME(NDArray, append, arginfo_ndarray_append, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
39303962

39313963
// INDEXING
39323964
ZEND_ME(NDArray, diagonal, arginfo_ndarray_diagonal, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)

src/initializers.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ NDArray*
295295
NDArray_FromNDArray(NDArray *target, int buffer_offset, int* shape, int* strides, const int* ndim) {
296296
NDArray* rtn = emalloc(sizeof(NDArray));
297297
int total_num_elements = 1;
298-
int out_ndim;
298+
int out_ndim = -1;
299299

300300
if (strides == NULL) {
301301
rtn->strides = emalloc(sizeof(int) * NDArray_NDIM(target));
@@ -310,7 +310,9 @@ NDArray_FromNDArray(NDArray *target, int buffer_offset, int* shape, int* strides
310310
if (shape != NULL) {
311311
rtn->dimensions = shape;
312312
rtn->strides = strides;
313-
out_ndim = *ndim;
313+
if (out_ndim == -1) {
314+
out_ndim = *ndim;
315+
}
314316
}
315317

316318
// Calculate number of elements

src/manipulation.c

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,30 @@ NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_vie
218218
return NULL;
219219
}
220220

221+
if (NDArray_NDIM(array) == 1) {
222+
int out_ndim = NDArray_NDIM(array);
223+
if (NDArray_NUMELEMENTS(indexes[0]) >= 1) {
224+
start = (int) NDArray_FDATA(indexes[0])[0];
225+
} else {
226+
start = 0;
227+
}
228+
if (NDArray_NUMELEMENTS(indexes[0]) >= 2) {
229+
stop = (int)NDArray_FDATA(indexes[0])[1];
230+
} else {
231+
stop = NDArray_SHAPE(array)[0];
232+
}
233+
if (NDArray_NUMELEMENTS(indexes[0]) == 3) {
234+
step = (int)NDArray_FDATA(indexes[0])[2];
235+
} else {
236+
step = 1;
237+
}
238+
slice_shape[0] = (int)floorf(((float)stop - (float)start) / (float)step);
239+
slice_strides[0] = NDArray_STRIDES(array)[0];
240+
offset = start * NDArray_STRIDES(array)[0];
241+
slice = NDArray_FromNDArray(array, offset, slice_shape, slice_strides, &out_ndim);
242+
return slice;
243+
}
244+
221245
for (i = 0; i < num_indices; i++) {
222246
if (NDArray_NUMELEMENTS(indexes[i]) >= 1) {
223247
start = (int) NDArray_FDATA(indexes[i])[0];
@@ -263,7 +287,45 @@ NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_vie
263287
slice->strides = Generate_Strides(slice_shape, slice_ndim, NDArray_ELSIZE(slice));
264288
slice->base = NULL;
265289
NDArray_FREE(array);
266-
NDArray_Print(slice,0);
267290
efree(slice_strides);
268291
return slice;
292+
}
293+
294+
/**
295+
* @param a, b one-dimensional NDArrays on the same device; the elements of b are copied after those of a
296+
* @todo Append all dimensions with axis
297+
* @return new 1-D NDArray holding a followed by b, or NULL after an error has been thrown
298+
*/
299+
NDArray*
300+
NDArray_Append(NDArray *a, NDArray *b) {
301+
char *tmp_ptr;
302+
if (NDArray_DEVICE(a) != NDArray_DEVICE(b)) {
303+
zend_throw_error(NULL, "NDArrays must be on the same device.");
304+
return NULL;
305+
}
306+
307+
if (NDArray_NDIM(a) != 1 || NDArray_NDIM(b) != 1) {
308+
zend_throw_error(NULL, "You can only append vectors.");
309+
return NULL;
310+
}
311+
312+
int *shape = emalloc(sizeof(int));
313+
shape[0] = NDArray_NUMELEMENTS(a) + NDArray_NUMELEMENTS(b);
314+
NDArray* rtn = NDArray_Empty(shape, 1, NDArray_TYPE(a), NDArray_DEVICE(a));
315+
316+
if (NDArray_DEVICE(a) == NDARRAY_DEVICE_GPU) {
317+
#ifdef HAVE_CUBLAS
318+
NDArray_VMEMCPY_D2D(NDArray_DATA(a), NDArray_DATA(rtn), NDArray_ELSIZE(a) * NDArray_NUMELEMENTS(a));
319+
tmp_ptr = NDArray_DATA(rtn) + NDArray_ELSIZE(a) * NDArray_NUMELEMENTS(a);
320+
NDArray_VMEMCPY_D2D(NDArray_DATA(b), tmp_ptr, NDArray_ELSIZE(b) * NDArray_NUMELEMENTS(b));
321+
#endif
322+
}
323+
324+
if (NDArray_DEVICE(a) == NDARRAY_DEVICE_CPU) {
325+
memcpy(NDArray_DATA(rtn), NDArray_DATA(a), NDArray_ELSIZE(a) * NDArray_NUMELEMENTS(a));
326+
tmp_ptr = NDArray_DATA(rtn) + NDArray_ELSIZE(a) * NDArray_NUMELEMENTS(a);
327+
memcpy(tmp_ptr, NDArray_DATA(b), NDArray_ELSIZE(b) * NDArray_NUMELEMENTS(b));
328+
}
329+
330+
return rtn;
269331
}

src/manipulation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ void reverse_copy(const int* src, int* dest, int size);
1010
void copy(const int* src, int* dest, unsigned int size);
1111
NDArray* NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_view);
1212
void *linearize_FLOAT_matrix(float *dst_in, float *src_in, NDArray * a);
13+
NDArray* NDArray_Append(NDArray *a, NDArray *b);
1314

1415
#endif //PHPSCI_NDARRAY_MANIPULATION_H

src/ndarray.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,33 @@ NDArray_FromGD(zval *a) {
101101
i_shape[1] = (int)img_ptr->sy;
102102
i_shape[2] = (int)img_ptr->sx;
103103
rtn = NDArray_Zeros(i_shape, 3, NDARRAY_TYPE_FLOAT32, NDARRAY_DEVICE_CPU);
104+
int elsize = NDArray_ELSIZE(rtn);
105+
106+
#ifdef HAVE_AVX2
107+
__m256i red_mask = _mm256_set_epi32(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
108+
for (int i = 0; i < img_ptr->sy; i++) {
109+
for (int j = 0; j < img_ptr->sx; j += 8) {
110+
offset_red = (NDArray_STRIDES(rtn)[0] / elsize * 0) +
111+
((NDArray_STRIDES(rtn)[1] / elsize) * i) +
112+
((NDArray_STRIDES(rtn)[2] / elsize) * j);
113+
offset_green = ((NDArray_STRIDES(rtn)[0] / elsize) * 1) +
114+
((NDArray_STRIDES(rtn)[1] / elsize) * i) +
115+
((NDArray_STRIDES(rtn)[2] / elsize) * j);
116+
offset_blue = ((NDArray_STRIDES(rtn)[0] / elsize) * 2) +
117+
((NDArray_STRIDES(rtn)[1] / elsize) * i) +
118+
((NDArray_STRIDES(rtn)[2] / elsize) * j);
119+
120+
__m256i color_indices = _mm256_loadu_si256((__m256i*)&img_ptr->tpixels[i][j]);
121+
__m256i red_shifted = _mm256_and_si256(_mm256_srli_epi32(color_indices, 16), red_mask);
122+
__m256i green_shifted = _mm256_and_si256(_mm256_srli_epi32(color_indices, 8), red_mask);
123+
__m256i blue_shifted = _mm256_and_si256(color_indices, red_mask);
124+
125+
_mm256_storeu_ps(&NDArray_FDATA(rtn)[offset_red], _mm256_cvtepi32_ps(red_shifted));
126+
_mm256_storeu_ps(&NDArray_FDATA(rtn)[offset_green], _mm256_cvtepi32_ps(green_shifted));
127+
_mm256_storeu_ps(&NDArray_FDATA(rtn)[offset_blue], _mm256_cvtepi32_ps(blue_shifted));
128+
}
129+
}
130+
#else
104131
for (int i = 0; i < img_ptr->sy; i++) {
105132
for (int j = 0; j < img_ptr->sx; j++) {
106133
offset_red = (NDArray_STRIDES(rtn)[0]/ NDArray_ELSIZE(rtn) * 0) +
@@ -121,6 +148,7 @@ NDArray_FromGD(zval *a) {
121148
NDArray_FDATA(rtn)[offset_green] = (float)green;
122149
}
123150
}
151+
#endif
124152
return rtn;
125153
}
126154

@@ -135,6 +163,64 @@ NDArray_ToGD(NDArray *a, NDArray *n_alpha, zval *output) {
135163
int red, green, blue, alpha;
136164
char *tmp_red, *tmp_blue, *tmp_green;
137165
gdImagePtr im = gdImageCreateTrueColor_(NDArray_SHAPE(a)[2], NDArray_SHAPE(a)[1]);
166+
167+
#ifdef HAVE_AVX2
168+
int elsize = NDArray_ELSIZE(a);
169+
__m256i alpha_mask = _mm256_set_epi32(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
170+
171+
for (int i = 0; i < im->sy; i++) {
172+
for (int j = 0; j < im->sx; j += 8) {
173+
offset_alpha = (NDArray_STRIDES(a)[0] / elsize * i) +
174+
((NDArray_STRIDES(a)[1] / elsize) * j);
175+
offset_red = (NDArray_STRIDES(a)[0] / elsize * 0) +
176+
((NDArray_STRIDES(a)[1] / elsize) * i) +
177+
((NDArray_STRIDES(a)[2] / elsize) * j);
178+
offset_green = ((NDArray_STRIDES(a)[0] / elsize) * 1) +
179+
((NDArray_STRIDES(a)[1] / elsize) * i) +
180+
((NDArray_STRIDES(a)[2] / elsize) * j);
181+
offset_blue = ((NDArray_STRIDES(a)[0] / elsize) * 2) +
182+
((NDArray_STRIDES(a)[1] / elsize) * i) +
183+
((NDArray_STRIDES(a)[2] / elsize) * j);
184+
185+
__m256 red_values = _mm256_loadu_ps(&NDArray_FDATA(a)[offset_red]);
186+
__m256 green_values = _mm256_loadu_ps(&NDArray_FDATA(a)[offset_green]);
187+
__m256 blue_values = _mm256_loadu_ps(&NDArray_FDATA(a)[offset_blue]);
188+
189+
if (n_alpha != NULL) {
190+
__m256i alpha_values = _mm256_cvtps_epi32(_mm256_loadu_ps(&NDArray_FDATA(n_alpha)[offset_alpha]));
191+
alpha_values = _mm256_and_si256(alpha_values, alpha_mask);
192+
} else {
193+
// Handle the case when n_alpha is NULL (no alpha channel)
194+
// Set alpha_values to a default value or do appropriate handling
195+
// For example, you can set alpha_values to all 255 (fully opaque).
196+
__m256i alpha_values = _mm256_set1_epi32(255);
197+
}
198+
199+
__m256i red_int = _mm256_cvtps_epi32(red_values);
200+
__m256i green_int = _mm256_cvtps_epi32(green_values);
201+
__m256i blue_int = _mm256_cvtps_epi32(blue_values);
202+
203+
__m256i color_indices;
204+
205+
if (n_alpha != NULL) {
206+
__m256i alpha_values = _mm256_cvtps_epi32(_mm256_loadu_ps(&NDArray_FDATA(n_alpha)[offset_alpha]));
207+
alpha_values = _mm256_and_si256(alpha_values, alpha_mask);
208+
209+
color_indices = _mm256_or_si256(_mm256_or_si256(_mm256_slli_epi32(alpha_values, 24),
210+
_mm256_slli_epi32(red_int, 16)),
211+
_mm256_or_si256(_mm256_slli_epi32(green_int, 8), blue_int));
212+
} else {
213+
// Handle the case when n_alpha is NULL
214+
// Set color_indices using only red, green, and blue values
215+
color_indices = _mm256_or_si256(_mm256_or_si256(_mm256_slli_epi32(red_int, 16),
216+
_mm256_slli_epi32(green_int, 8)),
217+
blue_int);
218+
}
219+
220+
_mm256_storeu_si256((__m256i*)&im->tpixels[i][j], color_indices);
221+
}
222+
}
223+
#else
138224
for (int i = 0; i < im->sy; i++) {
139225
for (int j = 0; j < im->sx; j++) {
140226
offset_alpha = (NDArray_STRIDES(a)[0]/ NDArray_ELSIZE(a) * i) +
@@ -160,8 +246,10 @@ NDArray_ToGD(NDArray *a, NDArray *n_alpha, zval *output) {
160246
im->tpixels[i][j] = color_index;
161247
}
162248
}
249+
#endif
163250
php_gd_assign_libgdimageptr_as_extgdimage(output, im);
164251
}
252+
165253
#endif
166254

167255
void apply_reduce(NDArray* result, NDArray *target, NDArray* (*operation)(NDArray*, NDArray*)) {

src/ndmath/linalg.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,7 @@ convolve2d_same_float(const float* a, const float* b, const int* shape_a,
10801080
NDArray*
10811081
NDArray_Convolve2D(NDArray *a, NDArray *b, char mode, char boundary, float fill_value) {
10821082
if (NDArray_DEVICE(a) != NDArray_DEVICE(b)) {
1083-
zend_throw_error(NULL, "Device error.");
1083+
zend_throw_error(NULL, "Both arrays must be at the same device.");
10841084
return NULL;
10851085
}
10861086

0 commit comments

Comments
 (0)