Skip to content

Commit 89528a6

Browse files
Merge pull request #34 from NumPower/feat/update_package_1
GD Image loading and save fix, GPU allocation improvement and GPU slicing fixes
2 parents 19b0eb9 + f987b5f commit 89528a6

File tree

7 files changed

+178
-96
lines changed

7 files changed

+178
-96
lines changed

config.w32

Lines changed: 106 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,113 @@
1-
// Comments in this file start with '//'.
2-
// Remove where necessary.
1+
ARG_ENABLE("ndarray",
2+
"whether to enable ndarray support",
3+
"Enable ndarray support",
4+
"no")
35

4-
ARG_ENABLE('ndarray', 'whether to enable ndarray support', 'no')
6+
ARG_WITH("cuda", "for CUDA support",
7+
"Include CUDA support", "no", "no")
58

6-
if PHP_NDARRAY != "no"; then
7-
// Check for CUBLAS library
8-
PHP_CHECK_LIBRARY('cublas', 'cublasDgemm', '
9-
ARG_ENABLE("cublas", "Enable CUBLAS support", "no")
10-
AC_MSG_RESULT(["CUBLAS detected"])
11-
CFLAGS = CFLAGS + " -lcublas -lcudart "
12-
', '
13-
AC_MSG_RESULT(["Wrong CUBLAS version or library not found."])
14-
')
9+
if test x%PHP_CUDA% != xno; then
10+
CHECK_LIB("cublas", "cublasDgemm", ,
11+
[
12+
AC_DEFINE("HAVE_CUBLAS", 1)
13+
PHP_ADD_LIBRARY("cublas", , "NDARRAY_SHARED_LIBADD")
14+
AC_MSG_RESULT("CUBLAS detected")
15+
PHP_ADD_MAKEFILE_FRAGMENT("$abs_srcdir/Makefile.frag", "$abs_builddir")
16+
CFLAGS+=" -lcublas -lcudart"
17+
AC_CHECK_HEADER("immintrin.h",
18+
[
19+
AC_DEFINE("HAVE_AVX2", 1)
20+
AC_MSG_RESULT("AVX2/SSE detected")
21+
CXX+=" -mavx2 -march=native"
22+
],
23+
[
24+
AC_DEFINE("HAVE_AVX2", 0)
25+
AC_MSG_RESULT("AVX2/SSE not found")
26+
],
27+
[]
28+
)
29+
],
30+
[
31+
AC_MSG_RESULT("wrong cublas version or library not found.")
32+
AC_CHECK_HEADER("immintrin.h",
33+
[
34+
AC_DEFINE("HAVE_AVX2", 1)
35+
AC_MSG_RESULT("AVX2/SSE detected")
36+
CFLAGS+=" -mavx2 -march=native"
37+
],
38+
[
39+
AC_DEFINE("HAVE_AVX2", 0)
40+
AC_MSG_RESULT("AVX2/SSE not found")
41+
],
42+
[]
43+
)
44+
]
45+
)
46+
else
47+
AC_CHECK_HEADER("immintrin.h",
48+
[
49+
AC_DEFINE("HAVE_AVX2", 1)
50+
AC_MSG_RESULT("AVX2/SSE detected")
51+
CFLAGS+=" -mavx2 -march=native"
52+
],
53+
[
54+
AC_DEFINE("HAVE_AVX2", 0)
55+
AC_MSG_RESULT("AVX2/SSE not found")
56+
],
57+
[]
58+
)
59+
fi
1560

16-
// Check for AVX2 support
17-
PHP_CHECK_HEADER('immintrin.h', '
18-
AC_DEFINE("HAVE_AVX2", 1, ["Have AVX2/SSE support"])
19-
AC_MSG_RESULT(["AVX2/SSE detected"])
20-
CFLAGS = CFLAGS + " -mavx2 "
21-
', '
22-
AC_DEFINE("HAVE_AVX2", 0, ["Have AVX2/SSE support"])
23-
AC_MSG_RESULT(["AVX2/SSE not found"])
24-
')
61+
if test x%PHP_GD% != xno; then
62+
AC_DEFINE("HAVE_GD", 1)
63+
AC_MSG_RESULT("GD detected")
64+
PHP_ADD_EXTENSION_DEP("ndarray", "gd", true)
65+
endif
2566

26-
// Check for CBLAS library
27-
PHP_CHECK_LIBRARY('cblas', 'cblas_sdot', '
28-
ARG_ENABLE("cblas", "Enable CBLAS support", "no")
29-
AC_MSG_RESULT(["CBLAS detected"])
30-
CFLAGS = CFLAGS + " -lcblas "
31-
', '
32-
PHP_CHECK_LIBRARY('openblas', 'cblas_sdot', '
33-
ARG_ENABLE("openblas", "Enable OpenBLAS support", "no")
34-
AC_MSG_RESULT(["OpenBLAS detected"])
35-
AC_DEFINE("HAVE_CBLAS", 1, [""])
36-
CFLAGS = CFLAGS + " -lopenblas -lpthread "
37-
', '
38-
AC_MSG_ERROR(["Wrong OpenBLAS/BLAS version or library not found."])
39-
', '
40-
-lopenblas
41-
')
42-
', '
43-
-lcblas
44-
')
67+
CHECK_LIB("cblas", "cblas_sdot",
68+
[
69+
AC_DEFINE("HAVE_CBLAS", 1)
70+
PHP_ADD_LIBRARY("cblas", , "NDARRAY_SHARED_LIBADD")
71+
AC_MSG_RESULT("CBlas detected")
72+
CFLAGS+=" -lcblas"
73+
],
74+
[
75+
CHECK_LIB("openblas", "cblas_sdot",
76+
[
77+
PHP_ADD_LIBRARY("openblas", , "NDARRAY_SHARED_LIBADD")
78+
AC_MSG_RESULT("OpenBLAS detected")
79+
AC_DEFINE("HAVE_CBLAS", 1)
80+
CFLAGS+=" -lopenblas -lpthread"
81+
],
82+
[
83+
AC_MSG_ERROR("wrong openblas/blas version or library not found.")
84+
],
85+
[
86+
"-lopenblas"
87+
]
88+
)
89+
],
90+
[
91+
"-lcblas"
92+
]
93+
)
4594

46-
// Check for LAPACKE library
47-
PHP_CHECK_LIBRARY('lapack', 'dgesvd_', '
48-
ARG_ENABLE("lapacke", "Enable LAPACKE support", "no")
49-
AC_MSG_RESULT(["LAPACKE detected"])
50-
CFLAGS = CFLAGS + " -llapack -llapacke "
51-
', '
52-
AC_MSG_ERROR(["Wrong LAPACKE version or library not found."])
53-
')
95+
CHECK_LIB("lapacke", "LAPACKE_sgesdd",
96+
[
97+
AC_DEFINE("HAVE_LAPACKE", 1)
98+
PHP_ADD_LIBRARY("lapack", , "NDARRAY_SHARED_LIBADD")
99+
AC_MSG_RESULT("LAPACKE detected")
100+
CFLAGS+=" -llapack -llapacke"
101+
],
102+
[
103+
AC_MSG_ERROR("wrong LAPACKE version or library not found. Try `apt install liblapacke-dev`")
104+
]
105+
)
54106

55-
// Add your extension's source files
56-
PHP_ADD_EXTENSION('ndarray', '
57-
numpower.c \
58-
src/initializers.c \
59-
src/ndmath/double_math.c \
60-
src/ndarray.c \
61-
src/debug.c \
62-
src/buffer.c \
63-
src/logic.c \
64-
src/gpu_alloc.c \
65-
src/ndmath/linalg.c \
66-
src/manipulation.c \
67-
src/iterators.c \
68-
src/indexing.c \
69-
src/ndmath/arithmetics.c \
70-
src/ndmath/statistics.c \
71-
src/types.c
72-
')
107+
if test x%PHP_NDARRAY% != xno; then
108+
AC_DEFINE("HAVE_NDARRAY", 1, "Have ndarray support")
109+
PHP_NEW_EXTENSION("ndarray",
110+
"numpower.c src/initializers.c src/ndmath/double_math.c src/ndarray.c src/debug.c src/buffer.c src/logic.c src/gpu_alloc.c src/ndmath/linalg.c src/manipulation.c src/iterators.c src/indexing.c src/ndmath/arithmetics.c src/ndmath/statistics.c src/types.c",
111+
%ext_shared%
112+
)
73113
endif

numpower.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -474,20 +474,24 @@ PHP_METHOD(NDArray, setDevice) {
474474
#endif
475475
}
476476

477-
ZEND_BEGIN_ARG_INFO(arginfo_reshape, 1)
477+
// @todo Indices conversion lose precision, we must convert it directly to a integer vector in C
478+
// without relying on ZVAL_TO_NDARRAY. We must apply the same for all other cases where a
479+
// PHP array of longs is converted to NDArray before being converted to a C integer.
480+
ZEND_BEGIN_ARG_INFO(arginfo_reshape, 2)
481+
ZEND_ARG_INFO(0, a)
478482
ZEND_ARG_INFO(0, shape_zval)
479483
ZEND_END_ARG_INFO();
480484
PHP_METHOD(NDArray, reshape) {
481485
int *new_shape;
482486
zval *shape_zval;
483-
zval *current = getThis();
487+
zval *a;
484488
NDArray *rtn;
485-
ZEND_PARSE_PARAMETERS_START(1, 1)
486-
Z_PARAM_ZVAL(shape_zval)
489+
ZEND_PARSE_PARAMETERS_START(2, 2)
490+
Z_PARAM_ZVAL(a)
491+
Z_PARAM_ZVAL(shape_zval)
487492
ZEND_PARSE_PARAMETERS_END();
488-
NDArray* target = ZVAL_TO_NDARRAY(current);
493+
NDArray* target = ZVAL_TO_NDARRAY(a);
489494
NDArray* shape = ZVAL_TO_NDARRAY(shape_zval);
490-
491495
new_shape = NDArray_ToIntVector(shape);
492496

493497
rtn = NDArray_Reshape(target, new_shape, NDArray_NUMELEMENTS(shape));
@@ -501,6 +505,7 @@ PHP_METHOD(NDArray, reshape) {
501505
if (Z_TYPE_P(shape_zval) == IS_ARRAY) {
502506
NDArray_FREE(shape);
503507
}
508+
CHECK_INPUT_AND_FREE(a, target);
504509
RETURN_NDARRAY(rtn, return_value);
505510
}
506511

@@ -3947,7 +3952,7 @@ static const zend_function_entry class_NDArray_methods[] = {
39473952
ZEND_ME(NDArray, max, arginfo_ndarray_max, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
39483953

39493954
// MANIPULATION
3950-
ZEND_ME(NDArray, reshape, arginfo_reshape, ZEND_ACC_PUBLIC)
3955+
ZEND_ME(NDArray, reshape, arginfo_reshape, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
39513956
ZEND_ME(NDArray, toArray, arginfo_toArray, ZEND_ACC_PUBLIC)
39523957
ZEND_ME(NDArray, toImage, arginfo_toImage, ZEND_ACC_PUBLIC)
39533958
ZEND_ME(NDArray, copy, arginfo_ndarray_copy, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)

src/buffer.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ void buffer_free() {
6262
*/
6363
void buffer_ndarray_free(int uuid) {
6464
if (MAIN_MEM_STACK.buffer != NULL) {
65-
// @todo investigate double free problem
6665
if (MAIN_MEM_STACK.lastFreed == -1) {
6766
MAIN_MEM_STACK.lastFreed = uuid;
6867
}

src/gpu_alloc.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "../config.h"
2+
#include <Zend/zend.h>
23

34
#ifdef HAVE_CUBLAS
45
#include "gpu_alloc.h"
@@ -9,27 +10,26 @@
910
void
1011
NDArray_VMALLOC(void** target, unsigned int size) {
1112
MAIN_MEM_STACK.totalGPUAllocated++;
12-
cudaMalloc(target, size);
13-
cudaDeviceSynchronize();
13+
cublasStatus_t stat = cudaMalloc(target, size);
14+
if (stat != cudaSuccess) {
15+
zend_throw_error(NULL, "device memory allocation failed");
16+
}
1417
}
1518

1619
void
1720
NDArray_VMEMCPY_D2D(char* target, char* dst, unsigned int size) {
1821
cudaMemcpy(dst, target, size, cudaMemcpyDeviceToDevice);
19-
cudaDeviceSynchronize();
2022
}
2123

2224
void
2325
NDArray_VMEMCPY_H2D(char* target, char* dst, unsigned int size) {
2426
cudaMemcpy(dst, target, size, cudaMemcpyHostToDevice);
25-
cudaDeviceSynchronize();
2627
}
2728

2829
void
2930
NDArray_VFREE(void* target) {
3031
MAIN_MEM_STACK.totalGPUAllocated--;
3132
cudaFree(target);
32-
cudaDeviceSynchronize();
3333
}
3434

3535
void

src/logic.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
/**
1919
* Check if all values are not 0
2020
*
21-
* @todo Implement non-AVX2 logic
2221
* @param a
2322
* @return
2423
*/

src/manipulation.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ linearize_FLOAT_matrix(float *dst_in,
161161
cublasHandle_t handle;
162162
cublasCreate(&handle);
163163
cublasScopy(handle, columns,
164-
(const float*)((const float*)src + (columns - 1) * column_strides),
164+
(const float*)src,
165165
column_strides, dst, one);
166166
#endif
167167
}
@@ -178,7 +178,7 @@ linearize_FLOAT_matrix(float *dst_in,
178178
#ifdef HAVE_CUBLAS
179179
cublasHandle_t handle;
180180
cublasCreate(&handle);
181-
cublasScopy(handle, columns, (const float*)((const char*)src + (columns - 1) * column_strides),
181+
cublasScopy(handle, columns, (const float*)src,
182182
column_strides / sizeof(float), dst, one);
183183
#endif
184184
}
@@ -206,18 +206,17 @@ linearize_FLOAT_matrix(float *dst_in,
206206

207207
NDArray*
208208
NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_view) {
209+
if (num_indices > NDArray_NDIM(array)) {
210+
zend_throw_error(NULL, "too many indices for array");
211+
return NULL;
212+
}
213+
209214
NDArray *slice, *rtn;
210215
int slice_ndim = NDArray_NDIM(array);
211216
int *slice_shape = emalloc(sizeof(int) * slice_ndim);
212217
int *slice_strides = emalloc(sizeof(int) * slice_ndim);
213218
int i, offset = 0;
214219
int start = 0, stop = 0, step = 0;
215-
216-
if (num_indices > NDArray_NDIM(array)) {
217-
zend_throw_error(NULL, "too many indices for array");
218-
return NULL;
219-
}
220-
221220
if (NDArray_NDIM(array) == 1) {
222221
int out_ndim = NDArray_NDIM(array);
223222
if (NDArray_NUMELEMENTS(indexes[0]) >= 1) {
@@ -265,7 +264,6 @@ NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_vie
265264
slice_shape[i] = (int)floorf(((float)stop - (float)start) / (float)step);
266265
offset += start * NDArray_STRIDES(array)[i];
267266
}
268-
269267
for (; i < slice_ndim; i++) {
270268
slice_shape[i] = NDArray_SHAPE(array)[i];
271269
}
@@ -276,12 +274,12 @@ NDArray_Slice(NDArray* array, NDArray** indexes, int num_indices, int return_vie
276274
if (NDArray_DEVICE(array) == NDARRAY_DEVICE_CPU) {
277275
rtn_data = emalloc(NDArray_ELSIZE(array) * NDArray_NUMELEMENTS(slice));
278276
}
277+
279278
#ifdef HAVE_CUBLAS
280279
if (NDArray_DEVICE(array) == NDARRAY_DEVICE_GPU) {
281280
NDArray_VMALLOC((void**)&rtn_data, NDArray_ELSIZE(array) * NDArray_NUMELEMENTS(slice));
282281
}
283282
#endif
284-
285283
linearize_FLOAT_matrix(rtn_data, NDArray_FDATA(slice), slice);
286284
slice->data = (char*)rtn_data;
287285
slice->strides = Generate_Strides(slice_shape, slice_ndim, NDArray_ELSIZE(slice));

0 commit comments

Comments
 (0)