Skip to content

Commit 087659a

Browse files
Merge pull request #50 from NumPower/feat/ndarray_0_5_0
NumPower 0.5.0
2 parents c254ddd + 1d68748 commit 087659a

29 files changed

+1899
-154
lines changed

CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
cmake_minimum_required(VERSION 3.26)
2+
23
project(numpower)
34

5+
46
set(CMAKE_CXX_STANDARD 14)
57

8+
include_directories(/usr/include/)
69
include_directories(/usr/local/include/php/main/)
710
include_directories(/usr/local/include/php)
811
include_directories(/usr/local/include)
9-
include_directories(/usr/include/)
1012
include_directories(.)
1113
include_directories(src)
1214
include_directories(src/ndmath)
@@ -52,4 +54,9 @@ add_executable(numpower
5254
src/ndmath/signal.c
5355
src/ndmath/signal.h
5456
src/ndmath/calculation.c
55-
)
57+
src/ndmath/calculation.h
58+
src/dnn.c
59+
src/dnn.h
60+
src/ndmath/cuda/cuda_dnn.cu
61+
src/ndmath/cuda/cuda_dnn.cuh
62+
)

Makefile.frag

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,12 @@ install-cuda:
8080
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/linalg.c -shared -fPIC -o .libs/linalg.o
8181
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/signal.c -shared -fPIC -o .libs/signal.o
8282
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/calculation.c -shared -fPIC -o .libs/calculation.o
83+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/dnn.c -shared -fPIC -o .libs/dnn.o
8384
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/gpu_alloc.c -shared -Xcompiler -fPIC -o .libs/gpu_alloc.o
8485
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/cuda/cuda_math.cu -shared -Xcompiler -fPIC -o .libs/cuda_math.o
86+
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/cuda/cuda_dnn.cu -shared -Xcompiler -fPIC -o .libs/cuda_dnn.o
8587
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/statistics.c -shared -Xcompiler -fPIC -o .libs/statistics.o
86-
$(NVCC) -shared .libs/numpower.o .libs/signal.o .libs/initializers.o .libs/double_math.o .libs/ndarray.o .libs/debug.o .libs/statistics.o .libs/buffer.o .libs/logic.o .libs/gpu_alloc.o .libs/linalg.o .libs/manipulation.o .libs/iterators.o .libs/indexing.o .libs/arithmetics.o .libs/types.o .libs/cuda_math.o -lcublas -lcusolver -lcudart -llapack -llapacke -lopenblas -lpthread -lcuda -o .libs/ndarray.so
88+
$(NVCC) -shared .libs/numpower.o .libs/signal.o .libs/initializers.o .libs/double_math.o .libs/ndarray.o .libs/debug.o .libs/statistics.o .libs/calculation.o .libs/buffer.o .libs/dnn.o .libs/cuda_dnn.o .libs/logic.o .libs/gpu_alloc.o .libs/linalg.o .libs/manipulation.o .libs/iterators.o .libs/indexing.o .libs/arithmetics.o .libs/types.o .libs/cuda_math.o $(CFLAGS_CLEAN) -o .libs/ndarray.so
8789
cp ./.libs/ndarray.so $(phplibdir)/ndarray.so
8890
cp ./.libs/ndarray.so $(EXTENSION_DIR)/ndarray.so
8991

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ NumPower aims to manage memory more efficiently than a matrix in PHP arrays
2929
- PHP 8.x
3030
- LAPACKE
3131
- OpenBLAS
32-
- **Optional (GPU)**: CUBLAS, CUDA Build Toolkit
32+
- **Optional**: Intel MKL
33+
- **Optional (GPU)**: CUBLAS, CUDA Build Toolkit and cuDNN
3334
- **Optional (Image)**: PHP-GD
3435

3536
## GPU Support
@@ -58,3 +59,12 @@ $x_cpu = $x->cpu();
5859

5960
> **You must explicitly copy the arrays you want to use in your devices**. Cross-array operations (like adding) will
6061
> raise an exception if the arrays used are on different devices.
62+
63+
## DNN Features
64+
When NumPower identifies the oneDNN or cuDNN libraries during compilation,
65+
some DNN features will be available for use.
66+
67+
This includes, for example, high-performance feed-forward and backward passes for convolutions and pooling
68+
for both CPU (oneDNN) and GPU (cuDNN).
69+
70+
If you do not have any of the libraries installed, all DNN API method calls will raise a fatal error.

config.m4

100644100755
Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,33 @@ PHP_CHECK_LIBRARY(cblas,cblas_sdot,
8686
-lcblas
8787
])
8888

89+
PHP_CHECK_LIBRARY(mkl_rt,LAPACKE_sgesdd,
90+
[
91+
AC_DEFINE(HAVE_LAPACKE_MKL,1,[ ])
92+
PHP_ADD_LIBRARY(lapack,,NDARRAY_SHARED_LIBADD)
93+
AC_MSG_RESULT([LAPACKE (MKL) detected ])
94+
CFLAGS+=" -lmkl_rt "
95+
],[
96+
PHP_CHECK_LIBRARY(lapacke,LAPACKE_sgesdd,
97+
[
98+
AC_DEFINE(HAVE_LAPACKE,1,[ ])
99+
PHP_ADD_LIBRARY(lapack,,NDARRAY_SHARED_LIBADD)
100+
AC_MSG_RESULT([LAPACKE detected ])
101+
CFLAGS+=" -llapack -llapacke "
102+
],[
103+
AC_MSG_ERROR([wrong LAPACKE version or library not found. Try `apt install liblapacke-dev`])
104+
])
105+
])
89106

90107

91-
PHP_CHECK_LIBRARY(lapacke,LAPACKE_sgesdd,
92-
[
93-
AC_DEFINE(HAVE_LAPACKE,1,[ ])
94-
PHP_ADD_LIBRARY(lapack,,NDARRAY_SHARED_LIBADD)
95-
AC_MSG_RESULT([LAPACKE detected ])
96-
CFLAGS+=" -llapack -llapacke "
97-
],[
98-
AC_MSG_ERROR([wrong LAPACKE version or library not found. Try `apt install liblapacke-dev`])
108+
PHP_CHECK_LIBRARY(cudnn, cudnnCreate,
109+
[
110+
AC_DEFINE(HAVE_CUDNN,1,[ ])
111+
PHP_ADD_LIBRARY(z,,NDARRAY_SHARED_LIBADD)
112+
AC_MSG_RESULT([cuDNN detected, enabling GPU DNN capabilities.])
113+
CFLAGS+=" -lz -lcudnn "
114+
],[
115+
AC_MSG_RESULT([cuDNN not found. GPU DNN capabilities disabled.])
99116
])
100117

101118
if test "$PHP_NDARRAY" != "no"; then
@@ -111,6 +128,7 @@ if test "$PHP_NDARRAY" != "no"; then
111128
src/gpu_alloc.c \
112129
src/ndmath/linalg.c \
113130
src/manipulation.c \
131+
src/dnn.c \
114132
src/iterators.c \
115133
src/indexing.c \
116134
src/ndmath/arithmetics.c \

0 commit comments

Comments
 (0)