Skip to content

Commit 86efdd2

Browse files
feat: Enable AVX2 when using CUDA
1 parent 2152d7b commit 86efdd2

File tree

2 files changed: +19 -7 lines changed

2 files changed

+19
-7
lines changed

Makefile.frag

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -67,19 +67,19 @@ COMMON_FLAGS = $(DEFS) $(INCLUDES) $(EXTRA_INCLUDES) $(CPPFLAGS) $(PHP_FRAMEWORK
6767
install-cuda:
6868
rm ./.libs -rf
6969
mkdir ./.libs
70-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./numpower.c -shared -Xcompiler -fPIC -o .libs/numpower.o
70+
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./numpower.c -shared -Xcompiler -fPIC -o .libs/numpower.o
7171
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/buffer.c -shared -Xcompiler -fPIC -o .libs/buffer.o
7272
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/debug.c -shared -Xcompiler -fPIC -o .libs/debug.o
7373
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/indexing.c -shared -Xcompiler -fPIC -o .libs/indexing.o
74-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/initializers.c -shared -Xcompiler -fPIC -o .libs/initializers.o
74+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/initializers.c -shared -fPIC -o .libs/initializers.o
7575
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/iterators.c -shared -Xcompiler -fPIC -o .libs/iterators.o
76-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/logic.c -shared -Xcompiler -fPIC -o .libs/logic.o
77-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/manipulation.c -shared -Xcompiler -fPIC -o .libs/manipulation.o
78-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndarray.c -shared -Xcompiler -fPIC -o .libs/ndarray.o
76+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/logic.c -shared -fPIC -o .libs/logic.o
77+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/manipulation.c -shared -fPIC -o .libs/manipulation.o
78+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndarray.c -shared -fPIC -o .libs/ndarray.o
7979
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/types.c -shared -Xcompiler -fPIC -o .libs/types.o
80-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/arithmetics.c -shared -Xcompiler -fPIC -o .libs/arithmetics.o
80+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/arithmetics.c -shared -fPIC -o .libs/arithmetics.o
8181
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/double_math.c -shared -Xcompiler -fPIC -o .libs/double_math.o
82-
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/linalg.c -shared -Xcompiler -fPIC -o .libs/linalg.o
82+
$(CC) -I. -I $(CXX) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/linalg.c -shared -fPIC -o .libs/linalg.o
8383
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/gpu_alloc.c -shared -Xcompiler -fPIC -o .libs/gpu_alloc.o
8484
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/cuda/cuda_math.cu -shared -Xcompiler -fPIC -o .libs/cuda_math.o
8585
$(NVCC) -I. -I $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -c $(builddir)./src/ndmath/statistics.c -shared -Xcompiler -fPIC -o .libs/statistics.o

config.m4

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,18 @@ if test "$PHP_CUDA" != "no"; then
1515
AC_MSG_RESULT([CUBLAS detected ])
1616
PHP_ADD_MAKEFILE_FRAGMENT($abs_srcdir/Makefile.frag, $abs_builddir)
1717
CFLAGS+=" -lcublas -lcudart "
18+
AC_CHECK_HEADER([immintrin.h],
19+
[
20+
AC_DEFINE(HAVE_AVX2,1,[Have AV2/SSE support])
21+
AC_MSG_RESULT([AVX2/SSE detected ])
22+
CXX+=" -mavx2 -march=native "
23+
],[
24+
AC_DEFINE(HAVE_AVX2,0,[Have AV2/SSE support])
25+
AC_MSG_RESULT([AVX2/SSE not found ])
26+
], [
27+
28+
]
29+
)
1830
],[
1931
AC_MSG_RESULT([wrong cublas version or library not found.])
2032
AC_CHECK_HEADER([immintrin.h],

0 commit comments

Comments
 (0)