Skip to content

Commit 568e53c

Browse files
authored
Align umath native bench with both Numba and Numpy versions (#14)
* Use high accuracy VML/SVML functions in native code
* Drop -fp-model precise flag
* Minor cleanup in umath_bench.c.src: remove outer layer of numpy templating and replace with preprocessor directives
* Fix Python benchmark's logic to look up erf, invsqrt from numpy.core.umath.
* Use 1/sqrt for SVML "invsqrt" to match Numba
* Parse command-line options in umath native benchmarks using getopt_long - use ./<executable> -h for help.
1 parent a84ed68 commit 568e53c

File tree

5 files changed

+253
-147
lines changed

5 files changed

+253
-147
lines changed

numpy/linalg/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
CXX = icc
66
CXXFLAGS = -O3 -g -xCORE-AVX2 -axCOMMON-AVX512 -qopenmp \
7-
-qopt-report=5 -qopt-report-phase=openmp,par,vec
7+
-qopt-report=5 -qopt-report-phase=openmp,par,vec
88
LDFLAGS = -lmkl_rt -qopenmp
99

1010
TARGET = linalg

numpy/random/Makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ SOURCES = $(addsuffix .c,$(BENCHMARKS))
77
CC = icc
88
CLANG_FORMAT = clang-format
99
CFLAGS += -m64 -fPIC -fomit-frame-pointer -xSSE4.2 -axCORE-AVX2,CORE-AVX512 \
10-
-O3 -fp-model fast=2 -fimf-precision=high -prec-sqrt -prec-div \
11-
-fprotect-parens
10+
-O3 -fp-model fast=2 -fimf-precision=high -prec-sqrt -prec-div \
11+
-fprotect-parens
1212
LDFLAGS += -lmkl_rt
1313

1414
run: $(BENCHMARKS)

numpy/umath/Makefile

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,17 @@
44

55
CC = icc
66
CFLAGS = -qopenmp -xCORE-AVX2 -axCOMMON-AVX512 -O3 \
7-
-g -lmkl_rt
7+
-g -lmkl_rt -Wall -pedantic
8+
9+
ifneq ($(CONDA_PREFIX),)
10+
CFLAGS += -I$(CONDA_PREFIX)/include
11+
endif
812

913
PYTHON ?= python
1014

11-
ACC ?= la
15+
ACC ?= ha
1216
ifeq ($(ACC), ha)
1317
CFLAGS += -fimf-precision=high -D_VML_ACCURACY_HA_
14-
CFLAGS += -fp-model precise
1518
endif
1619
ifeq ($(ACC), la)
1720
CFLAGS += -fimf-precision=medium -D_VML_ACCURACY_LA_

0 commit comments

Comments
 (0)