Skip to content

Commit 99efbbb

Browse files
committed
Fixed #395. Enable optimized cgemm for Sandybridge. Added optimized sdot kernel.
Fixed c/zgemm and zgemv computational errors on haswell, piledriver, bulldozer, and barcelona on Windows. Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop. Conflicts: kernel/Makefile.L1, kernel/x86_64/KERNEL, param.h
2 parents 2499177 + 22e5aee commit 99efbbb

19 files changed

+2473
-151
lines changed

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ endif
2323
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
2424

2525
.PHONY : all libs netlib test ctest shared install
26-
.NOTPARALLEL : all libs prof lapack-test install
26+
.NOTPARALLEL : all libs prof lapack-test install blas-test
2727

2828
all :: libs netlib tests shared
2929
@echo
@@ -282,6 +282,11 @@ lapack-test :
282282
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
283283
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
284284

285+
blas-test:
286+
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
287+
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
288+
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
289+
285290

286291
dummy :
287292

kernel/Makefile.L1

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -687,15 +687,27 @@ $(KDIR)ddot_k$(TSUFFIX).$(SUFFIX) $(KDIR)ddot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
687687
$(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QDOTKERNEL)
688688
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
689689

690-
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
691-
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
692-
693690
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
694691
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
695692

693+
ifdef DSDOTKERNEL
694+
695+
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSDOTKERNEL)
696+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
697+
698+
$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSDOTKERNEL)
699+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
700+
701+
else
702+
703+
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
704+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
705+
696706
$(KDIR)sdsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL)
697707
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@
698708

709+
endif
710+
699711
$(KDIR)zdotu_k$(TSUFFIX).$(SUFFIX) $(KDIR)zdotu_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZDOTKERNEL)
700712
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UCONJ $< -o $@
701713

kernel/x86_64/KERNEL

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,15 @@ XCOPYKERNEL = zcopy.S
119119
endif
120120

121121
ifndef SDOTKERNEL
122-
SDOTKERNEL = ../arm/dot.c
122+
SDOTKERNEL = dot_sse.S
123123
endif
124124

125+
126+
ifndef DSDOTKERNEL
127+
DSDOTKERNEL = ../arm/dot.c
128+
endif
129+
130+
125131
ifndef DDOTKERNEL
126132
DDOTKERNEL = dot_sse2.S
127133
endif

kernel/x86_64/KERNEL.BARCELONA

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ SGEMVNKERNEL = sgemv_n.S
22
SGEMVTKERNEL = sgemv_t.S
33

44
ZGEMVNKERNEL = zgemv_n_dup.S
5-
ZGEMVTKERNEL = zgemv_t_dup.S
5+
ZGEMVTKERNEL = zgemv_t.S
66

77
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
88
SGEMMINCOPY = ../generic/gemm_ncopy_8.c

kernel/x86_64/KERNEL.BOBCAT

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ZGEMVNKERNEL = zgemv_n_dup.S
2-
ZGEMVTKERNEL = zgemv_t_dup.S
2+
ZGEMVTKERNEL = zgemv_t.S
33

44
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
55
SGEMMINCOPY = ../generic/gemm_ncopy_8.c

kernel/x86_64/KERNEL.BULLDOZER

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ SGEMVNKERNEL = sgemv_n.S
22
SGEMVTKERNEL = sgemv_t.S
33

44
ZGEMVNKERNEL = zgemv_n_dup.S
5-
ZGEMVTKERNEL = zgemv_t_dup.S
5+
ZGEMVTKERNEL = zgemv_t.S
66

77
DGEMVNKERNEL = dgemv_n_bulldozer.S
88
DGEMVTKERNEL = dgemv_t_bulldozer.S

kernel/x86_64/KERNEL.OPTERON_SSE3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ZGEMVNKERNEL = zgemv_n_dup.S
2-
ZGEMVTKERNEL = zgemv_t_dup.S
2+
ZGEMVTKERNEL = zgemv_t.S
33

44
SGEMMKERNEL = gemm_kernel_8x4_sse.S
55
SGEMMINCOPY = ../generic/gemm_ncopy_8.c

kernel/x86_64/KERNEL.PILEDRIVER

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ SGEMVNKERNEL = sgemv_n.S
22
SGEMVTKERNEL = sgemv_t.S
33

44
ZGEMVNKERNEL = zgemv_n_dup.S
5-
ZGEMVTKERNEL = zgemv_t_dup.S
5+
ZGEMVTKERNEL = zgemv_t.S
66

77
DGEMVNKERNEL = dgemv_n_bulldozer.S
88
DGEMVTKERNEL = dgemv_t_bulldozer.S

kernel/x86_64/KERNEL.PRESCOTT

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ZGEMVNKERNEL = zgemv_n_dup.S
2-
ZGEMVTKERNEL = zgemv_t_dup.S
2+
ZGEMVTKERNEL = zgemv_t.S
33

44
SGEMMKERNEL = gemm_kernel_8x4_sse3.S
55
SGEMMINCOPY = ../generic/gemm_ncopy_8.c

kernel/x86_64/KERNEL.SANDYBRIDGE

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
2121
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
2222
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
2323

24-
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
25-
CGEMMINCOPY = zgemm_ncopy_2.S
26-
CGEMMITCOPY = zgemm_tcopy_2.S
27-
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
28-
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
24+
CGEMMKERNEL = cgemm_kernel_8x2_sandy.S
25+
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
26+
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
27+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
28+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
2929
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
3030
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
3131
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)

0 commit comments

Comments
 (0)