Skip to content

Commit 9c42f03

Browse files
committed
Updated cgemm- and sgemm-kernel for POWER8 SMP
1 parent a51102e commit 9c42f03

File tree

4 files changed

+45
-28
lines changed

4 files changed

+45
-28
lines changed

common_power.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -798,7 +798,7 @@ Lmcount$lazy_ptr:
798798
#elif defined(PPC440FP2)
799799
#define BUFFER_SIZE ( 16 << 20)
800800
#elif defined(POWER8)
801-
#define BUFFER_SIZE ( 64 << 20)
801+
#define BUFFER_SIZE ( 32 << 20)
802802
#else
803803
#define BUFFER_SIZE ( 16 << 20)
804804
#endif

kernel/power/cgemm_kernel_8x4_power8.S

Lines changed: 22 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 512
85+
#define STACKSIZE 32000
8686
#define ALPHA_R_SP 296(SP)
8787
#define ALPHA_I_SP 304(SP)
8888
#define FZERO 312(SP)
@@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
136136
#define alpha_sr vs30
137137
#define alpha_si vs31
138138

139+
#define FRAMEPOINTER r12
139140

140141
#define BBUFFER r14
141142
#define L r15
@@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
161162
PROLOGUE
162163
PROFCODE
163164

165+
mr FRAMEPOINTER, SP
166+
addi SP, SP, -STACKSIZE
167+
addi SP, SP, -STACKSIZE
168+
addi SP, SP, -STACKSIZE
164169
addi SP, SP, -STACKSIZE
165170
li r0, 0
166171

@@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
233238

234239
#ifdef linux
235240
#ifdef __64BIT__
236-
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
241+
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
237242
#endif
238243
#endif
239244

240245
#if defined(_AIX) || defined(__APPLE__)
241246
#ifdef __64BIT__
242-
ld LDC, FRAMESLOT(0) + STACKSIZE(SP)
247+
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
243248
#else
244249
#ifdef DOUBLE
245-
lwz B, FRAMESLOT(0) + STACKSIZE(SP)
246-
lwz C, FRAMESLOT(1) + STACKSIZE(SP)
247-
lwz LDC, FRAMESLOT(2) + STACKSIZE(SP)
250+
lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER)
251+
lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER)
252+
lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER)
248253
#else
249-
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
254+
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
250255
#endif
251256
#endif
252257
#endif
253258

254259
#ifdef TRMMKERNEL
255260
#if defined(linux) && defined(__64BIT__)
256-
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
261+
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
257262
#endif
258263

259264
#if defined(_AIX) || defined(__APPLE__)
260265
#ifdef __64BIT__
261-
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
266+
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
262267
#else
263268
#ifdef DOUBLE
264-
lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP)
269+
lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER)
265270
#else
266-
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
271+
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
267272
#endif
268273
#endif
269274
#endif
@@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
290295
li o32 , 32
291296
li o48 , 48
292297

293-
li T1, 512
294-
slwi T1, T1, 16
295-
add BBUFFER, A, T1
298+
addi BBUFFER, SP, 512+4096
299+
li T1, -4096
300+
and BBUFFER, BBUFFER, T1
296301

297302

298303
#ifdef __64BIT__
@@ -392,6 +397,9 @@ L999:
392397
#endif
393398

394399
addi SP, SP, STACKSIZE
400+
addi SP, SP, STACKSIZE
401+
addi SP, SP, STACKSIZE
402+
addi SP, SP, STACKSIZE
395403

396404
blr
397405

kernel/power/sgemm_kernel_16x8_power8.S

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8282
#endif
8383

8484
#ifdef __64BIT__
85-
#define STACKSIZE 512
85+
#define STACKSIZE 32752
8686
#define ALPHA_SP 296(SP)
8787
#define FZERO 304(SP)
8888
#else
@@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
132132

133133
#define o0 0
134134

135+
#define FRAMEPOINTER r12
136+
135137
#define BBUFFER r14
136138
#define o4 r15
137139
#define o12 r16
@@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
160162
PROLOGUE
161163
PROFCODE
162164

165+
mr FRAMEPOINTER, SP
166+
addi SP, SP, -STACKSIZE
167+
addi SP, SP, -STACKSIZE
168+
addi SP, SP, -STACKSIZE
163169
addi SP, SP, -STACKSIZE
164170
li r0, 0
165171

@@ -231,25 +237,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
231237

232238
#if defined(_AIX) || defined(__APPLE__)
233239
#if !defined(__64BIT__) && defined(DOUBLE)
234-
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP)
240+
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER)
235241
#endif
236242
#endif
237243

238244
slwi LDC, LDC, 2
239245

240246
#if defined(TRMMKERNEL)
241247
#if defined(linux) && defined(__64BIT__)
242-
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
248+
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
243249
#endif
244250

245251
#if defined(_AIX) || defined(__APPLE__)
246252
#ifdef __64BIT__
247-
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
253+
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
248254
#else
249255
#ifdef DOUBLE
250-
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP)
256+
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER)
251257
#else
252-
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP)
258+
lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER)
253259
#endif
254260
#endif
255261
#endif
@@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271277
li o32, 32
272278
li o48, 48
273279

274-
li T1, 512
275-
slwi T1, T1, 16
276-
add BBUFFER, A, T1
280+
addi BBUFFER, SP, 512+4096
281+
li T1, -4096
282+
and BBUFFER, BBUFFER, T1
277283

278284
addi T1, SP, 300
279285
stxsspx f1, o0 , T1
@@ -355,6 +361,9 @@ L999:
355361
#endif
356362

357363
addi SP, SP, STACKSIZE
364+
addi SP, SP, STACKSIZE
365+
addi SP, SP, STACKSIZE
366+
addi SP, SP, STACKSIZE
358367

359368
blr
360369

param.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19641964
#define SNUMOPT 16
19651965
#define DNUMOPT 8
19661966

1967-
#define GEMM_DEFAULT_OFFSET_A 131072
1968-
#define GEMM_DEFAULT_OFFSET_B 131072
1967+
#define GEMM_DEFAULT_OFFSET_A 4096
1968+
#define GEMM_DEFAULT_OFFSET_B 4096
19691969
#define GEMM_DEFAULT_ALIGN 0x03fffUL
19701970

19711971
#define SGEMM_DEFAULT_UNROLL_M 16
@@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19871987
#define CGEMM_DEFAULT_Q 720
19881988
#define ZGEMM_DEFAULT_Q 720
19891989

1990-
#define SGEMM_DEFAULT_R 14400
1990+
#define SGEMM_DEFAULT_R 21600
19911991
#define DGEMM_DEFAULT_R 14400
1992-
#define CGEMM_DEFAULT_R 14400
1992+
#define CGEMM_DEFAULT_R 16200
19931993
#define ZGEMM_DEFAULT_R 14400
19941994

19951995
#define SYMV_P 8

0 commit comments

Comments
 (0)