Skip to content

Commit 8b14022

Browse files
committed
optimized dtrsm_kernel_LT for POWER8
1 parent 318cad9 commit 8b14022

File tree

2 files changed: +45 -2 lines changed

2 files changed: +45 -2 lines changed

kernel/power/dtrsm_kernel_LT_16x4_power8.S

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -219,6 +219,7 @@
219219
li o24, 24
220220
li o32, 32
221221
li o48, 48
222+
li PRE, 384
222223

223224
mr KK, OFFSET
224225

kernel/power/dtrsm_logic_LT_16x4_power8.S

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,33 @@ DSTRM_LT_L4x16_BEGIN:
1818

1919
mr BO, B
2020

21+
li L, -128
22+
23+
mr T1, CO
24+
add T2, T1, LDC
25+
add T3, T2, LDC
26+
add T4, T3, LDC
27+
28+
and T1, T1, L
29+
and T2, T2, L
30+
and T3, T3, L
31+
and T4, T4, L
32+
33+
dcbt T1, r0
34+
dcbt T2, r0
35+
dcbt T3, r0
36+
dcbt T4, r0
37+
38+
addi T1, T1, 128
39+
addi T2, T2, 128
40+
addi T3, T3, 128
41+
addi T4, T4, 128
42+
43+
dcbt T1, r0
44+
dcbt T2, r0
45+
dcbt T3, r0
46+
dcbt T4, r0
47+
2148

2249
DSTRM_LT_L4x16_LOOP_START:
2350

@@ -26,15 +53,30 @@ DSTRM_LT_L4x16_LOOP_START:
2653

2754

2855
addic. L, KK, 0
29-
ble DSTRM_LT_L4x16_SAVE
56+
ble- DSTRM_LT_L4x16_SAVE
3057

3158
DSTRM_LT_L4x16_LOOP:
3259

60+
dcbt AO, PRE
61+
dcbt BO, PRE
62+
KERNEL_16x4
63+
addic. L, L, -1
64+
ble- DSTRM_LT_L4x16_SAVE
65+
66+
dcbt AO, PRE
67+
KERNEL_16x4
68+
addic. L, L, -1
69+
ble- DSTRM_LT_L4x16_SAVE
3370

71+
dcbt AO, PRE
3472
KERNEL_16x4
73+
addic. L, L, -1
74+
ble- DSTRM_LT_L4x16_SAVE
3575

76+
dcbt AO, PRE
77+
KERNEL_16x4
3678
addic. L, L, -1
37-
bgt DSTRM_LT_L4x16_LOOP
79+
bgt+ DSTRM_LT_L4x16_LOOP
3880

3981

4082
DSTRM_LT_L4x16_SAVE:

0 commit comments

Comments (0)