Skip to content

Commit 8a074b3

Browse files
authored
Update dgemm_kernel_4x8_haswell.S
1 parent 211ab03 commit 8a074b3

File tree

1 file changed

+37
-5
lines changed

1 file changed

+37
-5
lines changed

kernel/x86_64/dgemm_kernel_4x8_haswell.S

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,24 +267,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
267267

268268
.macro SAVE4x12
269269

270-
prefetcht0 128(%rsp) /*BUFFER 1*/
270+
prefetcht0 BUFFER1
271271
vbroadcastsd ALPHA, %ymm0
272272

273273
vmulpd %ymm0 , %ymm4 , %ymm4
274274
vmulpd %ymm0 , %ymm5 , %ymm5
275275
vmulpd %ymm0 , %ymm6 , %ymm6
276276
vmulpd %ymm0 , %ymm7 , %ymm7
277-
prefetcht0 192(%rsp)
277+
prefetcht0 64 + BUFFER1
278278
vmulpd %ymm0 , %ymm8 , %ymm8
279279
vmulpd %ymm0 , %ymm9 , %ymm9
280280
vmulpd %ymm0 , %ymm10, %ymm10
281281
vmulpd %ymm0 , %ymm11, %ymm11
282-
prefetcht0 256(%rsp)
282+
prefetcht0 128 + BUFFER1
283283
vmulpd %ymm0 , %ymm12, %ymm12
284284
vmulpd %ymm0 , %ymm13, %ymm13
285285
vmulpd %ymm0 , %ymm14, %ymm14
286286
vmulpd %ymm0 , %ymm15, %ymm15
287-
prefetcht0 320(%rsp)
287+
prefetcht0 192 + BUFFER1
288288
vpermilpd $ 0x05 , %ymm5, %ymm5
289289
vpermilpd $ 0x05 , %ymm7, %ymm7
290290

@@ -1606,6 +1606,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16061606
.endm
16071607

16081608

1609+
/*
 * PREFETCHT0_C
 *
 * Issues prefetcht0 hints for the twelve addresses CO1 + k*LDC, k = 0..11,
 * with two hints per address (byte offsets 0 and 24).
 *
 * Only the scale factors 4 and 8 are used in the addressing modes, so CO1
 * itself is stepped to reach the remaining multiples of LDC. The visiting
 * order is k = 0,4,8 / 1,5,9 / 3,7,11 / 2,6,10. The steps applied to CO1
 * (+1, +2, -1, -1, -1 times LDC) sum to zero, so CO1 holds its entry value
 * again when the macro ends.
 *
 * Side effects: addq/subq modify the arithmetic flags; leaq does not.
 *
 * NOTE(review): CO1 and LDC are defined outside this excerpt. Presumably CO1
 * is the pointer into the C tile and LDC is the column stride in bytes -
 * confirm against their definitions.
 * NOTE(review): offset 24 is presumably the last of four doubles in a column
 * segment, so that a segment straddling a cache-line boundary gets both
 * lines prefetched - confirm.
 */
.macro PREFETCHT0_C
1610+
prefetcht0 (CO1)                /* k = 0 */
1611+
prefetcht0 24(CO1)
1612+
prefetcht0 (CO1,LDC,4)          /* k = 4 */
1613+
prefetcht0 24(CO1,LDC,4)
1614+
prefetcht0 (CO1,LDC,8)          /* k = 8 */
1615+
prefetcht0 24(CO1,LDC,8)
1616+
addq LDC,CO1                    /* CO1 = entry + 1*LDC */
1617+
prefetcht0 (CO1)                /* k = 1 */
1618+
prefetcht0 24(CO1)
1619+
prefetcht0 (CO1,LDC,4)          /* k = 5 */
1620+
prefetcht0 24(CO1,LDC,4)
1621+
prefetcht0 (CO1,LDC,8)          /* k = 9 */
1622+
prefetcht0 24(CO1,LDC,8)
1623+
leaq (CO1,LDC,2),CO1            /* CO1 = entry + 3*LDC (skips ahead; k = 2 is handled after this group) */
1624+
prefetcht0 (CO1)                /* k = 3 */
1625+
prefetcht0 24(CO1)
1626+
prefetcht0 (CO1,LDC,4)          /* k = 7 */
1627+
prefetcht0 24(CO1,LDC,4)
1628+
prefetcht0 (CO1,LDC,8)          /* k = 11 */
1629+
prefetcht0 24(CO1,LDC,8)
1630+
subq LDC,CO1                    /* CO1 = entry + 2*LDC */
1631+
prefetcht0 (CO1)                /* k = 2 */
1632+
prefetcht0 24(CO1)
1633+
prefetcht0 (CO1,LDC,4)          /* k = 6 */
1634+
prefetcht0 24(CO1,LDC,4)
1635+
prefetcht0 (CO1,LDC,8)          /* k = 10 */
1636+
prefetcht0 24(CO1,LDC,8)
1637+
subq LDC,CO1                    /* CO1 = entry + 1*LDC */
1638+
subq LDC,CO1                    /* CO1 = entry value restored */
1639+
.endm
16091640
/*******************************************************************************************/
16101641

16111642
#if !defined(TRMMKERNEL)
@@ -1773,7 +1804,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17731804

17741805
dec %rax
17751806
jne .L12_12
1776-
1807+
1808+
PREFETCHT0_C
17771809
.L12_12a:
17781810

17791811
KERNEL4x12_M1

0 commit comments

Comments
 (0)