Skip to content

Commit 9c89757

Browse files
authored
Add files via upload
1 parent 9b04bae commit 9c89757

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

kernel/x86_64/dgemm_kernel_4x8_haswell.S

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1865,13 +1865,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18651865

18661866
SAVE4x12
18671867

1868+
salq $3, K
1869+
prefetcht2 32(B)
1870+
prefetcht2 32(B, K, 8)
1871+
prefetcht2 96(B)
1872+
prefetcht2 96(B, K, 8)
1873+
addq $128, B
1874+
sarq $3, K
1875+
18681876
decq I # i --
18691877
jne .L12_11
18701878
ALIGN_4
18711879

18721880
/**************************************************************************
18731881
* Rest of M
18741882
***************************************************************************/
1883+
movq M, I
1884+
sarq $2, I
1885+
salq $7, I
1886+
subq I, B
1887+
18751888
.L12_20:
18761889
// Test rest of M
18771890

@@ -2102,7 +2115,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21022115

21032116
jmp .L13_16
21042117

2105-
2118+
PREFETCHT0_C
21062119
.L13_13:
21072120

21082121
test $1, %rax
@@ -2147,13 +2160,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21472160

21482161
SAVE4x12
21492162

2163+
salq $3, K
2164+
prefetcht2 (B)
2165+
prefetcht2 (B, K, 8)
2166+
prefetcht2 64(B)
2167+
prefetcht2 64(B, K, 8)
2168+
addq $128, B
2169+
sarq $3, K
2170+
21502171
decq I # i --
21512172
jne .L13_11
21522173
ALIGN_4
21532174

21542175
/**************************************************************************
21552176
* Rest of M
21562177
***************************************************************************/
2178+
2179+
movq M, I
2180+
sarq $2, I
2181+
salq $7, I
2182+
subq I, B
2183+
21572184
.L13_20:
21582185
// Test rest of M
21592186

0 commit comments

Comments
 (0)