Skip to content

Commit 825777f

Browse files
authored
Update dgemm_kernel_4x8_haswell.S
1 parent 9c89757 commit 825777f

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

kernel/x86_64/dgemm_kernel_4x8_haswell.S

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,12 +1865,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18651865

18661866
SAVE4x12
18671867

1868+
/* prefetch the next B source block here */
1869+
/* the increment should be proportional to the ratio GEMM_Q/GEMM_P */
1870+
/* currently an increment of 128 bytes is suitable */
18681871
salq $3, K
18691872
prefetcht2 32(B)
18701873
prefetcht2 32(B, K, 8)
18711874
prefetcht2 96(B)
18721875
prefetcht2 96(B, K, 8)
1873-
addq $128, B
1876+
addq $128, B /* increment */
18741877
sarq $3, K
18751878

18761879
decq I # i --
@@ -1880,6 +1883,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18801883
/**************************************************************************
18811884
* Rest of M
18821885
***************************************************************************/
1886+
/* recover the original value of pointer B */
18831887
movq M, I
18841888
sarq $2, I
18851889
salq $7, I
@@ -2160,6 +2164,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21602164

21612165
SAVE4x12
21622166

2167+
/* prefetch the next B source block here */
2168+
/* the increment should be proportional to the ratio GEMM_Q/GEMM_P */
2169+
/* currently an increment of 128 bytes is suitable */
21632170
salq $3, K
21642171
prefetcht2 (B)
21652172
prefetcht2 (B, K, 8)
@@ -2175,7 +2182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21752182
/**************************************************************************
21762183
* Rest of M
21772184
***************************************************************************/
2178-
2185+
/* recover the original value of pointer B */
21792186
movq M, I
21802187
sarq $2, I
21812188
salq $7, I

0 commit comments

Comments
 (0)