Skip to content

Commit 3a1b58d

Browse files
authored
Merge pull request #2653 from craft-zhang/cortex-a53
fix INIT8x4 of SGEMM on Arm Cortex-A53
2 parents f7659be + bc6fd20 commit 3a1b58d

File tree

1 file changed: 4 additions and 12 deletions.

kernel/arm64/sgemm_kernel_8x8_cortexa53.S

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -681,12 +681,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
681681
.macro INIT8x4
682682
fmov s16, wzr
683683
fmov s17, wzr
684+
fmov s18, wzr
685+
fmov s19, s16
684686
fmov s20, wzr
685687
fmov s21, s16
686-
fmov s24, wzr
687-
fmov s25, s16
688-
fmov s28, wzr
689-
fmov s29, s16
688+
fmov s22, wzr
689+
fmov s23, s16
690690
.endm
691691

692692
.macro KERNEL8x4_I
@@ -765,14 +765,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
765765
fmla v21.4s, v3.4s, v6.s[2]
766766
fmla v22.4s, v2.4s, v6.s[3]
767767
fmla v23.4s, v3.4s, v6.s[3]
768-
fmla v24.4s, v2.4s, v7.s[0]
769-
fmla v25.4s, v3.4s, v7.s[0]
770-
fmla v26.4s, v2.4s, v7.s[1]
771-
fmla v27.4s, v3.4s, v7.s[1]
772-
fmla v28.4s, v2.4s, v7.s[2]
773-
fmla v29.4s, v3.4s, v7.s[2]
774-
fmla v30.4s, v2.4s, v7.s[3]
775-
fmla v31.4s, v3.4s, v7.s[3]
776768
.endm
777769

778770
.macro KERNEL8x4_SUB

0 commit comments

Comments
 (0)