diff --git a/kernel/loongarch64/cscal_lasx.S b/kernel/loongarch64/cscal_lasx.S
index e32071def7..d44e978a64 100644
--- a/kernel/loongarch64/cscal_lasx.S
+++ b/kernel/loongarch64/cscal_lasx.S
@@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     CMPEQ $fcc1, ALPHAI, a1
     bge $r0, I, .L19
 /////// INCX == 1 && N >= 4 ////////
-    bnez DUMMY2, .L17 // if DUMMPY2 == 1, called from c/zscal.
+    bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

     bceqz $fcc0, .L17

@@ -146,6 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     addi.d I, I, -1
     blt $r0, I, .L17
     b .L19
+    .align 3

 /////// INCX == 1 && N < 8 ///////

@@ -156,7 +157,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     andi I, N, 7
 #endif
     beqz I, .L999
-    bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
+    bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

     bceqz $fcc0, .L998

@@ -171,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     CMPEQ $fcc1, ALPHAI, a1
     move XX, X
     bge $r0, I, .L29
-    bnez DUMMY2, .L25 // if DUMMPY2 == 1, called from c/zscal.
+    bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.

     bceqz $fcc0, .L25
     bceqz $fcc1, .L25
@@ -341,7 +342,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     andi I, N, 7
 #endif
     beqz I, .L999
-    bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
+    bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

     bceqz $fcc0, .L998

diff --git a/kernel/loongarch64/cscal_lsx.S b/kernel/loongarch64/cscal_lsx.S
index 241d3d16e4..c235a206a6 100644
--- a/kernel/loongarch64/cscal_lsx.S
+++ b/kernel/loongarch64/cscal_lsx.S
@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ALPHAI $f1
 #define X $r7
 #define INCX $r8
+#define DUMMY2 $r9

 #define I $r12
 #define TEMP $r13
@@ -65,6 +66,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     bge $r0, N, .L999
     bge $r0, INCX, .L999
+    ld.d DUMMY2, $sp, 0
     li.d TEMP, 1
     movgr2fr.d a1, $r0
     FFINT a1, a1

@@ -84,24 +86,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     srai.d I, N, 2
     bne INCX, TEMP, .L22

+/////// INCX == 1 ////////
 .L11:
-    bge $r0, I, .L997
     CMPEQ $fcc0, ALPHAR, a1
     CMPEQ $fcc1, ALPHAI, a1
-    bceqz $fcc0, .L13
-    b .L14
-    .align 3
+    bge $r0, I, .L19

-.L13:
-    bceqz $fcc1, .L114 //alpha_r != 0.0 && alpha_i != 0.0
-    b .L113 //alpha_r != 0.0 && alpha_i == 0.0
+/////// INCX == 1 && N >= 4 ////////
+    bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

-.L14:
-    bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
-    b .L111 //alpha_r == 0.0 && alpha_i == 0.0
-    .align 3
+    bceqz $fcc0, .L17

-.L111: //alpha_r == 0.0 && alpha_i == 0.0
+    bceqz $fcc1, .L17
+
+.L15: //alpha_r == 0.0 && alpha_i == 0.0
     vst VXZ, X, 0 * SIZE
 #ifdef DOUBLE
     vst VXZ, X, 2 * SIZE
@@ -112,50 +110,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
     addi.d X, X, 8 * SIZE
     addi.d I, I, -1
-    blt $r0, I, .L111
-    b .L997
-    .align 3
-
-.L113: //alpha_r != 0.0 && alpha_i == 0.0
-    vld VX0, X, 0 * SIZE
-#ifdef DOUBLE
-    vld VX1, X, 2 * SIZE
-    vpickev.d x1, VX1, VX0
-    vpickod.d x2, VX1, VX0
-    vfmul.d x3, VXAR, x1
-    vfmul.d x4, VXAR, x2
-    vilvl.d VX2, x4 ,x3
-    vilvh.d VX3, x4, x3
-    vst VX2, X, 0 * SIZE
-    vst VX3, X, 2 * SIZE
-    vld VX0, X, 4 * SIZE
-    vld VX1, X, 6 * SIZE
-    vpickev.d x1, VX1, VX0
-    vpickod.d x2, VX1, VX0
-    vfmul.d x3, VXAR, x1
-    vfmul.d x4, VXAR, x2
-    vilvl.d VX2, x4 ,x3
-    vilvh.d VX3, x4, x3
-    vst VX2, X, 4 * SIZE
-    vst VX3, X, 6 * SIZE
-#else
-    vld VX1, X, 4 * SIZE
-    vpickev.w x1, VX1, VX0
-    vpickod.w x2, VX1, VX0
-    vfmul.s x3, VXAR, x1
-    vfmul.s x4, VXAR, x2
-    vilvl.w VX2, x4 ,x3
-    vilvh.w VX3, x4, x3
-    vst VX2, X, 0 * SIZE
-    vst VX3, X, 4 * SIZE
-#endif
-    addi.d X, X, 8 * SIZE
-    addi.d I, I, -1
-    blt $r0, I, .L113
-    b .L997
+    blt $r0, I, .L15
+    b .L19
     .align 3

-.L114: //alpha_r != 0.0 && alpha_i != 0.0
+.L17:
     vld VX0, X, 0 * SIZE
 #ifdef DOUBLE
     vld VX1, X, 2 * SIZE
@@ -196,29 +155,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
     addi.d X, X, 8 * SIZE
     addi.d I, I, -1
-    blt $r0, I, .L114
-    b .L997
+    blt $r0, I, .L17
+    b .L19
     .align 3

+/////// INCX == 1 && N < 4 ///////
+.L19:
+    andi I, N, 3
+    beqz I, .L999
+    bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
+
+    bceqz $fcc0, .L998
+
+    bceqz $fcc1, .L998
+
+    b .L995 // alpha_r == 0.0 && alpha_i == 0.0
+
+/////// INCX != 1 ////////
 .L22:
-    bge $r0, I, .L997
-    move XX, X
     CMPEQ $fcc0, ALPHAR, a1
     CMPEQ $fcc1, ALPHAI, a1
-    bceqz $fcc0, .L23
-    b .L24
-    .align 3
+    move XX, X
+    bge $r0, I, .L29
+    bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.

-.L23:
-    bceqz $fcc1, .L224 //alpha_r != 0.0 && alpha_i != 0.0
-    b .L223 //alpha_r != 0.0 && alpha_i == 0.0
+    bceqz $fcc0, .L25

-.L24:
-    bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
-    b .L221 //alpha_r == 0.0 && alpha_i == 0.0
-    .align 3
+    bceqz $fcc1, .L25

-.L221: //alpha_r == 0.0 && alpha_i == 0.0
+.L27: //alpha_r == 0.0 && alpha_i == 0.0
 #ifdef DOUBLE
     vstelm.d VXZ, X, 0, 0
     vstelm.d VXZ, X, 1 * SIZE, 0
@@ -246,92 +211,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
     add.d X, X, INCX
     addi.d I, I, -1
-    blt $r0, I, .L221
-    b .L997
+    blt $r0, I, .L27
+    b .L29
     .align 3

-.L223: //alpha_r != 0.0 && alpha_i == 0.0
-#ifdef DOUBLE
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    add.d X, X, INCX
-    vinsgr2vr.d x1, t1, 0
-    vinsgr2vr.d x2, t2, 0
-    vinsgr2vr.d x1, t3, 1
-    vinsgr2vr.d x2, t4, 1
-    vfmul.d x3, VXAR, x1
-    vfmul.d x4, VXAR, x2
-    vstelm.d x3, XX, 0 * SIZE, 0
-    vstelm.d x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.d x3, XX, 0 * SIZE, 1
-    vstelm.d x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    vinsgr2vr.d x1, t1, 0
-    vinsgr2vr.d x2, t2, 0
-    vinsgr2vr.d x1, t3, 1
-    vinsgr2vr.d x2, t4, 1
-    add.d X, X, INCX
-    vfmul.d x3, VXAR, x1
-    vfmul.d x4, VXAR, x2
-    addi.d I, I, -1
-    vstelm.d x3, XX, 0 * SIZE, 0
-    vstelm.d x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.d x3, XX, 0 * SIZE, 1
-    vstelm.d x4, XX, 1 * SIZE, 1
-#else
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    add.d X, X, INCX
-    vinsgr2vr.w x1, t1, 0
-    vinsgr2vr.w x2, t2, 0
-    vinsgr2vr.w x1, t3, 1
-    vinsgr2vr.w x2, t4, 1
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    vinsgr2vr.w x1, t1, 2
-    vinsgr2vr.w x2, t2, 2
-    vinsgr2vr.w x1, t3, 3
-    vinsgr2vr.w x2, t4, 3
-    add.d X, X, INCX
-
-    vfmul.s x3, VXAR, x1
-    vfmul.s x4, VXAR, x2
-    addi.d I, I, -1
-    vstelm.w x3, XX, 0 * SIZE, 0
-    vstelm.w x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 1
-    vstelm.w x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 2
-    vstelm.w x4, XX, 1 * SIZE, 2
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 3
-    vstelm.w x4, XX, 1 * SIZE, 3
-#endif
-    add.d XX, XX, INCX
-    blt $r0, I, .L223
-    b .L997
-    .align 3
-
-.L224: //alpha_r != 0.0 && alpha_i != 0.0
+.L25:
 #ifdef DOUBLE
     ld.d t1, X, 0 * SIZE
     ld.d t2, X, 1 * SIZE
@@ -414,15 +298,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     vstelm.w x4, XX, 1 * SIZE, 3
 #endif
     add.d XX, XX, INCX
-    blt $r0, I, .L224
-    b .L997
+    blt $r0, I, .L25
+    b .L29
     .align 3

-.L997:
-    andi I, N, 3
-    bge $r0, I, .L999
-    .align 3
+/////// INCX != 1 && N < 4 ///////
+.L29:
+    andi I, N, 3
+    beqz I, .L999
+    bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
+
+    bceqz $fcc0, .L998
+
+    bceqz $fcc1, .L998
+    b .L995 // alpha_r == 0.0 && alpha_i == 0.0
+
+.L995: // alpha_r == 0.0 && alpha_i == 0.0
+    ST a1, X, 0 * SIZE
+    ST a1, X, 1 * SIZE
+    addi.d I, I, -1
+    add.d X, X, INCX
+    blt $r0, I, .L995
+    b .L999

 .L998:
     LD a1, X, 0 * SIZE
     LD a2, X, 1 * SIZE
@@ -435,7 +333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     ST s2, X, 1 * SIZE
     add.d X, X, INCX
     blt $r0, I, .L998
-    .align 3
+    b .L999

 .L999:
     move $r4, $r12
diff --git a/kernel/loongarch64/zscal.S b/kernel/loongarch64/zscal.S
index a12e527a56..f6213b1591 100644
--- a/kernel/loongarch64/zscal.S
+++ b/kernel/loongarch64/zscal.S
@@ -53,6 +53,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     PROLOGUE

     li.d TEMP, 2 * SIZE
+    ld.d XX, $sp, 0 // Load dummy2
+    slli.d XX, XX, ZBASE_SHIFT
     MTC a1, $r0
     slli.d INCX, INCX, ZBASE_SHIFT
     bge $r0, N, .L999
@@ -60,6 +62,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     CMPEQ $fcc1, ALPHA_I, a1
     bceqz $fcc0, .L50
     bceqz $fcc1, .L50
+    beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
     srai.d I, N, 2
     bne INCX, TEMP, .L20
     bge $r0, I, .L15
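
A note on what the new DUMMY2 paths do, since the diff itself carries only one-line comments: the flag marks calls that arrive through the c/zscal interface, which must not take the "store zeros" shortcut when alpha_r == 0 and alpha_i == 0. Below is a minimal scalar sketch of that control flow in C. It is illustrative only: the name cscal_ref and its signature are hypothetical, not the kernel ABI, and the NaN/Inf rationale is an assumption inferred from the "do not directly copy 0" comment, not stated in the patch.

    /* Hypothetical reference loop for the DUMMY2 flag (not the kernel ABI). */
    static void cscal_ref(long n, float alpha_r, float alpha_i,
                          float *x, long inc_x, long dummy2)
    {
        /* The store-zeros fast path (.L15/.L27/.L995) is taken only when
           alpha == 0 and the caller did not set dummy2. */
        int zero_path = (alpha_r == 0.0f && alpha_i == 0.0f && dummy2 == 0);

        for (long i = 0; i < n; i++, x += 2 * inc_x) {
            if (zero_path) {
                x[0] = 0.0f;
                x[1] = 0.0f;
            } else {
                /* General path (.L17/.L25/.L998): always multiply, so special
                   values such as NaN/Inf in x still reach the output even when
                   alpha == 0 (assumed rationale). */
                float re = x[0], im = x[1];
                x[0] = alpha_r * re - alpha_i * im;
                x[1] = alpha_r * im + alpha_i * re;
            }
        }
    }

The zscal.S hunk encodes the same test without needing a spare register: dummy2 is shifted left by ZBASE_SHIFT, and since TEMP holds 2 * SIZE, which equals 1 << ZBASE_SHIFT, the branch "beq XX, TEMP, .L50" fires exactly when dummy2 == 1 and diverts the alpha == 0 case to the multiply path at .L50.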