From 7f728d28d496dca26c0540943c7424ce3fdbfa09 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 18 Sep 2025 20:57:25 +0800 Subject: [PATCH 1/3] [LoongArch] Fix assertion failure when vec-args are not fully passed in vec-regs --- .../LoongArch/LoongArchISelLowering.cpp | 17 ++- .../CodeGen/LoongArch/lasx/issue159529.ll | 143 ++++++++++++++++++ .../test/CodeGen/LoongArch/lsx/issue159529.ll | 51 +++++++ 3 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/issue159529.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/issue159529.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index e8668860c2b38..159a8eb16689e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -7330,16 +7330,23 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned StoreSizeBytes = GRLen / 8; Align StackAlign = Align(GRLen / 8); - if (ValVT == MVT::f32 && !UseGPRForFloat) + if (ValVT == MVT::f32 && !UseGPRForFloat) { Reg = State.AllocateReg(ArgFPR32s); - else if (ValVT == MVT::f64 && !UseGPRForFloat) + } else if (ValVT == MVT::f64 && !UseGPRForFloat) { Reg = State.AllocateReg(ArgFPR64s); - else if (ValVT.is128BitVector()) + } else if (ValVT.is128BitVector()) { Reg = State.AllocateReg(ArgVRs); - else if (ValVT.is256BitVector()) + UseGPRForFloat = false; + StoreSizeBytes = 16; + StackAlign = Align(16); + } else if (ValVT.is256BitVector()) { Reg = State.AllocateReg(ArgXRs); - else + UseGPRForFloat = false; + StoreSizeBytes = 32; + StackAlign = Align(32); + } else { Reg = State.AllocateReg(ArgGPRs); + } unsigned StackOffset = Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll new file mode 100644 index 0000000000000..8eff42197ca4a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 + +define fastcc <64 x i64> @test1(<64 x i64> %0) { +; LA32-LABEL: test1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: .cfi_def_cfa_offset 32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 32 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: xvld $xr8, $fp, 0 +; LA32-NEXT: xvld $xr9, $fp, 32 +; LA32-NEXT: xvld $xr10, $fp, 64 +; LA32-NEXT: xvld $xr11, $fp, 96 +; LA32-NEXT: xvld $xr12, $fp, 224 +; LA32-NEXT: xvld $xr13, $fp, 192 +; LA32-NEXT: xvld $xr14, $fp, 160 +; LA32-NEXT: xvld $xr15, $fp, 128 +; LA32-NEXT: xvst $xr12, $a0, 480 +; LA32-NEXT: xvst $xr13, $a0, 448 +; LA32-NEXT: xvst $xr14, $a0, 416 +; LA32-NEXT: xvst $xr15, $a0, 384 +; LA32-NEXT: xvst $xr11, $a0, 352 +; LA32-NEXT: xvst $xr10, $a0, 320 +; LA32-NEXT: xvst $xr9, $a0, 288 +; LA32-NEXT: xvst $xr8, $a0, 256 +; LA32-NEXT: xvst $xr7, $a0, 224 +; LA32-NEXT: xvst $xr6, $a0, 192 +; LA32-NEXT: xvst $xr5, $a0, 160 +; LA32-NEXT: xvst $xr4, $a0, 128 +; LA32-NEXT: xvst $xr3, $a0, 96 +; LA32-NEXT: xvst $xr2, $a0, 64 +; LA32-NEXT: xvst $xr1, $a0, 32 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -32 +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: test1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: .cfi_def_cfa_offset 32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 32 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: bstrins.d $sp, $zero, 4, 0 +; LA64-NEXT: xvld $xr8, $fp, 0 +; LA64-NEXT: xvld $xr9, $fp, 32 +; LA64-NEXT: xvld $xr10, $fp, 64 +; LA64-NEXT: xvld $xr11, $fp, 96 +; LA64-NEXT: xvld $xr12, $fp, 224 +; LA64-NEXT: xvld $xr13, $fp, 192 +; LA64-NEXT: xvld $xr14, $fp, 160 +; LA64-NEXT: xvld $xr15, $fp, 128 +; LA64-NEXT: xvst $xr12, $a0, 480 +; LA64-NEXT: xvst $xr13, $a0, 448 +; LA64-NEXT: xvst $xr14, $a0, 416 +; LA64-NEXT: xvst $xr15, $a0, 384 +; LA64-NEXT: xvst $xr11, $a0, 352 +; LA64-NEXT: xvst $xr10, $a0, 320 +; LA64-NEXT: xvst $xr9, $a0, 288 +; LA64-NEXT: xvst $xr8, $a0, 256 +; LA64-NEXT: xvst $xr7, $a0, 224 +; LA64-NEXT: xvst $xr6, $a0, 192 +; LA64-NEXT: xvst $xr5, $a0, 160 +; LA64-NEXT: xvst $xr4, $a0, 128 +; LA64-NEXT: xvst $xr3, $a0, 96 +; LA64-NEXT: xvst $xr2, $a0, 64 +; LA64-NEXT: xvst $xr1, $a0, 32 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: addi.d $sp, $fp, -32 +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +entry: + ret <64 x i64> %0 +} + +define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) { +; LA32-LABEL: test2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: .cfi_def_cfa_offset 32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 32 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: xvst $xr7, $a0, 224 +; LA32-NEXT: xvst $xr6, $a0, 192 +; LA32-NEXT: xvst $xr5, $a0, 160 +; LA32-NEXT: xvst $xr4, $a0, 128 +; LA32-NEXT: xvst $xr3, $a0, 96 +; LA32-NEXT: xvst $xr2, $a0, 64 +; LA32-NEXT: xvst $xr1, $a0, 32 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -32 +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: test2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: .cfi_def_cfa_offset 32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 32 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: bstrins.d $sp, $zero, 4, 0 +; LA64-NEXT: xvst $xr7, $a0, 224 +; LA64-NEXT: xvst $xr6, $a0, 192 +; LA64-NEXT: xvst $xr5, $a0, 160 +; LA64-NEXT: xvst $xr4, $a0, 128 +; LA64-NEXT: xvst $xr3, $a0, 96 +; LA64-NEXT: xvst $xr2, $a0, 64 +; LA64-NEXT: xvst $xr1, $a0, 32 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: addi.d $sp, $fp, -32 +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +entry: + ret <32 x double> %0 +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll new file mode 100644 index 0000000000000..5d5ef937f073f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define fastcc <64 x i32> @test1(<64 x i32> %0) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr8, $sp, 0 +; CHECK-NEXT: vld $vr9, $sp, 16 +; CHECK-NEXT: vld $vr10, $sp, 32 +; CHECK-NEXT: vld $vr11, $sp, 48 +; CHECK-NEXT: vld $vr12, $sp, 112 +; CHECK-NEXT: vld $vr13, $sp, 96 +; CHECK-NEXT: vld $vr14, $sp, 80 +; CHECK-NEXT: vld $vr15, $sp, 64 +; CHECK-NEXT: vst $vr12, $a0, 240 +; CHECK-NEXT: vst $vr13, $a0, 224 +; CHECK-NEXT: vst $vr14, $a0, 208 +; CHECK-NEXT: vst $vr15, $a0, 192 +; CHECK-NEXT: vst $vr11, $a0, 176 +; CHECK-NEXT: vst $vr10, $a0, 160 +; CHECK-NEXT: vst $vr9, $a0, 144 +; CHECK-NEXT: vst $vr8, $a0, 128 +; CHECK-NEXT: vst $vr7, $a0, 112 +; CHECK-NEXT: vst $vr6, $a0, 96 +; CHECK-NEXT: vst $vr5, $a0, 80 +; CHECK-NEXT: vst $vr4, $a0, 64 +; CHECK-NEXT: vst $vr3, $a0, 48 +; CHECK-NEXT: vst $vr2, $a0, 32 +; CHECK-NEXT: vst $vr1, $a0, 16 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + ret <64 x i32> %0 +} + +define fastcc <16 x double> @test2(<16 x double> %0, <16 x double> %1) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vst $vr7, $a0, 112 +; CHECK-NEXT: vst $vr6, $a0, 96 +; CHECK-NEXT: vst $vr5, $a0, 80 +; CHECK-NEXT: vst $vr4, $a0, 64 +; CHECK-NEXT: vst $vr3, $a0, 48 +; CHECK-NEXT: vst $vr2, $a0, 32 +; CHECK-NEXT: vst $vr1, $a0, 16 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + ret <16 x double> %0 +} From 7f8fba0df23758077752fa366768534c432bb5a8 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 22 Sep 2025 09:39:58 +0800 Subject: [PATCH 2/3] Address weining's comment --- .../CodeGen/LoongArch/lasx/issue159529.ll | 20 ++----------------- .../test/CodeGen/LoongArch/lsx/issue159529.ll | 4 ++-- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll index 8eff42197ca4a..22942a0843b67 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll @@ -2,17 +2,13 @@ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 -define fastcc <64 x i64> @test1(<64 x i64> %0) { +define fastcc <64 x i64> @test1(<64 x i64> %0) nounwind { ; LA32-LABEL: test1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: .cfi_def_cfa_offset 32 ; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: .cfi_offset 22, -8 ; LA32-NEXT: addi.w $fp, $sp, 32 -; LA32-NEXT: .cfi_def_cfa 22, 0 ; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 ; LA32-NEXT: xvld $xr8, $fp, 0 ; LA32-NEXT: xvld $xr9, $fp, 32 @@ -47,13 +43,9 @@ define fastcc <64 x i64> @test1(<64 x i64> %0) { ; LA64-LABEL: test1: ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.d $sp, $sp, -32 -; LA64-NEXT: .cfi_def_cfa_offset 32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: .cfi_offset 1, -8 -; LA64-NEXT: .cfi_offset 22, -16 ; LA64-NEXT: addi.d $fp, $sp, 32 -; LA64-NEXT: .cfi_def_cfa 22, 0 ; LA64-NEXT: bstrins.d $sp, $zero, 4, 0 ; LA64-NEXT: xvld $xr8, $fp, 0 ; LA64-NEXT: xvld $xr9, $fp, 32 @@ -88,17 +80,13 @@ entry: ret <64 x i64> %0 } -define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) { +define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) nounwind { ; LA32-LABEL: test2: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: .cfi_def_cfa_offset 32 ; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: .cfi_offset 22, -8 ; LA32-NEXT: addi.w $fp, $sp, 32 -; LA32-NEXT: .cfi_def_cfa 22, 0 ; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 ; LA32-NEXT: xvst $xr7, $a0, 224 ; LA32-NEXT: xvst $xr6, $a0, 192 @@ -117,13 +105,9 @@ define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) { ; LA64-LABEL: test2: ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.d $sp, $sp, -32 -; LA64-NEXT: .cfi_def_cfa_offset 32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: .cfi_offset 1, -8 -; LA64-NEXT: .cfi_offset 22, -16 ; LA64-NEXT: addi.d $fp, $sp, 32 -; LA64-NEXT: .cfi_def_cfa 22, 0 ; LA64-NEXT: bstrins.d $sp, $zero, 4, 0 ; LA64-NEXT: xvst $xr7, $a0, 224 ; LA64-NEXT: xvst $xr6, $a0, 192 diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll index 5d5ef937f073f..a2e58bc0647f2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll @@ -2,7 +2,7 @@ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -define fastcc <64 x i32> @test1(<64 x i32> %0) { +define fastcc <64 x i32> @test1(<64 x i32> %0) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr8, $sp, 0 @@ -34,7 +34,7 @@ entry: ret <64 x i32> %0 } -define fastcc <16 x double> @test2(<16 x double> %0, <16 x double> %1) { +define fastcc <16 x double> @test2(<16 x double> %0, <16 x double> %1) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vst $vr7, $a0, 112 From cc2fbc6ba42910b2d90919afeeae11e28fdc46fc Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 22 Sep 2025 13:20:35 +0800 Subject: [PATCH 3/3] Remove fastcc --- llvm/test/CodeGen/LoongArch/lasx/issue159529.ll | 4 ++-- llvm/test/CodeGen/LoongArch/lsx/issue159529.ll | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll index 22942a0843b67..c10b2fc1368be 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll @@ -2,7 +2,7 @@ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 -define fastcc <64 x i64> @test1(<64 x i64> %0) nounwind { +define <64 x i64> @test1(<64 x i64> %0) nounwind { ; LA32-LABEL: test1: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $sp, $sp, -32 @@ -80,7 +80,7 @@ entry: ret <64 x i64> %0 } -define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) nounwind { +define <32 x double> @test2(<32 x double> %0, <32 x double> %1) nounwind { ; LA32-LABEL: test2: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $sp, $sp, -32 diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll index a2e58bc0647f2..4ed899713bd1d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll @@ -2,7 +2,7 @@ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s -define fastcc <64 x i32> @test1(<64 x i32> %0) nounwind { +define <64 x i32> @test1(<64 x i32> %0) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr8, $sp, 0 @@ -34,7 +34,7 @@ entry: ret <64 x i32> %0 } -define fastcc <16 x double> @test2(<16 x double> %0, <16 x double> %1) nounwind { +define <16 x double> @test2(<16 x double> %0, <16 x double> %1) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vst $vr7, $a0, 112