Skip to content

Conversation

@heiher
Copy link
Member

@heiher heiher commented Sep 18, 2025

Fixes #159529

@llvmbot
Copy link
Member

llvmbot commented Sep 18, 2025

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Fixes #159529


Full diff: https://github.com/llvm/llvm-project/pull/159568.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+16-7)
  • (added) llvm/test/CodeGen/LoongArch/lasx/issue159529.ll (+143)
  • (added) llvm/test/CodeGen/LoongArch/lsx/issue159529.ll (+51)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..71601919afe02 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -7327,19 +7327,28 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
 
   // Allocate to a register if possible, or else a stack slot.
   Register Reg;
-  unsigned StoreSizeBytes = GRLen / 8;
-  Align StackAlign = Align(GRLen / 8);
+  unsigned StoreSizeBytes;
+  Align StackAlign;
 
-  if (ValVT == MVT::f32 && !UseGPRForFloat)
+  if (ValVT == MVT::f32 && !UseGPRForFloat) {
     Reg = State.AllocateReg(ArgFPR32s);
-  else if (ValVT == MVT::f64 && !UseGPRForFloat)
+  } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
     Reg = State.AllocateReg(ArgFPR64s);
-  else if (ValVT.is128BitVector())
+  } else if (ValVT.is128BitVector()) {
     Reg = State.AllocateReg(ArgVRs);
-  else if (ValVT.is256BitVector())
+    UseGPRForFloat = false;
+    StoreSizeBytes = 16;
+    StackAlign = Align(16);
+  } else if (ValVT.is256BitVector()) {
     Reg = State.AllocateReg(ArgXRs);
-  else
+    UseGPRForFloat = false;
+    StoreSizeBytes = 32;
+    StackAlign = Align(32);
+  } else {
     Reg = State.AllocateReg(ArgGPRs);
+    StoreSizeBytes = GRLen / 8;
+    StackAlign = Align(GRLen / 8);
+  }
 
   unsigned StackOffset =
       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll
new file mode 100644
index 0000000000000..8eff42197ca4a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/issue159529.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
+
+define fastcc <64 x i64> @test1(<64 x i64> %0) {
+; LA32-LABEL: test1:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    .cfi_def_cfa_offset 32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    .cfi_offset 22, -8
+; LA32-NEXT:    addi.w $fp, $sp, 32
+; LA32-NEXT:    .cfi_def_cfa 22, 0
+; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT:    xvld $xr8, $fp, 0
+; LA32-NEXT:    xvld $xr9, $fp, 32
+; LA32-NEXT:    xvld $xr10, $fp, 64
+; LA32-NEXT:    xvld $xr11, $fp, 96
+; LA32-NEXT:    xvld $xr12, $fp, 224
+; LA32-NEXT:    xvld $xr13, $fp, 192
+; LA32-NEXT:    xvld $xr14, $fp, 160
+; LA32-NEXT:    xvld $xr15, $fp, 128
+; LA32-NEXT:    xvst $xr12, $a0, 480
+; LA32-NEXT:    xvst $xr13, $a0, 448
+; LA32-NEXT:    xvst $xr14, $a0, 416
+; LA32-NEXT:    xvst $xr15, $a0, 384
+; LA32-NEXT:    xvst $xr11, $a0, 352
+; LA32-NEXT:    xvst $xr10, $a0, 320
+; LA32-NEXT:    xvst $xr9, $a0, 288
+; LA32-NEXT:    xvst $xr8, $a0, 256
+; LA32-NEXT:    xvst $xr7, $a0, 224
+; LA32-NEXT:    xvst $xr6, $a0, 192
+; LA32-NEXT:    xvst $xr5, $a0, 160
+; LA32-NEXT:    xvst $xr4, $a0, 128
+; LA32-NEXT:    xvst $xr3, $a0, 96
+; LA32-NEXT:    xvst $xr2, $a0, 64
+; LA32-NEXT:    xvst $xr1, $a0, 32
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    addi.w $sp, $fp, -32
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test1:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    .cfi_def_cfa_offset 32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    .cfi_offset 1, -8
+; LA64-NEXT:    .cfi_offset 22, -16
+; LA64-NEXT:    addi.d $fp, $sp, 32
+; LA64-NEXT:    .cfi_def_cfa 22, 0
+; LA64-NEXT:    bstrins.d $sp, $zero, 4, 0
+; LA64-NEXT:    xvld $xr8, $fp, 0
+; LA64-NEXT:    xvld $xr9, $fp, 32
+; LA64-NEXT:    xvld $xr10, $fp, 64
+; LA64-NEXT:    xvld $xr11, $fp, 96
+; LA64-NEXT:    xvld $xr12, $fp, 224
+; LA64-NEXT:    xvld $xr13, $fp, 192
+; LA64-NEXT:    xvld $xr14, $fp, 160
+; LA64-NEXT:    xvld $xr15, $fp, 128
+; LA64-NEXT:    xvst $xr12, $a0, 480
+; LA64-NEXT:    xvst $xr13, $a0, 448
+; LA64-NEXT:    xvst $xr14, $a0, 416
+; LA64-NEXT:    xvst $xr15, $a0, 384
+; LA64-NEXT:    xvst $xr11, $a0, 352
+; LA64-NEXT:    xvst $xr10, $a0, 320
+; LA64-NEXT:    xvst $xr9, $a0, 288
+; LA64-NEXT:    xvst $xr8, $a0, 256
+; LA64-NEXT:    xvst $xr7, $a0, 224
+; LA64-NEXT:    xvst $xr6, $a0, 192
+; LA64-NEXT:    xvst $xr5, $a0, 160
+; LA64-NEXT:    xvst $xr4, $a0, 128
+; LA64-NEXT:    xvst $xr3, $a0, 96
+; LA64-NEXT:    xvst $xr2, $a0, 64
+; LA64-NEXT:    xvst $xr1, $a0, 32
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    addi.d $sp, $fp, -32
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+entry:
+  ret <64 x i64> %0
+}
+
+define fastcc <32 x double> @test2(<32 x double> %0, <32 x double> %1) {
+; LA32-LABEL: test2:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    .cfi_def_cfa_offset 32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    .cfi_offset 22, -8
+; LA32-NEXT:    addi.w $fp, $sp, 32
+; LA32-NEXT:    .cfi_def_cfa 22, 0
+; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
+; LA32-NEXT:    xvst $xr7, $a0, 224
+; LA32-NEXT:    xvst $xr6, $a0, 192
+; LA32-NEXT:    xvst $xr5, $a0, 160
+; LA32-NEXT:    xvst $xr4, $a0, 128
+; LA32-NEXT:    xvst $xr3, $a0, 96
+; LA32-NEXT:    xvst $xr2, $a0, 64
+; LA32-NEXT:    xvst $xr1, $a0, 32
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    addi.w $sp, $fp, -32
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test2:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    .cfi_def_cfa_offset 32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    .cfi_offset 1, -8
+; LA64-NEXT:    .cfi_offset 22, -16
+; LA64-NEXT:    addi.d $fp, $sp, 32
+; LA64-NEXT:    .cfi_def_cfa 22, 0
+; LA64-NEXT:    bstrins.d $sp, $zero, 4, 0
+; LA64-NEXT:    xvst $xr7, $a0, 224
+; LA64-NEXT:    xvst $xr6, $a0, 192
+; LA64-NEXT:    xvst $xr5, $a0, 160
+; LA64-NEXT:    xvst $xr4, $a0, 128
+; LA64-NEXT:    xvst $xr3, $a0, 96
+; LA64-NEXT:    xvst $xr2, $a0, 64
+; LA64-NEXT:    xvst $xr1, $a0, 32
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    addi.d $sp, $fp, -32
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+entry:
+  ret <32 x double> %0
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll
new file mode 100644
index 0000000000000..5d5ef937f073f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/issue159529.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define fastcc <64 x i32> @test1(<64 x i32> %0) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr8, $sp, 0
+; CHECK-NEXT:    vld $vr9, $sp, 16
+; CHECK-NEXT:    vld $vr10, $sp, 32
+; CHECK-NEXT:    vld $vr11, $sp, 48
+; CHECK-NEXT:    vld $vr12, $sp, 112
+; CHECK-NEXT:    vld $vr13, $sp, 96
+; CHECK-NEXT:    vld $vr14, $sp, 80
+; CHECK-NEXT:    vld $vr15, $sp, 64
+; CHECK-NEXT:    vst $vr12, $a0, 240
+; CHECK-NEXT:    vst $vr13, $a0, 224
+; CHECK-NEXT:    vst $vr14, $a0, 208
+; CHECK-NEXT:    vst $vr15, $a0, 192
+; CHECK-NEXT:    vst $vr11, $a0, 176
+; CHECK-NEXT:    vst $vr10, $a0, 160
+; CHECK-NEXT:    vst $vr9, $a0, 144
+; CHECK-NEXT:    vst $vr8, $a0, 128
+; CHECK-NEXT:    vst $vr7, $a0, 112
+; CHECK-NEXT:    vst $vr6, $a0, 96
+; CHECK-NEXT:    vst $vr5, $a0, 80
+; CHECK-NEXT:    vst $vr4, $a0, 64
+; CHECK-NEXT:    vst $vr3, $a0, 48
+; CHECK-NEXT:    vst $vr2, $a0, 32
+; CHECK-NEXT:    vst $vr1, $a0, 16
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  ret <64 x i32> %0
+}
+
+define fastcc <16 x double> @test2(<16 x double> %0, <16 x double> %1) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vst $vr7, $a0, 112
+; CHECK-NEXT:    vst $vr6, $a0, 96
+; CHECK-NEXT:    vst $vr5, $a0, 80
+; CHECK-NEXT:    vst $vr4, $a0, 64
+; CHECK-NEXT:    vst $vr3, $a0, 48
+; CHECK-NEXT:    vst $vr2, $a0, 32
+; CHECK-NEXT:    vst $vr1, $a0, 16
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  ret <16 x double> %0
+}

@heiher heiher force-pushed the users/hev/issue-159529 branch from 4e1f4ec to 7f728d2 on September 18, 2025 at 13:10
Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@heiher heiher merged commit 7d1952a into main Sep 22, 2025
9 checks passed
@heiher heiher deleted the users/hev/issue-159529 branch September 22, 2025 07:45
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[LoongArch] Assertion '(!UseGPRForFloat || LocVT == GRLenVT) && "Expected an GRLenVT at this stage"' failed.

4 participants