Skip to content

Conversation

@davemgreen
Copy link
Collaborator

If we are inserting into lane 0 of a zero vector, we can use an ldr instruction, which zeroes the upper lanes for free. Do not attempt to form post-increment loads in that case; the result should be fewer micro-ops overall.

…a zero vector

If we are inserting into lane 0 of a zero vector, we can use an ldr
instruction, which zeroes the upper lanes for free. Do not attempt to form
post-increment loads in that case; the result should be fewer micro-ops overall.
@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

If we are inserting into lane 0 of a zero vector, we can use an ldr instruction, which zeroes the upper lanes for free. Do not attempt to form post-increment loads in that case; the result should be fewer micro-ops overall.


Full diff: https://github.com/llvm/llvm-project/pull/145723.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2)
  • (modified) llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (+9-11)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 13835747c91e5..9759cc159d5a3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23395,6 +23395,8 @@ static SDValue performPostLD1Combine(SDNode *N,
     auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
     if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
       return SDValue();
+    if (LaneC->getZExtValue() == 0 && isNullOrNullSplat(N->getOperand(0)))
+      return SDValue();
   }
 
   LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 0779c75c345e3..2af7cd4370b10 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13340,9 +13340,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 define <16 x i8> @test_v16i8_post_reg_ld1lane_zero(ptr %bar, ptr %ptr, i64 %inc) {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane_zero:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0], x2
-; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ldr b0, [x0]
+; CHECK-SD-NEXT:    add x8, x0, x2
+; CHECK-SD-NEXT:    str x8, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane_zero:
@@ -14133,17 +14133,15 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
 define void @chained_insert_zero(ptr noundef %fenc, ptr noundef %pred, ptr noundef %residual, i32 noundef %stride) {
 ; CHECK-SD-LABEL: chained_insert_zero:
 ; CHECK-SD:       ; %bb.0: ; %entry
-; CHECK-SD-NEXT:    movi.2d v0, #0000000000000000
-; CHECK-SD-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-SD-NEXT:    ldr s0, [x1]
+; CHECK-SD-NEXT:    ldr s1, [x0]
 ; CHECK-SD-NEXT:    ; kill: def $w3 killed $w3 def $x3
 ; CHECK-SD-NEXT:    sxtw x8, w3
-; CHECK-SD-NEXT:    ld1.s { v0 }[0], [x0], x8
-; CHECK-SD-NEXT:    ld1.s { v1 }[0], [x1], x8
-; CHECK-SD-NEXT:    sbfiz x8, x3, #1, #32
-; CHECK-SD-NEXT:    usubl.8h v0, v0, v1
+; CHECK-SD-NEXT:    usubl.8h v0, v1, v0
 ; CHECK-SD-NEXT:    str d0, [x2]
-; CHECK-SD-NEXT:    ldr s0, [x0]
-; CHECK-SD-NEXT:    ldr s1, [x1]
+; CHECK-SD-NEXT:    ldr s0, [x0, x8]
+; CHECK-SD-NEXT:    ldr s1, [x1, x8]
+; CHECK-SD-NEXT:    sbfiz x8, x3, #1, #32
 ; CHECK-SD-NEXT:    usubl.8h v0, v0, v1
 ; CHECK-SD-NEXT:    str d0, [x2, x8]
 ; CHECK-SD-NEXT:    ret

@davemgreen davemgreen merged commit cf3d136 into llvm:main Jun 27, 2025
9 checks passed
@davemgreen davemgreen deleted the gh-a64-nold1postinczero branch June 27, 2025 11:47
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants