Skip to content

Conversation

@davemgreen
Copy link
Collaborator

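Adding the cast-of-cast combines (together with `buildvector_of_truncate` and `integer_of_truncate`) to the AArch64 post-legalizer combiner helps clear up some of the legalization artefacts. Not all of the `cast_combines` are added (notably the select combines), as they currently show questionable benefit in the test updates.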

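Adding the cast-of-cast combines (together with buildvector_of_truncate
and integer_of_truncate) to the AArch64 post-legalizer combiner helps
clear up some of the legalization artefacts. Not all of the
cast_combines are added (notably the select combines), as they currently
show questionable benefit in the test updates.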
@llvmbot
Copy link
Member

llvmbot commented Oct 16, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

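Adding the cast-of-cast combines (together with `buildvector_of_truncate` and `integer_of_truncate`) to the AArch64 post-legalizer combiner helps clear up some of the legalization artefacts. Not all of the `cast_combines` are added (notably the select combines), as they currently show questionable benefit in the test updates.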


Patch is 24.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112509.diff

13 Files Affected:

  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-5)
  • (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+3-3)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+7-14)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir (+29-69)
  • (modified) llvm/test/CodeGen/AArch64/add.ll (+1-5)
  • (modified) llvm/test/CodeGen/AArch64/and-mask-removal.ll (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/andorxor.ll (+3-15)
  • (modified) llvm/test/CodeGen/AArch64/bitcast.ll (+4-16)
  • (modified) llvm/test/CodeGen/AArch64/concat-vector.ll (+21-70)
  • (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+6-5)
  • (modified) llvm/test/CodeGen/AArch64/itofp.ll (+1-4)
  • (modified) llvm/test/CodeGen/AArch64/mul.ll (+1-5)
  • (modified) llvm/test/CodeGen/AArch64/sub.ll (+1-5)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 77cb4370b54664..d0373a7dadfcf9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1791,20 +1791,24 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
 
 def integer_of_truncate : integer_of_opcode<G_TRUNC>;
 
-def cast_combines: GICombineGroup<[
+def cast_of_cast_combines: GICombineGroup<[
   truncate_of_zext,
   truncate_of_sext,
   truncate_of_anyext,
-  select_of_zext,
-  select_of_anyext,
-  select_of_truncate,
   zext_of_zext,
   zext_of_anyext,
   sext_of_sext,
   sext_of_anyext,
   anyext_of_anyext,
   anyext_of_zext,
-  anyext_of_sext,
+  anyext_of_sext
+]>;
+
+def cast_combines: GICombineGroup<[
+  cast_of_cast_combines,
+  select_of_zext,
+  select_of_anyext,
+  select_of_truncate,
   buildvector_of_truncate,
   narrow_binop_add,
   narrow_binop_sub,
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ead6455ddd5278..321190c83b79f3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -314,9 +314,9 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, combines_for_extload,
-                        combine_indexed_load_store,
-                        sext_trunc_sextload, mutate_anyext_to_zext,
+                       [copy_prop, cast_of_cast_combines, buildvector_of_truncate,
+                        integer_of_truncate, mutate_anyext_to_zext,
+                        combines_for_extload, combine_indexed_load_store, sext_trunc_sextload,
                         hoist_logic_op_with_same_opcode_hands,
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index ae04cc77dcaf13..b045deebc56e03 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -135,20 +135,13 @@ name:            test_combine_trunc_build_vector
 legalized: true
 body:             |
   bb.1:
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector
-    ; CHECK-PRE: %arg1:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
-    ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
-    ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
-    ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector
-    ; CHECK-POST: %arg1:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
-    ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>)
+    ; CHECK-LABEL: name: test_combine_trunc_build_vector
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
+    ; CHECK-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
     %arg1:_(s64) = COPY $x0
     %arg2:_(s64) = COPY $x0
     %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
index 4a38b5d4c63dd9..9a2b9dd4b2b608 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
@@ -32,20 +32,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[ANYEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_ANYEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -82,20 +74,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_sext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[SEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_sext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_SEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -107,20 +91,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_zext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[ZEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_zext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_ZEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -132,19 +108,11 @@ legalized: true
 body:             |
   bb.1:
   liveins: $w0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s32
-    ; CHECK-PRE: liveins: $w0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK-PRE-NEXT: $w0 = COPY [[COPY]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s32
-    ; CHECK-POST: liveins: $w0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $w0
     %1:_(s64) = G_ANYEXT %0(s32)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -156,20 +124,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $x0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s64
-    ; CHECK-PRE: liveins: $x0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[TRUNC]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s64
-    ; CHECK-POST: liveins: $x0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[COPY]](s64)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s128)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $x0
     %1:_(s128) = G_ANYEXT %0(s64)
     %2:_(s32) = G_TRUNC %1(s128)
diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index fc1a0c71d4cdf0..ce7e3101a7a541 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index f005ca47ad124f..09f00b3845f25f 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -530,10 +530,10 @@ define i64 @test_2_selects(i8 zeroext %a) {
 ; CHECK-LABEL: test_2_selects:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    add w9, w0, #24
-; CHECK-NEXT:    mov w8, #131
+; CHECK-NEXT:    mov w8, #131 ; =0x83
 ; CHECK-NEXT:    and w9, w9, #0xff
 ; CHECK-NEXT:    cmp w9, #81
-; CHECK-NEXT:    mov w9, #57
+; CHECK-NEXT:    mov w9, #57 ; =0x39
 ; CHECK-NEXT:    csel x8, x8, xzr, lo
 ; CHECK-NEXT:    csel x9, xzr, x9, eq
 ; CHECK-NEXT:    add x0, x8, x9
diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index 5385a917619fa0..459daece90deed 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -463,11 +463,7 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -514,11 +510,7 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -565,11 +557,7 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    eor v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 79cfeedb74bce0..bbdf8b0a13d358 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -60,11 +60,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-LABEL: bitcast_v4i8_i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
   %c = add <4 x i8> %a, %b
@@ -116,9 +112,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-LABEL: bitcast_v2i16_i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT:    xtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
   %c = add <2 x i16> %a, %b
@@ -418,9 +412,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-LABEL: bitcast_v2i16_v4i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT:    xtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    mov b1, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[2]
@@ -455,11 +447,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-LABEL: bitcast_v4i8_v2i16:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index d800b2549cf223..0033999b9bd51d 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -33,18 +33,8 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 ;
 ; CHECK-GI-LABEL: concat2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v3.h[0], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v3.h[1], v1.h[1]
-; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v3.h[2], v1.h[2]
-; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
-; CHECK-GI-NEXT:    mov v3.h[3], v1.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v2.8h
-; CHECK-GI-NEXT:    xtn v1.8b, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
@@ -74,15 +64,9 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 ;
 ; CHECK-GI-LABEL: concat4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
-; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-GI-NEXT:    xtn v1.4h, v0.4s
-; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
@@ -183,12 +167,11 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
 ;
 ; CHECK-GI-LABEL: concat_v8s16_v2s16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    ldr h1, [x0, #2]
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    ldrh w8, [x0]
+; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-GI-NEXT:    ret
     %a = load <2 x i16>, ptr %ptr
     %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -238,34 +221,14 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <
 ;
 ; CHECK-GI-LABEL: concat_v16s8_v4s8_reg:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v4.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v5.h[0], v1.h[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    mov v6.h[0], v2.h[0]
-; CHECK-GI-NEXT:    mov v7.h[0], v3.h[0]
-; CHECK-GI-NEXT:    mov v4.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v5.h[1], v1.h[1]
-; CHECK-GI-NEXT:    mov v6.h[1], v2.h[1]
-; CHECK-GI-NEXT:    mov v7.h[1], v3.h[1]
-; CHECK-GI-NEXT:    mov v4.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v5.h[2], v1.h[2]
-; CHECK-GI-NEXT:    mov v6.h[2], v2.h[2]
-; CHECK-GI-NEXT:    mov v7.h[2], v3.h[2]
-; CHECK-GI-NEXT:    mov v4.h[3], v0.h[3]
-; CHECK-GI-NEXT:    mov v5.h[3], v1.h[3]
-; CHECK-GI-NEXT:    mov v6.h[3], v2.h[3]
-; CHECK-GI-NEXT:    mov v7.h[3], v3.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v4.8h
-; CHECK-GI-NEXT:    xtn v1.8b, v5.8h
-; CHECK-GI-NEXT:    xtn v2.8b, v6.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
-; CHECK-GI-NEXT:    xtn v1.8b, v7.8h
+; CHECK-GI-NEXT:    uzp1 v2.8b, v2.8b, v0.8b
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    uzp1 v1.8b, v3.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s2
 ; CHECK-GI-NEXT:    mov v0.s[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
@@ -291,29 +254,17 @@ define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %
 ;
 ; CHECK-GI-LABEL: concat_v8s16_v2s16_reg:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v4.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v5.s[0], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    mov v4.s[1], v0.s[1]
-; CHECK-GI-NEXT:    mov v5.s[1], v1.s[1]
-; CHECK-GI-NEXT:    mov v1.s[0], v2.s[0]
-; CHECK-GI-NEXT:    xtn v0.4h, v4.4s
-; CHECK-GI-NEXT:    xtn v4.4h, v5.4s
-; CHECK-GI-NEXT:    mov v1.s[1], v2.s[1]
-; CHECK-GI-NEXT:    mov v2.s[0], v3.s[0]
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    xtn v1.4h, v1.4s
-; CHECK-GI-NEXT:    mov v2.s[1], v3.s[1]
 ; CHECK-...
[truncated]

@davemgreen davemgreen merged commit 2f792f6 into llvm:main Oct 18, 2024
@davemgreen davemgreen deleted the gh-gi-postlegalcast branch October 18, 2024 08:57
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants