diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 77cb4370b5466..d0373a7dadfcf 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1791,20 +1791,24 @@ class integer_of_opcode : GICombineRule < def integer_of_truncate : integer_of_opcode; -def cast_combines: GICombineGroup<[ +def cast_of_cast_combines: GICombineGroup<[ truncate_of_zext, truncate_of_sext, truncate_of_anyext, - select_of_zext, - select_of_anyext, - select_of_truncate, zext_of_zext, zext_of_anyext, sext_of_sext, sext_of_anyext, anyext_of_anyext, anyext_of_zext, - anyext_of_sext, + anyext_of_sext +]>; + +def cast_combines: GICombineGroup<[ + cast_of_cast_combines, + select_of_zext, + select_of_anyext, + select_of_truncate, buildvector_of_truncate, narrow_binop_add, narrow_binop_sub, diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index ead6455ddd527..321190c83b79f 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -314,9 +314,9 @@ def AArch64PostLegalizerLowering // Post-legalization combines which are primarily optimizations. def AArch64PostLegalizerCombiner : GICombiner<"AArch64PostLegalizerCombinerImpl", - [copy_prop, combines_for_extload, - combine_indexed_load_store, - sext_trunc_sextload, mutate_anyext_to_zext, + [copy_prop, cast_of_cast_combines, buildvector_of_truncate, + integer_of_truncate, mutate_anyext_to_zext, + combines_for_extload, combine_indexed_load_store, sext_trunc_sextload, hoist_logic_op_with_same_opcode_hands, redundant_and, xor_of_and_with_same_reg, extractvecelt_pairwise_add, redundant_or, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir index ae04cc77dcaf1..b045deebc56e0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir @@ -135,20 +135,13 @@ name: test_combine_trunc_build_vector legalized: true body: | bb.1: - ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector - ; CHECK-PRE: %arg1:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64) - ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64) - ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector - ; CHECK-POST: %arg1:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>) - ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>) + ; CHECK-LABEL: name: test_combine_trunc_build_vector + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64) + ; CHECK-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) + ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>) %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x0 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir index 4a38b5d4c63dd..9a2b9dd4b2b60 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir @@ -32,20 +32,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[ANYEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_ANYEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -82,20 +74,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_sext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[SEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_sext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_SEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -107,20 +91,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_zext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[ZEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_zext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_ZEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -132,19 +108,11 @@ legalized: true body: | bb.1: liveins: $w0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s32 - ; CHECK-PRE: liveins: $w0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-PRE-NEXT: $w0 = COPY [[COPY]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s32 - ; CHECK-POST: liveins: $w0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) %0:_(s32) = COPY $w0 %1:_(s64) = G_ANYEXT %0(s32) %2:_(s32) = G_TRUNC %1(s64) @@ -156,20 +124,12 @@ legalized: true body: | bb.1: liveins: $x0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s64 - ; CHECK-PRE: liveins: $x0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-PRE-NEXT: $w0 = COPY [[TRUNC]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s64 - ; CHECK-POST: liveins: $x0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[COPY]](s64) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s128) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32) %0:_(s64) = COPY $x0 %1:_(s128) = G_ANYEXT %0(s64) %2:_(s32) = G_TRUNC %1(s128) diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll index fc1a0c71d4cdf..ce7e3101a7a54 100644 --- a/llvm/test/CodeGen/AArch64/add.ll +++ b/llvm/test/CodeGen/AArch64/add.ll @@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll index f005ca47ad124..09f00b3845f25 100644 --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -530,10 +530,10 @@ define i64 @test_2_selects(i8 zeroext %a) { ; CHECK-LABEL: test_2_selects: ; CHECK: ; %bb.0: ; CHECK-NEXT: add w9, w0, #24 -; CHECK-NEXT: mov w8, #131 +; CHECK-NEXT: mov w8, #131 ; =0x83 ; CHECK-NEXT: and w9, w9, #0xff ; CHECK-NEXT: cmp w9, #81 -; CHECK-NEXT: mov w9, #57 +; CHECK-NEXT: mov w9, #57 ; =0x39 ; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: csel x9, xzr, x9, eq ; CHECK-NEXT: add x0, x8, x9 diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll index 5385a917619fa..459daece90dee 100644 --- a/llvm/test/CodeGen/AArch64/andorxor.ll +++ b/llvm/test/CodeGen/AArch64/andorxor.ll @@ -463,11 +463,7 @@ define void @and_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -514,11 +510,7 @@ define void @or_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -565,11 +557,7 @@ define void @xor_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index 79cfeedb74bce..bbdf8b0a13d35 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -60,11 +60,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-LABEL: bitcast_v4i8_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, %b @@ -116,9 +112,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI-LABEL: bitcast_v2i16_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b @@ -418,9 +412,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI-LABEL: bitcast_v2i16_v4i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: mov b1, v0.b[1] ; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] ; CHECK-GI-NEXT: mov b3, v0.b[2] @@ -455,11 +447,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-LABEL: bitcast_v4i8_v2i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index d800b2549cf22..0033999b9bd51 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -33,18 +33,8 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) { ; ; CHECK-GI-LABEL: concat2: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.h[0], v0.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v3.h[0], v1.h[0] -; CHECK-GI-NEXT: mov v2.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v3.h[1], v1.h[1] -; CHECK-GI-NEXT: mov v2.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v3.h[2], v1.h[2] -; CHECK-GI-NEXT: mov v2.h[3], v0.h[3] -; CHECK-GI-NEXT: mov v3.h[3], v1.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v2.8h -; CHECK-GI-NEXT: xtn v1.8b, v3.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: fmov w8, s1 @@ -74,15 +64,9 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) { ; ; CHECK-GI-LABEL: concat4: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.s[0], v0.s[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v2.s[1], v0.s[1] -; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] -; CHECK-GI-NEXT: xtn v2.4h, v2.4s -; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] -; CHECK-GI-NEXT: xtn v1.4h, v0.4s -; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: mov v0.s[1], w8 @@ -183,12 +167,11 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) { ; ; CHECK-GI-LABEL: concat_v8s16_v2s16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x0, #2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: xtn v0.4h, v0.4s -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: ldrh w9, [x0, #2] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] ; CHECK-GI-NEXT: ret %a = load <2 x i16>, ptr %ptr %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> @@ -238,34 +221,14 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, < ; ; CHECK-GI-LABEL: concat_v16s8_v4s8_reg: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v4.h[0], v0.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v5.h[0], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov v6.h[0], v2.h[0] -; CHECK-GI-NEXT: mov v7.h[0], v3.h[0] -; CHECK-GI-NEXT: mov v4.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v5.h[1], v1.h[1] -; CHECK-GI-NEXT: mov v6.h[1], v2.h[1] -; CHECK-GI-NEXT: mov v7.h[1], v3.h[1] -; CHECK-GI-NEXT: mov v4.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v5.h[2], v1.h[2] -; CHECK-GI-NEXT: mov v6.h[2], v2.h[2] -; CHECK-GI-NEXT: mov v7.h[2], v3.h[2] -; CHECK-GI-NEXT: mov v4.h[3], v0.h[3] -; CHECK-GI-NEXT: mov v5.h[3], v1.h[3] -; CHECK-GI-NEXT: mov v6.h[3], v2.h[3] -; CHECK-GI-NEXT: mov v7.h[3], v3.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v4.8h -; CHECK-GI-NEXT: xtn v1.8b, v5.8h -; CHECK-GI-NEXT: xtn v2.8b, v6.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: xtn v1.8b, v7.8h +; CHECK-GI-NEXT: uzp1 v2.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: uzp1 v1.8b, v3.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s2 ; CHECK-GI-NEXT: mov v0.s[2], w8 ; CHECK-GI-NEXT: fmov w8, s1 @@ -291,29 +254,17 @@ define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> % ; ; CHECK-GI-LABEL: concat_v8s16_v2s16_reg: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v4.s[0], v0.s[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v5.s[0], v1.s[0] -; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov v4.s[1], v0.s[1] -; CHECK-GI-NEXT: mov v5.s[1], v1.s[1] -; CHECK-GI-NEXT: mov v1.s[0], v2.s[0] -; CHECK-GI-NEXT: xtn v0.4h, v4.4s -; CHECK-GI-NEXT: xtn v4.4h, v5.4s -; CHECK-GI-NEXT: mov v1.s[1], v2.s[1] -; CHECK-GI-NEXT: mov v2.s[0], v3.s[0] +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: xtn v1.4h, v1.4s -; CHECK-GI-NEXT: mov v2.s[1], v3.s[1] ; CHECK-GI-NEXT: mov v0.s[0], w8 -; CHECK-GI-NEXT: fmov w8, s4 -; CHECK-GI-NEXT: xtn v2.4h, v2.4s -; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: uzp1 v2.4h, v2.4h, v0.4h +; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: uzp1 v1.4h, v3.4h, v0.4h ; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: mov v0.s[3], w8 ; CHECK-GI-NEXT: ret %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index baab53d8bdbd4..66f26fc9d8597 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -922,26 +922,27 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x ; CHECK-GI-LABEL: v3f64_i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-GI-NEXT: mov w8, #31 // =0x1f ; CHECK-GI-NEXT: fcmp d2, d5 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] ; CHECK-GI-NEXT: mov v1.s[0], w8 ; CHECK-GI-NEXT: cset w9, mi -; CHECK-GI-NEXT: mov v2.d[0], x9 +; CHECK-GI-NEXT: mov v2.s[0], w9 ; CHECK-GI-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-GI-NEXT: fcmgt v0.2d, v3.2d, v0.2d ; CHECK-GI-NEXT: mov v1.s[1], w8 ; CHECK-GI-NEXT: mov v3.s[0], w9 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d ; CHECK-GI-NEXT: mov v1.s[2], w8 -; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v2.4s ; CHECK-GI-NEXT: mov v3.s[1], w9 +; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] +; CHECK-GI-NEXT: mov v3.s[2], w9 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: neg v1.4s, v1.4s -; CHECK-GI-NEXT: mov v3.s[2], w9 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: eor v1.16b, v0.16b, v3.16b ; CHECK-GI-NEXT: and v0.16b, v6.16b, v0.16b diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index c5bde81ba4a5e..81c1a64f2d434 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -7937,10 +7937,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-FP16-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll index 9e748c9641aa8..5e7f71c18c27a 100644 --- a/llvm/test/CodeGen/AArch64/mul.ll +++ b/llvm/test/CodeGen/AArch64/mul.ll @@ -183,11 +183,7 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll index 8e7586bd4843c..c298e6d8a1ff2 100644 --- a/llvm/test/CodeGen/AArch64/sub.ll +++ b/llvm/test/CodeGen/AArch64/sub.ll @@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret