diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0e17897cf60b0..ef5f2210573e0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -149,10 +149,6 @@ static cl::opt EnableShrinkLoadReplaceStoreWithStore( cl::desc("DAG combiner enable load//store with " "a narrower store")); -static cl::opt EnableVectorFCopySignExtendRound( - "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), - cl::desc( - "Enable merging extends and rounds into FCOPYSIGN on vector types")); namespace { class DAGCombiner { @@ -18011,7 +18007,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) { if (YTy == MVT::f128) return false; - return !YTy.isVector() || EnableVectorFCopySignExtendRound; + // Avoid mismatched vector operand types, for better instruction selection. + return !YTy.isVector(); } static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll index 78843e392e536..96056db2a4f2d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ;============ v2f32 @@ -47,32 +46,16 @@ define @test_copysign_v4f32_v4f32( %a, ; SplitVecOp #1 define @test_copysign_v4f32_v4f64( %a, %b) #0 { -; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64: -; CHECK-NO-EXTEND-ROUND: // %bb.0: -; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: ret -; -; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64: -; CHECK-EXTEND-ROUND: // %bb.0: -; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s -; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d -; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff -; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff -; CHECK-EXTEND-ROUND-NEXT: and z2.s, z2.s, #0x80000000 -; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d -; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s -; CHECK-EXTEND-ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f32_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff +; CHECK-NEXT: fcvt z2.s, p0/m, z2.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: and z1.s, z1.s, #0x80000000 +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v4f32( %a, %tmp0) ret %r @@ -177,32 +160,16 @@ define @test_copysign_v4f16_v4f32( %a, @test_copysign_v4f16_v4f64( %a, %b) #0 { -; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64: -; CHECK-NO-EXTEND-ROUND: // %bb.0: -; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: ret -; -; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64: -; CHECK-EXTEND-ROUND: // %bb.0: -; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s -; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d -; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d -; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff -; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000 -; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d -; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s -; CHECK-EXTEND-ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f16_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z0.h, z0.h, #0x7fff +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: and z1.h, z1.h, #0x8000 +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v4f16( %a, %tmp0) ret %r @@ -224,32 +191,16 @@ define @test_copysign_v8f16_v8f16( %a, @test_copysign_v8f16_v8f32( %a, %b) #0 { -; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32: -; CHECK-NO-EXTEND-ROUND: // %bb.0: -; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s -; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.h, z1.h, z2.h -; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: ret -; -; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32: -; CHECK-EXTEND-ROUND: // %bb.0: -; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s -; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.s, z0.h -; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h -; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s -; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff -; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000 -; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d -; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.h, z0.h, z2.h -; CHECK-EXTEND-ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v8f16_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.h, z0.h, #0x7fff +; CHECK-NEXT: fcvt z2.h, p0/m, z2.s +; CHECK-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: and z1.h, z1.h, #0x8000 +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v8f16( %a, %tmp0) ret %r @@ -259,48 +210,28 @@ define @test_copysign_v8f16_v8f32( %a, @test_copysign_nxv4f32_nxv4f16( %a, %b) #0 { -; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16: -; CHECK-NO-EXTEND-ROUND: // %bb.0: -; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff -; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s -; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s -; CHECK-NO-EXTEND-ROUND-NEXT: ret -; -; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16: -; CHECK-EXTEND-ROUND: // %bb.0: -; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s -; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s -; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-EXTEND-ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: and z1.s, z1.s, #0x80000000 +; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret %t1 = call @llvm.copysign.v4f32( %a, %b) %t2 = fptrunc %t1 to ret %t2 } define @test_copysign_nxv2f64_nxv2f32( %a, %b) #0 { -; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32: -; CHECK-NO-EXTEND-ROUND: // %bb.0: -; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000 -; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff -; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d -; CHECK-NO-EXTEND-ROUND-NEXT: ret -; -; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32: -; CHECK-EXTEND-ROUND: // %bb.0: -; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d -; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d -; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff -; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d -; CHECK-EXTEND-ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000 +; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: ret %t1 = call @llvm.copysign.v2f64( %a, %b) %t2 = fptrunc %t1 to ret %t2 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll index e77cd9ef55eaf..37450431d8a11 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND +; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 +; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -436,30 +433,17 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 { ; SplitVecRes mismatched define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4 -; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z0.d }, p0/z, [x1] -; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0] -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s -; CHECK_NO_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff -; CHECK_NO_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000 -; CHECK_NO_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d -; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0] -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: ldr q0, [x1] -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4 -; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0] -; CHECK_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff -; CHECK_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s -; CHECK_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000 -; CHECK_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d -; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0] -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: and z1.d, z1.d, #0x7fffffffffffffff +; CHECK-NEXT: and z0.d, z0.d, #0x8000000000000000 +; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fpext <4 x float> %b to <4 x double> diff --git a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll index 778d7e193e23e..a7fa9e7575df6 100644 --- a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck --check-prefixes=CHECK,CHECK_NO_EXTEND_ROUND %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK_EXTEND_ROUND %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -45,28 +44,15 @@ define @test_copysign_v4f32_v4f32( %a, ; SplitVecOp #1 define @test_copysign_v4f32_v4f64( %a, %b) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_NO_EXTEND_ROUND-NEXT: mov z3.s, #0x7fffffff -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.s, p0/m, z2.d -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK_NO_EXTEND_ROUND-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: mov z4.s, #0x7fffffff -; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: fcvt z2.s, p0/m, z2.d -; CHECK_EXTEND_ROUND-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z3.s -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f32_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.s, #0x7fffffff +; CHECK-NEXT: fcvt z2.s, p0/m, z2.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z3.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v4f32( %a, %tmp0) ret %r @@ -105,29 +91,17 @@ declare @llvm.copysign.v2f64( %a, @test_copysign_v4f64_v4f32( %a, %b) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_NO_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s -; CHECK_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff -; CHECK_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s -; CHECK_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s -; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z3.d, z2.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: fcvt z3.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z2.s +; CHECK-NEXT: bsl z0.d, z0.d, z2.d, z4.d +; CHECK-NEXT: bsl z1.d, z1.d, z3.d, z4.d +; CHECK-NEXT: ret %tmp0 = fpext %b to %r = call @llvm.copysign.v4f64( %a, %tmp0) ret %r @@ -173,28 +147,15 @@ define @test_copysign_v4f16_v4f32( %a, @test_copysign_v4f16_v4f64( %a, %b) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_NO_EXTEND_ROUND-NEXT: mov z3.h, #32767 // =0x7fff -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.d -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.d -; CHECK_NO_EXTEND_ROUND-NEXT: uzp1 z1.s, z1.s, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d -; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: mov z4.h, #32767 // =0x7fff -; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.d -; CHECK_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.d -; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z3.s -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f16_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z3.h, #32767 // =0x7fff +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z3.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v4f16( %a, %tmp0) ret %r @@ -215,28 +176,15 @@ define @test_copysign_v8f16_v8f16( %a, @test_copysign_v8f16_v8f32( %a, %b) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.s -; CHECK_NO_EXTEND_ROUND-NEXT: mov z3.h, #32767 // =0x7fff -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.s -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK_NO_EXTEND_ROUND-NEXT: uzp1 z1.h, z1.h, z2.h -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.s -; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.s, z0.h -; CHECK_EXTEND_ROUND-NEXT: mov z4.h, #32767 // =0x7fff -; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.s, z0.h -; CHECK_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.s -; CHECK_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d -; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.h, z0.h, z3.h -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v8f16_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z3.h, #32767 // =0x7fff +; CHECK-NEXT: fcvt z2.h, p0/m, z2.s +; CHECK-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h +; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z3.d +; CHECK-NEXT: ret %tmp0 = fptrunc %b to %r = call @llvm.copysign.v8f16( %a, %tmp0) ret %r diff --git a/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll index e39e5feefa255..98056fd1d0bf0 100644 --- a/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll @@ -1,10 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND -; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND +; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 +; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -416,28 +413,16 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 { ; SplitVecRes mismatched define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 { -; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_NO_EXTEND_ROUND: // %bb.0: -; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4 -; CHECK_NO_EXTEND_ROUND-NEXT: mov z1.d, #0x7fffffffffffffff -; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z0.d }, p0/z, [x1] -; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z2.d }, p0/z, [x0] -; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s -; CHECK_NO_EXTEND_ROUND-NEXT: bsl z2.d, z2.d, z0.d, z1.d -; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z2.d }, p0, [x0] -; CHECK_NO_EXTEND_ROUND-NEXT: ret -; -; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32: -; CHECK_EXTEND_ROUND: // %bb.0: -; CHECK_EXTEND_ROUND-NEXT: ldr q0, [x1] -; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4 -; CHECK_EXTEND_ROUND-NEXT: mov z1.d, #0x7fffffffffffffff -; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s -; CHECK_EXTEND_ROUND-NEXT: ld1d { z2.d }, p0/z, [x0] -; CHECK_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s -; CHECK_EXTEND_ROUND-NEXT: bsl z2.d, z2.d, z0.d, z1.d -; CHECK_EXTEND_ROUND-NEXT: st1d { z2.d }, p0, [x0] -; CHECK_EXTEND_ROUND-NEXT: ret +; CHECK-LABEL: test_copysign_v4f64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0] +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: bsl z2.d, z2.d, z0.d, z1.d +; CHECK-NEXT: st1d { z2.d }, p0, [x0] +; CHECK-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fpext <4 x float> %b to <4 x double>