From ff83b3523a9371e50e0ab895d485e0ed83814d6d Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 18 Apr 2025 05:45:24 +0000 Subject: [PATCH 1/6] [msan][NFCI] Add AVX512 FP16 tests Forked from llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll Handled suboptimally (visitInstruction): - llvm.x86.avx512fp16.add.ph.512 - llvm.x86.avx512fp16.div.ph.512 - llvm.x86.avx512fp16.mask.vcvtpd2ph.512 - llvm.x86.avx512fp16.mask.vcvtph2pd.512 - llvm.x86.avx512fp16.mask.vcvtph2psx.512 - llvm.x86.avx512fp16.mask.vcvtps2phx.512 - llvm.x86.avx512fp16.mask.vcvtsd2sh.round - llvm.x86.avx512fp16.mask.vcvtsh2sd.round - llvm.x86.avx512fp16.mask.vcvtsh2ss.round - llvm.x86.avx512fp16.mask.vcvtss2sh.round - llvm.x86.avx512fp16.max.ph.512 - llvm.x86.avx512fp16.min.ph.512 - llvm.x86.avx512fp16.mul.ph.512 - llvm.x86.avx512fp16.sub.ph.512 --- .../X86/avx512fp16-arith-intrinsics.ll | 1822 +++++++++++++++++ 1 file changed, 1822 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll new file mode 100644 index 0000000000000..1d59f65119c4c --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll @@ -0,0 +1,1822 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s +; +; Forked from llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll +; +; Handled suboptimally (visitInstruction): +; - llvm.x86.avx512fp16.add.ph.512 +; - llvm.x86.avx512fp16.div.ph.512 +; - llvm.x86.avx512fp16.mask.vcvtpd2ph.512 +; - llvm.x86.avx512fp16.mask.vcvtph2pd.512 +; - llvm.x86.avx512fp16.mask.vcvtph2psx.512 +; - llvm.x86.avx512fp16.mask.vcvtps2phx.512 +; - llvm.x86.avx512fp16.mask.vcvtsd2sh.round +; - llvm.x86.avx512fp16.mask.vcvtsh2sd.round +; - llvm.x86.avx512fp16.mask.vcvtsh2ss.round +; - llvm.x86.avx512fp16.mask.vcvtss2sh.round +; - llvm.x86.avx512fp16.max.ph.512 +; - llvm.x86.avx512fp16.min.ph.512 +; - llvm.x86.avx512fp16.mul.ph.512 +; - llvm.x86.avx512fp16.sub.ph.512 + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_int_x86_avx512fp16_add_ph_512(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_add_ph_512( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> 
@llvm.x86.avx512fp16.add.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_mask_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mask_add_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_add_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[VAL:%.*]] = load <32 x half>, ptr [[PTR]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP10]], align 64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i16> [[TMP4]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] +; CHECK: [[BB13]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB14]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP19]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP20]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB23]]: +; CHECK-NEXT: [[T2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> [[X1]], <32 x half> [[VAL]], i32 4) +; CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x half> [[T2]] to <32 x i16> +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> 
[[TMP28]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T2]], <32 x half> zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT1]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <32 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES3]] +; + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x half>, ptr %ptr + %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + %t2 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %val, i32 4) + %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer + %res3 = fadd <32 x half> %res1, %res2 + ret <32 x half> %res3 +} + +define <32 x half> @test_int_x86_avx512fp16_add_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_add_ph_512_round( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], <32 x half> [[SRC:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[T1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[T1]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T1]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %t1 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10) + %res = select <32 x i1> %mask, <32 x half> %t1, 
<32 x half> %src + ret <32 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_int_x86_avx512fp16_sub_ph_512(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_sub_ph_512( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_mask_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mask_sub_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x 
i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_sub_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[VAL:%.*]] = load <32 x half>, ptr [[PTR]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP10]], align 64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i16> [[TMP4]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] +; CHECK: [[BB13]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB14]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP19]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> 
[[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP20]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB23]]: +; CHECK-NEXT: [[T2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> [[X1]], <32 x half> [[VAL]], i32 4) +; CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x half> [[T2]] to <32 x i16> +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP28]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T2]], <32 x half> zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT1]] +; CHECK-NEXT: [[RES3:%.*]] = fsub <32 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES3]] +; + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x half>, ptr %ptr + %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + %t2 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %val, i32 4) + %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer + %res3 = fsub <32 x half> %res1, %res2 + ret <32 x half> %res3 +} + +define <32 x half> @test_int_x86_avx512fp16_sub_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_sub_ph_512_round( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], <32 x half> [[SRC:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[T1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[T1]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T1]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %t1 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10) + %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src + ret <32 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_int_x86_avx512fp16_mul_ph_512(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mul_ph_512( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_mask_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mask_mul_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_mul_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[VAL:%.*]] = load <32 x half>, ptr [[PTR]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP10]], align 64 +; CHECK-NEXT: 
[[TMP11:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i16> [[TMP4]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] +; CHECK: [[BB13]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB14]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP19]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP20]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB23]]: +; CHECK-NEXT: [[T2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> [[X1]], <32 x half> [[VAL]], i32 4) +; CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x half> [[T2]] to <32 x i16> +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP28]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T2]], <32 x half> zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT1]] +; CHECK-NEXT: [[RES3:%.*]] = fmul <32 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES3]] +; + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x half>, ptr %ptr + %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + %t2 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %val, i32 4) + %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer + %res3 = fmul <32 x half> %res1, %res2 + ret <32 x half> %res3 +} + +define <32 x half> @test_int_x86_avx512fp16_mul_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mul_ph_512_round( +; CHECK-SAME: <32 
x half> [[X1:%.*]], <32 x half> [[X2:%.*]], <32 x half> [[SRC:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[T1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[T1]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T1]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %t1 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10) + %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src + ret <32 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_int_x86_avx512fp16_div_ph_512(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_div_ph_512( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 
[[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_mask_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_mask_div_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src + ret <32 x half> %res +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_div_ph_512( +; CHECK-SAME: <32 x half> [[SRC:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), 
align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[VAL:%.*]] = load <32 x half>, ptr [[PTR]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP10]], align 64 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i16> [[TMP4]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]] +; CHECK: [[BB13]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB14]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP19]], <32 x i16> [[TMP15]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP20]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB23]]: +; CHECK-NEXT: [[T2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> [[X1]], <32 x half> [[VAL]], i32 4) +; CHECK-NEXT: [[TMP24:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <32 x half> [[T2]] to <32 x i16> +; CHECK-NEXT: [[TMP26:%.*]] = xor <32 x i16> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <32 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: 
[[_MSPROP_SELECT1:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP28]], <32 x i16> [[TMP24]] +; CHECK-NEXT: [[RES2:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T2]], <32 x half> zeroinitializer +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT1]] +; CHECK-NEXT: [[RES3:%.*]] = fdiv <32 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES3]] +; + %mask = bitcast i32 %msk to <32 x i1> + %val = load <32 x half>, ptr %ptr + %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + %t2 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %val, i32 4) + %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer + %res3 = fdiv <32 x half> %res1, %res2 + ret <32 x half> %res3 +} + +define <32 x half> @test_int_x86_avx512fp16_div_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_div_ph_512_round( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], <32 x half> [[SRC:%.*]], i32 [[MSK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[T1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[T1]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[SRC]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[T1]], <32 x half> [[SRC]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %mask = bitcast i32 %msk to <32 x i1> + %t1 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> 
%x2, i32 10) + %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src + ret <32 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_min_ph(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_min_ph( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[_MSPROP]] to <32 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp olt <32 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> [[RES0]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x half> [[X1]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x half> [[X2]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP3]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[RES0]], <32 x half> [[X1]], <32 x half> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES1]] +; + %res0 = fcmp olt <32 x half> %x1, %x2 + %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2 + ret <32 x half> %res1 +} + +define <32 x half> @test_int_x86_avx512fp16_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_min_ph_512_sae( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 8) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES0]] +; + %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8) + ret <32 x half> %res0 +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_min_ph_512_sae( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls 
to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <32 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP13]], <32 x i16> [[TMP9]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES1]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + ret <32 x half> %res1 +} + +declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_max_ph(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_max_ph( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[_MSPROP]] to <32 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp ogt <32 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> [[RES0]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x half> [[X1]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x half> [[X2]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP3]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[RES0]], <32 x half> [[X1]], <32 x half> [[X2]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret 
<32 x half> [[RES1]] +; + %res0 = fcmp ogt <32 x half> %x1, %x2 + %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2 + ret <32 x half> %res1 +} + +define <32 x half> @test_int_x86_avx512fp16_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_max_ph_512_sae( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 8) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES0]] +; + %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8) + ret <32 x half> %res0 +} + +define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae( +; CHECK-SAME: <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[MSK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP1]] to <32 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[MSK]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES0:%.*]] = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> [[X1]], <32 x half> [[X2]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[MASK]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x half> [[RES0]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <32 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP13]], <32 x i16> [[TMP9]] +; 
CHECK-NEXT: [[RES1:%.*]] = select <32 x i1> [[MASK]], <32 x half> [[RES0]], <32 x half> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES1]] +; + %mask = bitcast i32 %msk to <32 x i1> + %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8) + %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer + ret <32 x half> %res1 +} + +declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32) + +define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> [[X0]], <8 x double> [[X1]], i8 [[X2]], i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4) + ret <8 x double> %res +} + +define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] 
+; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> [[X0]], <8 x double> [[X1]], i8 [[X2]], i32 8) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8) + ret <8 x double> %res +} + +define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) #0 { +; CHECK-LABEL: define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x double> [[X1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> [[X0]], <8 x double> [[X1]], i8 -1, i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4) + ret <8 x double> %res +} + +define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(ptr %px0, <8 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load( +; CHECK-SAME: ptr [[PX0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[X0:%.*]] = load <8 x half>, ptr [[PX0]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PX0]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 
[[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> [[X0]], <8 x double> [[X1]], i8 [[X2]], i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %x0 = load <8 x half>, ptr %px0, align 16 + %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4) + ret <8 x double> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32) + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph( +; CHECK-SAME: <8 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r( +; CHECK-SAME: <8 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or 
i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]], i32 11) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(ptr %px0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load( +; CHECK-SAME: ptr [[PX0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[X0:%.*]] = load <8 x double>, ptr [[PX0]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PX0]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %x0 = load <8 x double>, ptr %px0, align 64 + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4) + ret <8 x half> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32) + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x float> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> [[X0]], <4 x float> [[X1]], <8 x half> [[X2]], i8 [[X3]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x float> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> [[X0]], <4 x float> [[X1]], <8 x half> [[X2]], i8 [[X3]], i32 11) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x float> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> [[X0]], <4 x float> [[X1]], <8 x half> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x float> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> [[X0]], <4 x float> [[X1]], <8 x half> zeroinitializer, i8 
[[X2]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4) + ret <8 x half> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32) + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> [[X0]], <2 x double> [[X1]], <8 x half> [[X2]], i8 [[X3]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast 
<2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> [[X0]], <2 x double> [[X1]], <8 x half> [[X2]], i8 [[X3]], i32 11) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> [[X0]], <2 x double> [[X1]], <8 x half> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> [[X0]], <2 x double> [[X1]], <8 x half> zeroinitializer, i8 [[X2]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4) + ret <8 x half> %res +} + +declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32) + +define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round( +; CHECK-SAME: <4 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> [[X0]], <8 x half> [[X1]], <4 x float> [[X2]], i8 [[X3]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, 
i8 %x3) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r( +; CHECK-SAME: <4 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> [[X0]], <8 x half> [[X1]], <4 x float> [[X2]], i8 [[X3]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask( +; CHECK-SAME: <4 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], <4 x float> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> [[X0]], <8 x half> 
[[X1]], <4 x float> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z( +; CHECK-SAME: <4 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> [[X0]], <8 x half> [[X1]], <4 x float> zeroinitializer, i8 [[X2]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4) + ret <4 x float> %res +} + +declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32) + +define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round( +; CHECK-SAME: <2 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 
[[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> [[X0]], <8 x half> [[X1]], <2 x double> [[X2]], i8 [[X3]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r( +; CHECK-SAME: <2 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> [[X0]], <8 x half> [[X1]], <2 x double> [[X2]], i8 [[X3]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask( +; CHECK-SAME: <2 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], <2 x double> [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> [[X0]], <8 x half> [[X1]], <2 x double> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z( +; CHECK-SAME: <2 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> [[X0]], <8 x half> [[X1]], <2 x double> zeroinitializer, i8 [[X2]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4) + ret <2 x double> %res +} + +declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32) + +define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512( +; CHECK-SAME: <16 x half> [[X0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> undef, i16 -1, i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x float> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> [[X1]], i16 [[X2]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> zeroinitializer, i16 [[X2]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4) + 
ret <16 x float> %res +} + +define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r( +; CHECK-SAME: <16 x half> [[X0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> undef, i16 -1, i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x float> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> [[X1]], i16 [[X2]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) #0 { +; CHECK-LABEL: define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r( +; CHECK-SAME: <16 x half> [[X0:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = 
call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> zeroinitializer, i16 [[X2]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8) + ret <16 x float> %res +} + +declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32) + +define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512( +; CHECK-SAME: <16 x float> [[X0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> undef, i16 -1, i32 4) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4) + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512( +; CHECK-SAME: <16 x float> [[X0:%.*]], <16 x half> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> [[X1]], i16 [[X2]], i32 4) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4) + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512( +; CHECK-SAME: <16 x float> [[X0:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> zeroinitializer, i16 [[X2]], i32 4) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4) + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r( +; CHECK-SAME: <16 x float> [[X0:%.*]], <16 x half> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> [[X1]], i16 [[X2]], i32 9) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> [[X1]], i16 -1, i32 10) +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x half> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES2]] +; + %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9) + %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10) + %res2 = fadd <16 x half> %res, %res1 + ret <16 x half> %res2 +} + +attributes #0 = { sanitize_memory } From 
5c24a7b71f032712cadc4dc67eb05366bb2d3f3e Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 18 Apr 2025 06:05:57 +0000 Subject: [PATCH 2/6] undef -> poison --- .../X86/avx512fp16-arith-intrinsics.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll index 1d59f65119c4c..121a17894fe9e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-intrinsics.ll @@ -1590,11 +1590,11 @@ define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> undef, i16 -1, i32 4) +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> poison, i16 -1, i32 4) ; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[RES]] ; - %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4) + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> poison, i16 -1, i32 4) ret <16 x float> %res } @@ -1654,11 +1654,11 @@ define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> undef, i16 -1, i32 8) +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> [[X0]], <16 x float> poison, i16 -1, i32 8) ; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[RES]] ; - %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8) + %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> poison, i16 -1, i32 8) ret <16 x float> %res } @@ -1720,11 +1720,11 @@ define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> undef, i16 -1, i32 4) +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> [[X0]], <16 x half> poison, i16 -1, i32 4) ; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x half> [[RES]] ; - %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4) + %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> poison, i16 -1, i32 4) ret <16 x half> %res } From 
f0caa0cff2db784f698a7d7a9da7f5bf537281e3 Mon Sep 17 00:00:00 2001
From: Thurston Dang
Date: Fri, 18 Apr 2025 06:17:41 +0000
Subject: [PATCH 3/6] Add avx512fp16-arith-vl-intrinsics.ll

---
 .../X86/avx512fp16-arith-vl-intrinsics.ll | 1898 +++++++++++++++++
 1 file changed, 1898 insertions(+)
 create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll

diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll
new file mode 100644
index 0000000000000..224bab6dda923
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll
@@ -0,0 +1,1898 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
+;
+; Handled suboptimally (visitInstruction):
+; - llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+; - llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+; - llvm.x86.avx512fp16.mask.vcvtph2pd.128
+; - llvm.x86.avx512fp16.mask.vcvtph2pd.256
+; - llvm.x86.avx512fp16.mask.vcvtph2psx.128
+; - llvm.x86.avx512fp16.mask.vcvtph2psx.256
+; - llvm.x86.avx512fp16.mask.vcvtph2udq.128
+; - llvm.x86.avx512fp16.mask.vcvtph2udq.256
+; - llvm.x86.avx512fp16.mask.vcvtps2phx.128
+; - llvm.x86.avx512fp16.mask.vcvtps2phx.256
+; - llvm.x86.avx512fp16.mask.vcvttph2dq.128
+; - llvm.x86.avx512fp16.mask.vcvttph2dq.256
+; - llvm.x86.avx512fp16.mask.vcvttph2udq.128
+; - llvm.x86.avx512fp16.mask.vcvttph2udq.256
+;
+; Handled heuristically (result shadow is the bitwise OR of the operand shadows):
+; - llvm.x86.avx512fp16.max.ph.128
+; - llvm.x86.avx512fp16.max.ph.256
+; - llvm.x86.avx512fp16.min.ph.128
+; - llvm.x86.avx512fp16.min.ph.256
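+;
+; (Illustrative note, not autogenerated: heuristic handling shows up in the
+; checks below as a plain OR of the operand shadows, e.g.
+;   [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
+; whereas visitInstruction conservatively checks each operand shadow and
+; branches to __msan_warning_noreturn() if any bit may be uninitialized.)
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) #0 {
+; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_add_ph_256(
+; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = fadd <16 x half> [[X1]], [[X2]]
+; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x half> [[RES]]
+;
+ %res = fadd <16 x half> %x1, %x2
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(
+; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], <16 x half> [[SRC:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr inttoptr (i64 add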
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <16 x half>, ptr [[PTR]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i16>, ptr [[TMP11]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fadd <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP17]], <16 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fadd <16 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP1]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x half> [[T3]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP23]], <16 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[T3]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fadd <16 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %msk = bitcast i16 %mask to <16 x i1> + %val = load <16 x half>, ptr %ptr + %res0 = fadd <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src + %t3 = fadd <16 x half> %x1, %val + %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src + %res = fadd <16 x half> %res1 , %res2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fadd <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP9]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %msk = bitcast i16 %mask to <16 x i1> + %res0 = fadd <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res1 +} + +define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_add_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = fadd <8 x half> %x1, %x2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label 
%[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <8 x half>, ptr [[PTR]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP11]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fadd <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP17]], <8 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fadd <8 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP1]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x half> [[T3]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP23]], <8 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[T3]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %msk = bitcast i8 %mask to <8 x i1> + %val = load <8 x half>, ptr %ptr + %res0 = fadd <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src + %t3 = fadd <8 x half> %x1, %val + %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src + %res = fadd <8 x half> %res1 , %res2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fadd <8 x half> [[X1]], 
[[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %msk = bitcast i8 %mask to <8 x i1> + %res0 = fadd <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res1 +} + +define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_sub_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fsub <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = fsub <16 x half> %x1, %x2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], <16 x half> [[SRC:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <16 x half>, ptr [[PTR]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i16>, ptr [[TMP11]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fsub 
<16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP17]], <16 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fsub <16 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP1]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x half> [[T3]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP23]], <16 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[T3]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fsub <16 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %msk = bitcast i16 %mask to <16 x i1> + %val = load <16 x half>, ptr %ptr + %res0 = fsub <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src + %t3 = fsub <16 x half> %x1, %val + %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src + %res = fsub <16 x half> %res1 , %res2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fsub <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP9]], <16 x i16> [[TMP5]] +; 
CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %msk = bitcast i16 %mask to <16 x i1> + %res0 = fsub <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res1 +} + +define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_sub_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fsub <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = fsub <8 x half> %x1, %x2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <8 x half>, ptr [[PTR]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP11]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fsub <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> 
[[TMP6]], <8 x i16> [[TMP17]], <8 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fsub <8 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP1]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x half> [[T3]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP23]], <8 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[T3]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fsub <8 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %msk = bitcast i8 %mask to <8 x i1> + %val = load <8 x half>, ptr %ptr + %res0 = fsub <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src + %t3 = fsub <8 x half> %x1, %val + %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src + %res = fsub <8 x half> %res1 , %res2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fsub <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %msk = bitcast i8 %mask to <8 x i1> + %res0 = fsub <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res1 +} + +define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_mul_ph_256( +; CHECK-SAME: <16 x half> 
[[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fmul <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = fmul <16 x half> %x1, %x2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], <16 x half> [[SRC:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <16 x half>, ptr [[PTR]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i16>, ptr [[TMP11]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fmul <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP17]], <16 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fmul <16 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP1]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x half> [[T3]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = 
xor <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP23]], <16 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[T3]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fmul <16 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %msk = bitcast i16 %mask to <16 x i1> + %val = load <16 x half>, ptr %ptr + %res0 = fmul <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src + %t3 = fmul <16 x half> %x1, %val + %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src + %res = fmul <16 x half> %res1 , %res2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fmul <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP9]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %msk = bitcast i16 %mask to <16 x i1> + %res0 = fmul <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res1 +} + +define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mul_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fmul <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = fmul <8 x half> %x1, %x2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <8 x half>, ptr [[PTR]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP11]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fmul <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP17]], <8 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fmul <8 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP1]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x half> [[T3]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP23]], <8 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[T3]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fmul <8 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store 
<8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %msk = bitcast i8 %mask to <8 x i1> + %val = load <8 x half>, ptr %ptr + %res0 = fmul <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src + %t3 = fmul <8 x half> %x1, %val + %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src + %res = fmul <8 x half> %res1 , %res2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fmul <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %msk = bitcast i8 %mask to <8 x i1> + %res0 = fmul <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res1 +} + +define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_div_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = fdiv <16 x half> %x1, %x2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv fast <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = fdiv fast <16 x half> %x1, %x2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], <16 x half> [[SRC:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <16 x half>, ptr [[PTR]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i16>, ptr [[TMP11]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fdiv <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP17]], <16 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fdiv <16 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP1]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x half> [[T3]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x half> [[SRC]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <16 x i1> [[TMP6]], <16 x i16> [[TMP23]], 
<16 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[T3]], <16 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv <16 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %msk = bitcast i16 %mask to <16 x i1> + %val = load <16 x half>, ptr %ptr + %res0 = fdiv <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src + %t3 = fdiv <16 x half> %x1, %val + %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src + %res = fdiv <16 x half> %res1 , %res2 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]], i16 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fdiv <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[MSK]], <16 x i16> [[_MSPROP]], <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP9]], <16 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[MSK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %msk = bitcast i16 %mask to <16 x i1> + %res0 = fdiv <16 x half> %x1, %x2 + %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res1 +} + +define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_div_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = fdiv <8 x half> %x1, %x2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> 
@test_int_x86_avx512fp16_div_ph_128_fast( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv fast <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = fdiv fast <8 x half> %x1, %x2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[VAL:%.*]] = load <8 x half>, ptr [[PTR]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP11]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RES0:%.*]] = fdiv <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i16> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i16> [[TMP16]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP17]], <8 x i16> [[TMP12]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP3]], [[_MSLD]] +; CHECK-NEXT: [[T3:%.*]] = fdiv <8 x half> [[X1]], [[VAL]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP1]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x half> [[T3]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x half> [[SRC]] to <8 x i16> +; 
CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i16> [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i16> [[TMP22]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT2:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP23]], <8 x i16> [[TMP18]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[T3]], <8 x half> [[SRC]] +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP_SELECT]], [[_MSPROP_SELECT2]] +; CHECK-NEXT: [[RES:%.*]] = fdiv <8 x half> [[RES1]], [[RES2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %msk = bitcast i8 %mask to <8 x i1> + %val = load <8 x half>, ptr %ptr + %res0 = fdiv <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src + %t3 = fdiv <8 x half> %x1, %val + %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src + %res = fdiv <8 x half> %res1 , %res2 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MSK:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES0:%.*]] = fdiv <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[MSK]], <8 x i16> [[_MSPROP]], <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[MSK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %msk = bitcast i8 %mask to <8 x i1> + %res0 = fdiv <8 x half> %x1, %x2 + %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res1 +} + +define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_min_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[_MSPROP]] to <16 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp olt <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> 
[[RES0]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x half> [[X1]] to <16 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[X2]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP3]], <16 x i16> [[TMP9]], <16 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[RES0]], <16 x half> [[X1]], <16 x half> [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %res0 = fcmp olt <16 x half> %x1, %x2 + %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2 + ret <16 x half> %res1 +} + +define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_max_ph_256( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[_MSPROP]] to <16 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp ogt <16 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[RES0]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x half> [[X1]] to <16 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x half> [[X2]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP3]], <16 x i16> [[TMP9]], <16 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <16 x i1> [[RES0]], <16 x half> [[X1]], <16 x half> [[X2]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES1]] +; + %res0 = fcmp ogt <16 x half> %x1, %x2 + %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2 + ret <16 x half> %res1 +} + +define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_min_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[_MSPROP]] to <8 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp olt <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[RES0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[X2]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> 
[[TMP3]], <8 x i16> [[TMP9]], <8 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[RES0]], <8 x half> [[X1]], <8 x half> [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %res0 = fcmp olt <8 x half> %x1, %x2 + %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2 + ret <8 x half> %res1 +} + +define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_max_ph_128( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[_MSPROP]] to <8 x i1> +; CHECK-NEXT: [[RES0:%.*]] = fcmp ogt <8 x half> [[X1]], [[X2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[RES0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[X2]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i16> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP9]], <8 x i16> [[TMP4]] +; CHECK-NEXT: [[RES1:%.*]] = select <8 x i1> [[RES0]], <8 x half> [[X1]], <8 x half> [[X2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES1]] +; + %res0 = fcmp ogt <8 x half> %x1, %x2 + %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2 + ret <8 x half> %res1 +} + +declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>) +declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>) + +define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_max_ph_128_2( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> [[X1]], <8 x half> [[X2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES0]] +; + %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2) + ret <8 x half> %res0 +} + +define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_max_ph_256_2( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> 
@llvm.x86.avx512fp16.max.ph.256(<16 x half> [[X1]], <16 x half> [[X2]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES0]] +; + %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2) + ret <16 x half> %res0 +} + +declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>) +declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>) + +define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_min_ph_128_2( +; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> [[X1]], <8 x half> [[X2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES0]] +; + %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2) + ret <8 x half> %res0 +} + +define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_min_ph_256_2( +; CHECK-SAME: <16 x half> [[X1:%.*]], <16 x half> [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> [[X1]], <16 x half> [[X2]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES0]] +; + %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2) + ret <16 x half> %res0 +} + +declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8) + +define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> [[X0]], <4 x double> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2) + ret <4 x double> %res +} + +define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) #0 { +; CHECK-LABEL: define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x double> [[X1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> [[X0]], <4 x double> [[X1]], i8 -1) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1) + ret <4 x double> %res +} + +declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8) + +define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> [[X0]], <2 x double> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 
+; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2) + ret <2 x double> %res +} + +define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) #0 { +; CHECK-LABEL: define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <2 x double> [[X1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> [[X0]], <2 x double> [[X1]], i8 -1) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1) + ret <2 x double> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8) + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256( +; CHECK-SAME: <4 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> 
@test_int_x86_avx512_mask_vcvt_pd2ph_256_load( +; CHECK-SAME: ptr [[PX0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[X0:%.*]] = load <4 x double>, ptr [[PX0]], align 32 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PX0]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[_MSLD]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %x0 = load <4 x double>, ptr %px0, align 32 + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) + ret <8 x half> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8) + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128( +; CHECK-SAME: <2 x double> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: 
unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load( +; CHECK-SAME: ptr [[PX0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[X0:%.*]] = load <2 x double>, ptr [[PX0]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PX0]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %x0 = load <2 x double>, ptr %px0, align 16 + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2) + ret <8 x half> %res +} + +declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8) + +define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> 
undef, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> [[X0]], <4 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> [[X0]], <4 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8) + +define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = 
call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> [[X0]], <8 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> [[X0]], <8 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8) + +define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) #0 { +; CHECK-LABEL: define 
<4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> [[X0]], <4 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> [[X0]], <4 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x 
i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8) + +define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> [[X0]], <8 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: 
unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> [[X0]], <8 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8) + +define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> [[X0]], <4 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) + ret <4 x i32> %res +} + +define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = 
icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> [[X0]], <4 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8) + +define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> [[X0]], <8 x i32> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: 
[[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> [[X0]], <8 x i32> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8) + +define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> [[X0]], <4 x float> undef, i8 -1) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], <4 x float> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> [[X0]], <4 x float> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> 
@llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> [[X0]], <4 x float> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2) + ret <4 x float> %res +} + +declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8) + +define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) #0 { +; CHECK-LABEL: define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> [[X0]], <8 x float> undef, i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1) + ret <8 x float> %res +} + +define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x float> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> [[X0]], <8 x float> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2) + ret <8 x float> %res +} + +define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256( +; CHECK-SAME: <8 x half> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> [[X0]], <8 x float> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2) + ret <8 x float> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8) + +define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128( +; CHECK-SAME: <4 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; 
CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> [[X0]], <8 x half> [[X1]], i8 -1) +; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x half> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES2]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2) + %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1) + %res2 = fadd <8 x half> %res, %res1 + ret <8 x half> %res2 +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8) + +define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256( +; CHECK-SAME: <8 x float> [[X0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> [[X0]], <8 x half> undef, i8 -1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256( +; CHECK-SAME: <8 x float> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> [[X0]], <8 x half> [[X1]], i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 
x float> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256( +; CHECK-SAME: <8 x float> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> [[X0]], <8 x half> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2) + ret <8 x half> %res +} + +attributes #0 = { sanitize_memory } From 24e17ebbc4a8cae1fe1790de2043f6e720a21390 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 18 Apr 2025 06:41:41 +0000 Subject: [PATCH 4/6] Add avx512fp16-intrinsics.ll --- .../X86/avx512fp16-intrinsics.ll | 3248 +++++++++++++++++ 1 file changed, 3248 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll new file mode 100644 index 0000000000000..fd39f90b09243 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll @@ -0,0 +1,3248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s +; +; Forked from llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll +; +; Handled suboptimally (visitInstruction): +; - llvm.x86.avx512fp16.fpclass.ph.512 +; - llvm.x86.avx512fp16.mask.add.sh.round +; - llvm.x86.avx512fp16.mask.cmp.sh +; - llvm.x86.avx512fp16.mask.div.sh.round +; - llvm.x86.avx512fp16.mask.fpclass.sh +; - llvm.x86.avx512fp16.mask.getexp.ph.512 +; - llvm.x86.avx512fp16.mask.getexp.sh +; - llvm.x86.avx512fp16.mask.getmant.ph.512 +; - llvm.x86.avx512fp16.mask.getmant.sh +; - llvm.x86.avx512fp16.mask.max.sh.round +; - llvm.x86.avx512fp16.mask.min.sh.round +; - llvm.x86.avx512fp16.mask.mul.sh.round +; - llvm.x86.avx512fp16.mask.rcp.ph.512 +; - llvm.x86.avx512fp16.mask.rcp.sh +; - llvm.x86.avx512fp16.mask.reduce.ph.512 +; - llvm.x86.avx512fp16.mask.reduce.sh +; - llvm.x86.avx512fp16.mask.rndscale.ph.512 +; - llvm.x86.avx512fp16.mask.rndscale.sh +; - llvm.x86.avx512fp16.mask.rsqrt.ph.512 +; - llvm.x86.avx512fp16.mask.rsqrt.sh +; - llvm.x86.avx512fp16.mask.scalef.ph.512 +; - llvm.x86.avx512fp16.mask.scalef.sh +; - llvm.x86.avx512fp16.mask.sqrt.sh +; - llvm.x86.avx512fp16.mask.sub.sh.round +; - llvm.x86.avx512fp16.mask.vcvtph2dq.512 +; - llvm.x86.avx512fp16.mask.vcvtph2qq.512 +; - llvm.x86.avx512fp16.mask.vcvtph2udq.512 +; - llvm.x86.avx512fp16.mask.vcvtph2uqq.512 +; - llvm.x86.avx512fp16.mask.vcvttph2dq.512 +; - 
llvm.x86.avx512fp16.mask.vcvttph2udq.512 +; - llvm.x86.avx512fp16.mask.vcvttph2uqq.512 +; - llvm.x86.avx512fp16.sqrt.ph.512 +; - llvm.x86.avx512fp16.vcomi.sh +; - llvm.x86.avx512fp16.vcvtsh2si32 +; - llvm.x86.avx512fp16.vcvtsh2si64 +; - llvm.x86.avx512fp16.vcvtsh2usi32 +; - llvm.x86.avx512fp16.vcvtsh2usi64 +; - llvm.x86.avx512fp16.vcvtsi2sh +; - llvm.x86.avx512fp16.vcvtsi642sh +; - llvm.x86.avx512fp16.vcvttsh2si32 +; - llvm.x86.avx512fp16.vcvttsh2si64 +; - llvm.x86.avx512fp16.vcvttsh2usi32 +; - llvm.x86.avx512fp16.vcvttsh2usi64 +; - llvm.x86.avx512fp16.vcvtusi2sh +; - llvm.x86.avx512fp16.vcvtusi642sh +; - llvm.x86.avx512.sitofp.round.v16f16.v16i32 +; - llvm.x86.avx512.sitofp.round.v8f16.v8i64 +; - llvm.x86.avx512.uitofp.round.v16f16.v16i32 +; - llvm.x86.avx512.uitofp.round.v8f16.v8i64 +; +; Handled heuristically: +; - llvm.sqrt.f16 +; - llvm.sqrt.v32f16 + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32) + +define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) #0 { +; CHECK-LABEL: define i32 @test_x86_avx512fp16_ucomi_sh_lt( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> [[A0]], <8 x half> [[A1]], i32 9, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4) + ret i32 %res +} + +declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone + +define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) #0 { +; CHECK-LABEL: define <32 x half> @test_sqrt_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: store <32 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP2]] +; + %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + ret <32 x half> %1 +} + +define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) #0 { +; CHECK-LABEL: define <32 x half> @test_sqrt_ph_512_fast( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[A1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <32 x half> [[A1]], [[TMP3]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP4]] +; + %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + %2 = fdiv fast <32 x half> %a1, %1 + ret <32 x half> %2 +} + +define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" { +; CHECK-LABEL: define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[A1:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <32 x half> [[A1]], [[TMP1]] +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP2]] +; + %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + %2 = fdiv fast <32 x half> %a1, %1 + ret <32 x half> %2 +} + +define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" { +; CHECK-LABEL: define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[A1:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <32 x half> [[A1]], [[TMP1]] +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP2]] +; + %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + %2 = fdiv fast <32 x half> %a1, %1 + ret <32 x half> %2 +} + +define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) #0 { +; CHECK-LABEL: define <32 x half> @test_mask_sqrt_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[PASSTHRU:%.*]], i32 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP6]], <32 x i16> [[TMP1]], <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x half> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x half> [[PASSTHRU]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = xor <32 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i16> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP12]], <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP6]], <32 x half> [[TMP4]], <32 x half> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> 
[[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP13]] +; + %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru + ret <32 x half> %3 +} + +define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) #0 { +; CHECK-LABEL: define <32 x half> @test_maskz_sqrt_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]], i32 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x half> @llvm.sqrt.v32f16(<32 x half> [[A0]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP5]], <32 x i16> [[TMP1]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x half> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = xor <32 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP4]], <32 x i16> [[TMP10]], <32 x i16> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP5]], <32 x half> [[TMP3]], <32 x half> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP11]] +; + %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer + ret <32 x half> %3 +} + +declare <32 x half> @llvm.sqrt.v32f16(<32 x half>) + +define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) #0 { +; CHECK-LABEL: define <32 x half> @test_sqrt_round_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> [[A0]], i32 11) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP5]] +; + %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11) + ret <32 x half> %1 +} + +define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) #0 { +; CHECK-LABEL: define <32 x half> @test_mask_sqrt_round_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[PASSTHRU:%.*]], i32 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> [[A0]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> zeroinitializer, <32 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x half> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x half> [[PASSTHRU]] to <32 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> [[TMP15]], <32 x i16> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP9]], <32 x half> [[TMP7]], <32 x half> [[PASSTHRU]] +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP16]] +; + %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru + ret <32 x half> %3 +} + +define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) #0 { +; CHECK-LABEL: define <32 x half> @test_maskz_sqrt_round_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]], i32 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> [[A0]], i32 11) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP2]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[MASK]] to <32 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP8]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x half> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <32 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <32 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i16> [[TMP13]], <32 x i16> [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP8]], <32 x half> [[TMP6]], <32 x half> zeroinitializer +; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[TMP14]] +; + %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> 
%2, <32 x half> %1, <32 x half> zeroinitializer + ret <32 x half> %3 +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone + +define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) #0 { +; CHECK-LABEL: define <8 x half> @test_sqrt_sh( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]], <8 x half> [[A2:%.*]], i8 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> [[A2]], i8 [[MASK]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4) + ret <8 x half> %res +} + +define half @test_sqrt_sh2(half %a0, half %a1) #0 { +; CHECK-LABEL: define half @test_sqrt_sh2( +; CHECK-SAME: half [[A0:%.*]], half [[A1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call fast half @llvm.sqrt.f16(half [[A0]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or i16 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast half [[A1]], [[TMP3]] +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret half [[TMP4]] +; + %1 = call fast half @llvm.sqrt.f16(half %a0) + %2 = fdiv fast half %a1, %1 + ret half %2 +} + +define half @test_sqrt_sh3(half %a0, half %a1) #0 { +; CHECK-LABEL: define half @test_sqrt_sh3( +; CHECK-SAME: half [[A0:%.*]], half [[A1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call fast half @llvm.sqrt.f16(half [[A0]]) +; CHECK-NEXT: store i16 [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret half [[TMP2]] +; + %1 = call fast half @llvm.sqrt.f16(half %a0) + ret half %1 +} + 
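+; Note: the contrast between the two strategies listed in the file header is
+; visible in the CHECK lines above. A minimal sketch, with %s0/%s1 standing in
+; for the operand shadows (placeholder names, not autogenerated output):
+;
+;   Strict check (visitInstruction), e.g. llvm.x86.avx512fp16.sqrt.ph.512:
+;     %flat = bitcast <32 x i16> %s0 to i512
+;     %cmp  = icmp ne i512 %flat, 0
+;     br i1 %cmp, label %warn, label %ok  ; %warn calls __msan_warning_noreturn
+;
+;   Heuristic propagation, e.g. llvm.sqrt.v32f16: the operand shadow %s0 is
+;   reused as the result shadow (and a following fdiv ORs in %s1); no check
+;   and no conditional branch are emitted.
+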
+declare half @llvm.sqrt.f16(half) + +define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) #0 { +; CHECK-LABEL: define <8 x half> @test_sqrt_sh_r( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]], <8 x half> [[A2:%.*]], i8 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> [[A2]], i8 [[MASK]], i32 10) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10) + ret <8 x half> %res +} + +define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) #0 { +; CHECK-LABEL: define <8 x half> @test_sqrt_sh_nomask( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]], <8 x half> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x 
half> [[A2]], i8 -1, i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) #0 { +; CHECK-LABEL: define <8 x half> @test_sqrt_sh_z( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]], <8 x half> [[A2:%.*]], i8 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> zeroinitializer, i8 [[MASK]], i32 10) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10) + ret <8 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32) +declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8) + +define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) #0 { +; CHECK-LABEL: define <32 x half> @test_rsqrt_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> [[A0]], <32 x half> zeroinitializer, i32 -1) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1) + ret <32 x half> %res +} + +define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) #0 { +; CHECK-LABEL: define <8 x half> @test_rsqrt_sh( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[A1:%.*]], <8 x half> [[A2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[A0]], <8 x half> [[A2]], i8 -1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1) + ret <8 x half> %res +} + +define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_rsqrt_sh_load( +; CHECK-SAME: <8 x half> [[A0:%.*]], ptr [[A1PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[A1:%.*]] = load <8 x half>, ptr [[A1PTR]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> undef, i8 -1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %a1 = load <8 x half>, ptr %a1ptr + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1) + ret <8 x half> %res +} + +define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) #0 { +; CHECK-LABEL: define <8 x half> @test_rsqrt_sh_maskz( +; CHECK-SAME: <8 x half> [[A0:%.*]], i8 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[A0]], <8 x half> zeroinitializer, i8 [[MASK]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask) + ret <8 x half> %res +} + +define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) #0 { +; CHECK-LABEL: define <8 x half> @test_rsqrt_sh_mask( +; CHECK-SAME: <8 x half> [[A0:%.*]], <8 x half> [[B0:%.*]], <8 x half> [[C0:%.*]], i8 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[B0]], <8 x half> [[C0]], i8 [[MASK]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) + ret <8 x half> %res +} + +declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32) + +define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) #0 { +; CHECK-LABEL: define i32 @test_int_x86_avx512_fpclass_ph_512( +; CHECK-SAME: <32 x half> [[X0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> [[X0]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES1:%.*]] = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> [[X0]], i32 2) +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i1> [[RES1]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i1> zeroinitializer, [[RES]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i1> zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i1> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[RES1]], [[RES]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <32 x i1> [[TMP11]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <32 x i1> [[TMP12]] to i32 +; CHECK-NEXT: store i32 [[TMP13]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP14]] +; + %res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4) + %res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2) + %1 = and <32 x i1> %res1, %res + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) #0 { +; CHECK-LABEL: define i8 @test_int_x86_avx512_mask_fpclass_sh( +; CHECK-SAME: <8 x half> [[X0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> [[X0]], i32 2, i8 -1) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> [[X0]], i32 4, i8 [[RES]]) +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES1]] +; + %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1) + %res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res) + ret i8 %res1 +} + +define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) #0 { +; CHECK-LABEL: define i8 @test_int_x86_avx512_mask_fpclass_sh_load( +; CHECK-SAME: ptr [[X0PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]] +; CHECK: [[BB2]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB3]]: +; CHECK-NEXT: [[X0:%.*]] = load <8 x half>, ptr [[X0PTR]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X0PTR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> [[X0]], i32 4, i8 -1) +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES]] +; + %x0 = load <8 x half>, ptr %x0ptr + %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1) + ret i8 %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32) + +define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) #0 { +; CHECK-LABEL: define <32 x half> @test_rcp_ph_512( +; CHECK-SAME: <32 x half> [[A0:%.*]], <32 x half> [[A1:%.*]], i32 [[MASK:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> [[A0]], <32 x half> [[A1]], i32 [[MASK]]) +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask) + ret <32 x half> %res +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8) + +define <8 x half> @test_rcp_sh(<8 x half> %a0) #0 { +; CHECK-LABEL: define <8 x half> @test_rcp_sh( +; CHECK-SAME: <8 x half> [[A0:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 
0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] +; CHECK: [[BB4]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB5]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> [[A0]], <8 x half> [[A0]], <8 x half> zeroinitializer, i8 -1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1) + ret <8 x half> %res +} + +define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) #0 { +; CHECK-LABEL: define <8 x half> @test_rcp_sh_load( +; CHECK-SAME: <8 x half> [[A0:%.*]], ptr [[A1PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[A1:%.*]] = load <8 x half>, ptr [[A1PTR]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> zeroinitializer, i8 -1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %a1 = load <8 x half>, ptr %a1ptr + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1) + ret <8 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32) + +define <32 x half>@test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_reduce_ph_512( +; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X2:%.*]], i32 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), 
i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> [[X0]], i32 8, <32 x half> [[X2]], i32 [[X3]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> [[X0]], i32 4, <32 x half> [[X2]], i32 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <32 x half> [[RES]], [[RES1]] +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES2]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4) + %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8) + %res2 = fadd <32 x half> %res, %res1 + ret <32 x half> %res2 +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32, i32) + +define <8 x half>@test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_reduce_sh( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] 
= or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 [[X4]], i32 4, i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4) + ret <8 x half> %res +} + +define <8 x half>@test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_reduce_sh_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 -1, i32 4, i32 8) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8) + ret <8 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32) + +define <32 x half>@test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_rndscale_ph_512( +; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X2:%.*]], i32 [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; 
CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> [[X0]], i32 8, <32 x half> [[X2]], i32 [[X3]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> [[X0]], i32 4, <32 x half> [[X2]], i32 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <32 x half> [[RES]], [[RES1]] +; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x half> [[RES2]] +; + %res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4) + %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8) + %res2 = fadd <32 x half> %res, %res1 + ret <32 x half> %res2 +} + +declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>,<8 x half>, i8, i32, i32) + +define <8 x half>@test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_rndscale_sh( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] +; CHECK: [[BB8]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB9]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> 
@llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 [[X4]], i32 4, i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4) + ret <8 x half> %res +} + +define <8 x half>@test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_rndscale_sh_nomask( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] +; CHECK: [[BB7]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB8]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 -1, i32 4, i32 8) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8) + ret <8 x half> %res +} + +declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32) + +define <32 x half>@test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) #0 { +; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512( +; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X1:%.*]], i32 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: 
+declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32)
+
+define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) #0 {
+; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512(
+; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X1:%.*]], i32 [[X2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> [[X0]], <32 x half> [[X1]], i32 [[X2]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> [[X0]], <32 x half> zeroinitializer, i32 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <32 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x half> [[RES3]]
+;
+  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
+  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
+  %res3 = fadd <32 x half> %res1, %res2
+  ret <32 x half> %res3
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getexp_sh(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 [[X4]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getexp_sh_nomask(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 -1, i32 8)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getexp_sh_load(
+; CHECK-SAME: <8 x half> [[X0:%.*]], ptr [[X1PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[X1:%.*]] = load <8 x half>, ptr [[X1PTR]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[X1PTR]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> undef, i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %x1 = load <8 x half>, ptr %x1ptr
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
+  ret <8 x half> %res
+}
+
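+; getmant.ph.512 and getmant.sh follow the same strict pattern; the constant
+; immediates (i32 8, i32 11, ...) carry no shadow, so only the variable
+; operands are checked.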
+declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32)
+
+define <32 x half> @test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) #0 {
+; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_getmant_ph_512(
+; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X2:%.*]], i32 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> [[X0]], i32 8, <32 x half> [[X2]], i32 [[X3]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB11]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> [[X0]], i32 4, <32 x half> [[X2]], i32 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <32 x half> [[RES]], [[RES1]]
+; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x half> [[RES2]]
+;
+  %res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
+  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
+  %res2 = fadd <32 x half> %res, %res1
+  ret <32 x half> %res2
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getmant_sh(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 11, <8 x half> [[X3]], i8 [[X4]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getmant_sh_nomask(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 11, <8 x half> [[X3]], i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_getmant_sh_z(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 11, <8 x half> zeroinitializer, i8 [[X4]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
+  ret <8 x half> %res
+}
+
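+; scalef.ph.512 and scalef.sh: the packed test additionally passes a variable
+; i32 mask, whose shadow is checked together with the vector operands.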
+declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32)
+
+define <32 x half> @test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) #0 {
+; CHECK-LABEL: define <32 x half> @test_int_x86_avx512_mask_scalef_ph_512(
+; CHECK-SAME: <32 x half> [[X0:%.*]], <32 x half> [[X1:%.*]], <32 x half> [[X2:%.*]], i32 [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP1]] to <32 x i1>
+; CHECK-NEXT: [[MASK:%.*]] = bitcast i32 [[X3]] to <32 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i16> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP4]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> [[X0]], <32 x half> [[X1]], <32 x half> [[X2]], i32 [[X3]], i32 11)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i16> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: br i1 [[_MSOR8]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> [[X0]], <32 x half> [[X1]], <32 x half> zeroinitializer, i32 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <32 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x half> [[RES3]]
+;
+  %mask = bitcast i32 %x3 to <32 x i1>
+  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
+  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
+  %res3 = fadd <32 x half> %res1, %res2
+  ret <32 x half> %res3
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_scalef_sh(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]], i8 [[X4:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 [[X4]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_scalef_sh_nomask(
+; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], <8 x half> [[X3:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> [[X3]], i8 -1, i32 8)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_scalef_sh_load(
+; CHECK-SAME: <8 x half> [[X0:%.*]], ptr [[X1PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[X1:%.*]] = load <8 x half>, ptr [[X1PTR]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[X1PTR]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> undef, i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+  %x1 = load <8 x half>, ptr %x1ptr
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
+  ret <8 x half> %res
+}
+
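+; Scalar add with rounding, chained through four calls. Each call's result
+; has a clean shadow, so later checks only cover the still-tainted operands.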
+declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
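+; Scalar sub with rounding; instrumentation mirrors the add.sh.round test.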
+declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
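+; Scalar mul with rounding; same pattern as the add/sub tests above.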
+declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
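+; Scalar div with rounding; same pattern as the other scalar arithmetic tests.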
+declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
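+; Scalar min with rounding; same strict check-all-operands pattern.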
+declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
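+; Scalar max with rounding; same strict check-all-operands pattern.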
+declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) #0 {
+; CHECK-LABEL: define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(
+; CHECK-SAME: <8 x half> [[X1:%.*]], <8 x half> [[X2:%.*]], <8 x half> [[SRC:%.*]], i8 [[MASK:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[VAL_HALF:%.*]] = load half, ptr [[PTR]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB21]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB25]]:
+; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES3]]
+;
+  %val.half = load half, ptr %ptr
+  %val = insertelement <8 x half> undef, half %val.half, i32 0
+  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
+  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
+  ret <8 x half> %res3
+}
+
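+; Scalar compare returns an i8 mask. In the _all variant the four results are
+; combined with 'and', so the stored return shadow is the propagated
+; and-shadow (TMP32) rather than zero.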
[[X1]], i32 3, i8 [[X3]], i32 4) +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES2]] +; + %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4) + ret i8 %res2 +} + + +define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: define i8 @test_int_x86_avx512_mask_cmp_sh_all( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X3:%.*]], i32 [[X4:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 2, i8 -1, i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 3, i8 -1, i32 8) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] +; CHECK: [[BB14]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB15]]: +; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 4, i8 [[X3]], i32 4) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]] +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]] +; CHECK-NEXT: br i1 [[_MSOR14]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: 
[[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: +; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> [[X0]], <8 x half> [[X1]], i32 5, i8 [[X3]], i32 8) +; CHECK-NEXT: [[TMP20:%.*]] = and i8 [[RES1]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i8 0, [[RES2]] +; CHECK-NEXT: [[TMP22:%.*]] = or i8 0, [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or i8 [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[RES11:%.*]] = and i8 [[RES1]], [[RES2]] +; CHECK-NEXT: [[TMP24:%.*]] = and i8 [[RES3]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = and i8 0, [[RES4]] +; CHECK-NEXT: [[TMP26:%.*]] = or i8 0, [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = or i8 [[TMP26]], [[TMP25]] +; CHECK-NEXT: [[RES12:%.*]] = and i8 [[RES3]], [[RES4]] +; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP23]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[RES11]], [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = and i8 [[TMP23]], [[RES12]] +; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = or i8 [[TMP31]], [[TMP30]] +; CHECK-NEXT: [[RES13:%.*]] = and i8 [[RES11]], [[RES12]] +; CHECK-NEXT: store i8 [[TMP32]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES13]] +; + %res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4) + %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8) + %res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4) + %res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8) + + %res11 = and i8 %res1, %res2 + %res12 = and i8 %res3, %res4 + %res13 = and i8 %res11, %res12 + ret i8 %res13 +} + +declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32) + +define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512( +; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x half> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i16 [[X2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK]], <16 x i16> zeroinitializer, <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x half> [[X1]] to <16 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i16> [[TMP12]], 
[[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP13]], <16 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[MASK]], <16 x half> [[RES0]], <16 x half> [[X1]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %mask = bitcast i16 %x2 to <16 x i1> + %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4) + %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r( +; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x half> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i16 [[X2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK]], <16 x i16> zeroinitializer, <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x half> [[X1]] to <16 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP13]], <16 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[MASK]], <16 x half> [[RES0]], <16 x half> [[X1]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %mask = bitcast i16 %x2 to <16 x i1> + %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10) + %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask( +; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x half> [[X1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 
[[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4) + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z( +; CHECK-SAME: <16 x i32> [[X0:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i16 [[X2]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK]], <16 x i16> zeroinitializer, <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP3]], <16 x i16> [[TMP11]], <16 x i16> [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[MASK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %mask = bitcast i16 %x2 to <16 x i1> + %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4) + %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res +} + +define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) #0 { +; CHECK-LABEL: define <16 x half> @sint_to_fp_16i32_to_16f16( +; CHECK-SAME: <16 x i32> [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[TMP1]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = sitofp <16 x i32> [[X]] to <16 x half> +; CHECK-NEXT: store <16 x i16> [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = sitofp <16 x i32> %x to <16 x half> + ret <16 x half> %res +} + +declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32) + +define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r( +; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x half> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; 
CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i16 [[X2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK]], <16 x i16> zeroinitializer, <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x half> [[X1]] to <16 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i16> [[TMP13]], <16 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[MASK]], <16 x half> [[RES0]], <16 x half> [[X1]] +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %mask = bitcast i16 %x2 to <16 x i1> + %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10) + %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1 + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask( +; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x half> [[X1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4) + ret <16 x half> %res +} + +define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) #0 { +; CHECK-LABEL: define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z( +; CHECK-SAME: <16 x i32> [[X0:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i16 [[X2]] to <16 x 
i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK]], <16 x i16> zeroinitializer, <16 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x half> [[RES0]] to <16 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP3]], <16 x i16> [[TMP11]], <16 x i16> [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[MASK]], <16 x half> [[RES0]], <16 x half> zeroinitializer +; CHECK-NEXT: store <16 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %mask = bitcast i16 %x2 to <16 x i1> + %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4) + %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer + ret <16 x half> %res +} + +define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) #0 { +; CHECK-LABEL: define <16 x half> @uint_to_fp_16i32_to_16f16( +; CHECK-SAME: <16 x i32> [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[TMP1]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = uitofp <16 x i32> [[X]] to <16 x half> +; CHECK-NEXT: store <16 x i16> [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x half> [[RES]] +; + %res = uitofp <16 x i32> %x to <16 x half> + ret <16 x half> %res +} + +declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32) + +define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> 
@llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 [[X2]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10) + %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32) + +define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 [[X2]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: 
store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10) + %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32) + +define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 [[X2]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4) + %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32) + +define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512( +; CHECK-SAME: <16 x half> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, 
align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 [[X2]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4) + %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32) + +define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x 
i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> [[X1]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> [[X1]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, 
align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z( +; CHECK-SAME: <8 x i64> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP11]], <8 x i16> [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res +} + +declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32) + +define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; 
CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> [[X1]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[X1]] to <8 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i16> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> [[X1]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x 
half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask( +; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x half> [[X1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB4]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + ret <8 x half> %res +} + +define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) #0 { +; CHECK-LABEL: define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z( +; CHECK-SAME: <8 x i64> [[X0:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[MASK:%.*]] = bitcast i8 [[X2]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB6]]: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK]], <8 x i16> zeroinitializer, <8 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[RES0]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP11]], <8 x i16> [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK]], <8 x half> [[RES0]], <8 x half> zeroinitializer +; CHECK-NEXT: store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x half> [[RES]] +; + %mask = bitcast i8 %x2 to <8 x i1> + %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4) + %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer + ret <8 x half> %res +} + +declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32) + +define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x 
i64> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 [[X2]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10) + %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32) + +define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x i64> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], 
label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 [[X2]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10) + %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32) + +define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512( +; CHECK-SAME: <8 x half> [[X0:%.*]], <8 x i64> [[X1:%.*]], i8 [[X2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] +; CHECK: [[BB6]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB7]]: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 [[X2]], i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1]] +; CHECK: [[BB10]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: [[BB11]]: +; CHECK-NEXT: [[RES1:%.*]] = call 
<8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> [[X0]], <8 x i64> [[X1]], i8 -1, i32 4)
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
+ %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i32 @test_x86_avx512fp16_vcvtsh2si32(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> [[ARG0]], i32 11)
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i64 @test_x86_avx512fp16_vcvtsh2si64(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> [[ARG0]], i32 10)
+; CHECK-NEXT: [[RES:%.*]] = add i64 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i32 @test_x86_avx512fp16_vcvttsh2si32(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> [[ARG0]], i32 8)
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i64 @test_x86_avx512fp16_vcvttsh2si64(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> [[ARG0]], i32 8)
+; CHECK-NEXT: [[RES:%.*]] = add i64 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i32 @test_x86_avx512fp16_vcvtsh2usi32(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> [[ARG0]], i32 9)
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i64 @test_x86_avx512fp16_vcvtsh2usi64(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> [[ARG0]], i32 10)
+; CHECK-NEXT: [[RES:%.*]] = add i64 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i32 @test_x86_avx512fp16_vcvttsh2usi32(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> [[ARG0]], i32 8)
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) #0 {
+; CHECK-LABEL: define i64 @test_x86_avx512fp16_vcvttsh2usi64(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[RES1:%.*]] = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> [[ARG0]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[RES2:%.*]] = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> [[ARG0]], i32 8)
+; CHECK-NEXT: [[RES:%.*]] = add i64 [[RES1]], [[RES2]]
+; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> [[ARG0]], i32 [[ARG1]], i32 4)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> [[ARG0]], i32 [[ARG1]], i32 9)
+; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]], i64 [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> [[ARG0]], i64 [[ARG1]], i32 4)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> [[ARG0]], i64 [[ARG1]], i32 8)
+; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) #0 {
+; CHECK-LABEL: define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]], i32 [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> [[ARG0]], i32 [[ARG1]], i32 4)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> [[ARG0]], i32 [[ARG1]], i32 9)
+; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(
+; CHECK-SAME: <8 x half> [[ARG0:%.*]], i64 [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> [[ARG0]], i64 [[ARG1]], i32 4)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> [[ARG0]], i64 [[ARG1]], i32 9)
+; CHECK-NEXT: [[RES:%.*]] = fadd <8 x half> [[RES1]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x half> [[RES]]
+;
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+
+define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
+; CHECK-LABEL: define <16 x half> @test_mm256_castph128_ph256_freeze(
+; CHECK-SAME: <8 x half> [[A0:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> undef
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x half> [[A0]], <8 x half> [[A1]], <16 x i32>
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x half> [[RES]]
+;
+ %a1 = freeze <8 x half> undef
+; The cold never bothered me anyway
+ %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32>
+ ret <16 x half> %res
+}
+
+
+define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
+; CHECK-LABEL: define <32 x half> @test_mm512_castph128_ph512_freeze(
+; CHECK-SAME: <8 x half> [[A0:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> undef
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x half> [[A0]], <8 x half> [[A1]], <32 x i32>
+; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x half> [[RES]]
+;
+ %a1 = freeze <8 x half> undef
+ %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32>
+ ret <32 x half> %res
+}
+
+
+define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
+; CHECK-LABEL: define <32 x half> @test_mm512_castph256_ph512_freeze(
+; CHECK-SAME: <16 x half> [[A0:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <16 x half> undef
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <16 x half> [[A0]], <16 x half> [[A1]], <32 x i32>
+; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x half> [[RES]]
+;
+ %a1 = freeze <16 x half> undef
+ %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32>
+ ret <32 x half> %res
+}
+
+attributes #0 = { sanitize_memory }

From d2381ed6bf881219e247ed86a0c1955dedcf50e9 Mon Sep 17 00:00:00 2001
From: Thurston Dang
Date: Fri, 18 Apr 2025 06:43:51 +0000
Subject: [PATCH 5/6] Fix typo

---
 .../MemorySanitizer/X86/avx512fp16-intrinsics.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
index fd39f90b09243..4bdc81393b8a8 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
 ;
-; Forked from llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
+; Forked from llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
 ;
 ; Handled suboptimally (visitInstruction):
 ; - llvm.x86.avx512fp16.fpclass.ph.512

From b34c69a0b68d443e3218f8bfb78fb4915cc4f3f9 Mon Sep 17 00:00:00 2001
From: Thurston Dang
Date: Fri, 18 Apr 2025 06:46:46 +0000
Subject: [PATCH 6/6] undef -> poison

---
 .../X86/avx512fp16-arith-vl-intrinsics.ll | 36 +++++++-------
 .../X86/avx512fp16-intrinsics.ll | 48 +++++++++----------
 2 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll
index 224bab6dda923..e67e5e73134e9 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-arith-vl-intrinsics.ll
@@ -1266,11 +1266,11 @@ define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> [[X0]], <4 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i32> [[RES]]
 ;
- %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> poison, i8 -1)
 ret <4 x i32> %res
 }
@@ -1332,11 +1332,11 @@ define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> [[X0]], <8 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x i32> [[RES]]
 ;
- %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> poison, i8 -1)
 ret <8 x i32> %res
 }
@@ -1398,11 +1398,11 @@ define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> [[X0]], <4 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i32> [[RES]]
 ;
- %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> poison, i8 -1)
 ret <4 x i32> %res
 }
@@ -1464,11 +1464,11 @@ define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> [[X0]], <8 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x i32> [[RES]]
 ;
- %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> poison, i8 -1)
 ret <8 x i32> %res
 }
@@ -1530,11 +1530,11 @@ define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> [[X0]], <4 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> [[X0]], <4 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i32> [[RES]]
 ;
- %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> poison, i8 -1)
 ret <4 x i32> %res
 }
@@ -1596,11 +1596,11 @@ define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> [[X0]], <8 x i32> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> [[X0]], <8 x i32> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x i32> [[RES]]
 ;
- %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> poison, i8 -1)
 ret <8 x i32> %res
 }
@@ -1662,11 +1662,11 @@ define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> [[X0]], <4 x float> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> [[X0]], <4 x float> poison, i8 -1)
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
- %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> poison, i8 -1)
 ret <4 x float> %res
 }
@@ -1728,11 +1728,11 @@ define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> [[X0]], <8 x float> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> [[X0]], <8 x float> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x float> [[RES]]
 ;
- %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
+ %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> poison, i8 -1)
 ret <8 x float> %res
 }
@@ -1837,11 +1837,11 @@ define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) #0 {
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> [[X0]], <8 x half> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> [[X0]], <8 x half> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x half> [[RES]]
 ;
- %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> poison, i8 -1)
 ret <8 x half> %res
 }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
index 4bdc81393b8a8..61a32e5e2042e 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
@@ -511,12 +511,12 @@ define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) #0 {
 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> undef, i8 -1)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> [[A0]], <8 x half> [[A1]], <8 x half> poison, i8 -1)
 ; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x half> [[RES]]
 ;
 %a1 = load <8 x half>, ptr %a1ptr
- %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> poison, i8 -1)
 ret <8 x half> %res
 }
@@ -1102,12 +1102,12 @@ define <8 x half>@test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x
 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> undef, i8 -1, i32 4)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> poison, i8 -1, i32 4)
 ; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x half> [[RES]]
 ;
 %x1 = load <8 x half>, ptr %x1ptr
- %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> poison, i8 -1, i32 4)
 ret <8 x half> %res
 }
@@ -1376,12 +1376,12 @@ define <8 x half>@test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x
 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> undef, i8 -1, i32 4)
+; CHECK-NEXT: [[RES:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> [[X0]], <8 x half> [[X1]], <8 x half> poison, i8 -1, i32 4)
 ; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x half> [[RES]]
 ;
 %x1 = load <8 x half>, ptr %x1ptr
- %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> poison, i8 -1, i32 4)
 ret <8 x half> %res
 }
@@ -1408,7 +1408,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1460,7 +1460,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -1491,7 +1491,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1543,7 +1543,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -1574,7 +1574,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1626,7 +1626,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -1657,7 +1657,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1709,7 +1709,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -1740,7 +1740,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1792,7 +1792,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -1823,7 +1823,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
 ; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> undef, half [[VAL_HALF]], i32 0
+; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
@@ -1875,7 +1875,7 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
 ; CHECK-NEXT: ret <8 x half> [[RES3]]
 ;
 %val.half = load half,ptr %ptr
- %val = insertelement <8 x half> undef, half %val.half, i32 0
+ %val = insertelement <8 x half> poison, half %val.half, i32 0
 %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
 %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src , i8 %mask, i32 4)
 %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer , i8 %mask, i32 4)
@@ -3204,12 +3204,12 @@ define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
 ; CHECK-LABEL: define <16 x half> @test_mm256_castph128_ph256_freeze(
 ; CHECK-SAME: <8 x half> [[A0:%.*]]) #[[ATTR5:[0-9]+]] {
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> undef
+; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> poison
 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x half> [[A0]], <8 x half> [[A1]], <16 x i32>
 ; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <16 x half> [[RES]]
 ;
- %a1 = freeze <8 x half> undef
+ %a1 = freeze <8 x half> poison
 ; The cold never bothered me anyway
 %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32>
 ret <16 x half> %res
@@ -3220,12 +3220,12 @@ define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
 ; CHECK-LABEL: define <32 x half> @test_mm512_castph128_ph512_freeze(
 ; CHECK-SAME: <8 x half> [[A0:%.*]]) #[[ATTR5]] {
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> undef
+; CHECK-NEXT: [[A1:%.*]] = freeze <8 x half> poison
 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x half> [[A0]], <8 x half> [[A1]], <32 x i32>
 ; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <32 x half> [[RES]]
 ;
- %a1 = freeze <8 x half> undef
+ %a1 = freeze <8 x half> poison
 %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32>
 ret <32 x half> %res
 }
@@ -3235,12 +3235,12 @@ define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind
 ; CHECK-LABEL: define <32 x half> @test_mm512_castph256_ph512_freeze(
 ; CHECK-SAME: <16 x half> [[A0:%.*]]) #[[ATTR5]] {
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <16 x half> undef
+; CHECK-NEXT: [[A1:%.*]] = freeze <16 x half> poison
 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <16 x half> [[A0]], <16 x half> [[A1]], <32 x i32>
 ; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <32 x half> [[RES]]
 ;
- %a1 = freeze <16 x half> undef
+ %a1 = freeze <16 x half> poison
 %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32>
 ret <32 x half> %res
 }
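; Editorial note on the final patch's `undef -> poison` change, as a minimal
; standalone LLVM IR sketch (this function is illustrative only and is not
; part of any patch above). Both `undef` and `poison` are "no particular
; value" placeholders, with `poison` being the stronger, now-preferred form;
; the swap is safe here because the tests only consume the placeholder through
; `freeze`, which pins either one to some arbitrary but fixed value.
define <8 x half> @freeze_poison_sketch() {
  ; freeze turns the poison vector into an arbitrary, fixed value
  %v = freeze <8 x half> poison
  ; MSan models a frozen value as fully initialized, which is why the freeze
  ; tests above expect an all-zero shadow store to @__msan_retval_tls
  ret <8 x half> %v
}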