diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4cef57d43f203..eb7e1d29d2b67 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -5453,14 +5453,32 @@ struct MemorySanitizerVisitor : public InstVisitor { // Multiply and Add Packed Words // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) - + // <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) + // // Multiply and Add Packed Signed and Unsigned Bytes // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) + // <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) + // + // These intrinsics are auto-upgraded into non-masked forms: + // < 4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128 + // (<8 x i16>, <8 x i16>, <4 x i32>, i8) + // < 8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256 + // (<16 x i16>, <16 x i16>, <8 x i32>, i8) + // <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512 + // (<32 x i16>, <32 x i16>, <16 x i32>, i16) + // < 8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128 + // (<16 x i8>, <16 x i8>, <8 x i16>, i8) + // <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256 + // (<32 x i8>, <32 x i8>, <16 x i16>, i16) + // <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512 + // (<64 x i8>, <64 x i8>, <32 x i16>, i32) case Intrinsic::x86_sse2_pmadd_wd: case Intrinsic::x86_avx2_pmadd_wd: + case Intrinsic::x86_avx512_pmaddw_d_512: case Intrinsic::x86_ssse3_pmadd_ub_sw_128: case Intrinsic::x86_avx2_pmadd_ub_sw: + case Intrinsic::x86_avx512_pmaddubs_w_512: handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2); break; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index 02df9c49a010b..39faf07a56b3f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -7,8 +7,6 @@ ; - llvm.x86.avx512.dbpsadbw.512 ; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 ; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 -; - llvm.x86.avx512.pmaddubs.w.512 -; - llvm.x86.avx512.pmaddw.d.512 ; ; Heuristically handled: ; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8 @@ -4930,18 +4928,18 @@ define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> % ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <64 x i1> [[TMP6]] to <64 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i8> [[TMP3]], [[TMP13]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <64 x i8> [[TMP8]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <64 x i8> [[TMP8]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = zext <32 x i8> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]]) +; CHECK-NEXT: store <32 x i16> [[TMP12]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) @@ -4955,22 +4953,22 @@ define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP8]] to <64 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i8> [[TMP5]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP18]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <64 x i8> [[TMP18]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i8> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = zext <32 x i8> [[TMP21]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP22]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP22]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]] @@ -4988,18 +4986,18 @@ define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> % ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) -; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i16> [[TMP3]], [[TMP13]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = zext <16 x i16> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]]) +; CHECK-NEXT: store <16 x i32> [[TMP12]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP7]] ; %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) @@ -5013,22 +5011,22 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i16> [[TMP5]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <32 x i16> [[TMP18]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <32 x i16> [[TMP18]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = zext <16 x i16> [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP22]], <16 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP22]] ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index 78c272c7b2c5a..62e0ec2dd9199 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -9,7 +9,6 @@ ; - llvm.x86.avx512.mask.pmov.wb.mem.512 ; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 ; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 -; - llvm.x86.avx512.pmaddubs.w.512, llvm.x86.avx512.pmaddw.d.512 ; - llvm.x86.avx512.psad.bw.512 ; ; Heuristically handled: @@ -2204,18 +2203,18 @@ define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> % ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <64 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <64 x i1> [[TMP6]] to <64 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i8> [[TMP3]], [[TMP13]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <64 x i8> [[TMP8]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <64 x i8> [[TMP8]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = zext <32 x i8> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]]) +; CHECK-NEXT: store <32 x i16> [[TMP12]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) @@ -2229,22 +2228,22 @@ define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP8]] to <64 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = and <64 x i8> [[TMP5]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP18]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <64 x i8> [[TMP18]], <64 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i8> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = zext <32 x i8> [[TMP21]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP22]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP22]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]] @@ -2264,18 +2263,18 @@ define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> % ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) -; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i16> [[TMP3]], [[TMP13]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = zext <16 x i16> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]]) +; CHECK-NEXT: store <16 x i32> [[TMP12]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP7]] ; %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) @@ -2289,22 +2288,22 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = or <32 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = and <32 x i16> [[TMP5]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <32 x i16> [[TMP18]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <32 x i16> [[TMP18]], <32 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i16> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = zext <16 x i16> [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP22]], <16 x i32> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP22]] ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]