77; - llvm.x86.avx512.dbpsadbw.512
88; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512
99; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512
10- ; - llvm.x86.avx512.pmaddubs.w.512
11- ; - llvm.x86.avx512.pmaddw.d.512
1210;
1311; Heuristically handled:
1412; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8
@@ -4931,18 +4929,21 @@ define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %
49314929; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
49324930; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
49334931; CHECK-NEXT: call void @llvm.donothing()
4934- ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512
4935- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
4936- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
4937- ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
4938- ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4939- ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4940- ; CHECK: 5:
4941- ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
4942- ; CHECK-NEXT: unreachable
4943- ; CHECK: 6:
4944- ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
4945- ; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
4932+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <64 x i8> [[TMP1]], zeroinitializer
4933+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <64 x i8> [[TMP2]], zeroinitializer
4934+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer
4935+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer
4936+ ; CHECK-NEXT: [[TMP17:%.*]] = and <64 x i1> [[TMP3]], [[TMP4]]
4937+ ; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i1> [[TMP5]], [[TMP4]]
4938+ ; CHECK-NEXT: [[TMP9:%.*]] = and <64 x i1> [[TMP3]], [[TMP6]]
4939+ ; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i1> [[TMP17]], [[TMP8]]
4940+ ; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i1> [[TMP10]], [[TMP9]]
4941+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <64 x i1> [[TMP11]] to <64 x i8>
4942+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <64 x i8> [[TMP12]] to <32 x i16>
4943+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <32 x i16> [[TMP13]], zeroinitializer
4944+ ; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP14]] to <32 x i16>
4945+ ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]])
4946+ ; CHECK-NEXT: store <32 x i16> [[TMP16]], ptr @__msan_retval_tls, align 8
49464947; CHECK-NEXT: ret <32 x i16> [[TMP7]]
49474948;
49484949 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
@@ -4956,22 +4957,25 @@ define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x
49564957; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
49574958; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
49584959; CHECK-NEXT: call void @llvm.donothing()
4959- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512
4960- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
4961- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
4962- ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
4963- ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4964- ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
4965- ; CHECK: 7:
4966- ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
4967- ; CHECK-NEXT: unreachable
4968- ; CHECK: 8:
4969- ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
4960+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP1]], zeroinitializer
4961+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[TMP2]], zeroinitializer
4962+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer
4963+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer
4964+ ; CHECK-NEXT: [[TMP19:%.*]] = and <64 x i1> [[TMP5]], [[TMP6]]
4965+ ; CHECK-NEXT: [[TMP20:%.*]] = and <64 x i1> [[TMP7]], [[TMP6]]
4966+ ; CHECK-NEXT: [[TMP21:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]]
4967+ ; CHECK-NEXT: [[TMP22:%.*]] = or <64 x i1> [[TMP19]], [[TMP20]]
4968+ ; CHECK-NEXT: [[TMP23:%.*]] = or <64 x i1> [[TMP22]], [[TMP21]]
4969+ ; CHECK-NEXT: [[TMP24:%.*]] = sext <64 x i1> [[TMP23]] to <64 x i8>
4970+ ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP24]] to <32 x i16>
4971+ ; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <32 x i16> [[TMP17]], zeroinitializer
4972+ ; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP25]] to <32 x i16>
4973+ ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]])
49704974; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
49714975; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
4972- ; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer , <32 x i16> [[TMP4]]
4976+ ; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP18]] , <32 x i16> [[TMP4]]
49734977; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]]
4974- ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
4978+ ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP18]]
49754979; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
49764980; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
49774981; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]]
@@ -4989,18 +4993,21 @@ define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %
49894993; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
49904994; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
49914995; CHECK-NEXT: call void @llvm.donothing()
4992- ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
4993- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
4994- ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
4995- ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
4996- ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4997- ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4998- ; CHECK: 5:
4999- ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
5000- ; CHECK-NEXT: unreachable
5001- ; CHECK: 6:
5002- ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]])
5003- ; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4996+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
4997+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
4998+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer
4999+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer
5000+ ; CHECK-NEXT: [[TMP17:%.*]] = and <32 x i1> [[TMP3]], [[TMP4]]
5001+ ; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i1> [[TMP5]], [[TMP4]]
5002+ ; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i1> [[TMP3]], [[TMP6]]
5003+ ; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i1> [[TMP17]], [[TMP8]]
5004+ ; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i1> [[TMP10]], [[TMP9]]
5005+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
5006+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <32 x i16> [[TMP12]] to <16 x i32>
5007+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i32> [[TMP13]], zeroinitializer
5008+ ; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP14]] to <16 x i32>
5009+ ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]])
5010+ ; CHECK-NEXT: store <16 x i32> [[TMP16]], ptr @__msan_retval_tls, align 8
50045011; CHECK-NEXT: ret <16 x i32> [[TMP7]]
50055012;
50065013 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
@@ -5014,22 +5021,25 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i
50145021; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
50155022; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
50165023; CHECK-NEXT: call void @llvm.donothing()
5017- ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
5018- ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5019- ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
5020- ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5021- ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5022- ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
5023- ; CHECK: 7:
5024- ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
5025- ; CHECK-NEXT: unreachable
5026- ; CHECK: 8:
5027- ; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]])
5024+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
5025+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
5026+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer
5027+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer
5028+ ; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[TMP5]], [[TMP6]]
5029+ ; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP7]], [[TMP6]]
5030+ ; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]]
5031+ ; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP19]], [[TMP20]]
5032+ ; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], [[TMP21]]
5033+ ; CHECK-NEXT: [[TMP24:%.*]] = sext <32 x i1> [[TMP23]] to <32 x i16>
5034+ ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i16> [[TMP24]] to <16 x i32>
5035+ ; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i32> [[TMP17]], zeroinitializer
5036+ ; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i32>
5037+ ; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]])
50285038; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
50295039; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5030- ; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer , <16 x i32> [[TMP4]]
5040+ ; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP18]] , <16 x i32> [[TMP4]]
50315041; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]]
5032- ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
5042+ ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP18]]
50335043; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
50345044; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
50355045; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]
0 commit comments