Skip to content

Commit 1c38e2c

Browse files
committed
[msan] Add Instrumentation for Avx512 Instructions: pmaddw, pmaddubs
This applies the pmadd handler (recently improved in llvm#152941) to the Avx512 equivalent of the pmaddw and pmaddubs intrinsics: <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>) <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
1 parent 638bd11 commit 1c38e2c

File tree

3 files changed

+142
-103
lines changed

3 files changed

+142
-103
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5486,14 +5486,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
54865486
// Multiply and Add Packed Words
54875487
// < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
54885488
// < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
5489+
// <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
54895490
//
54905491
// Multiply and Add Packed Signed and Unsigned Bytes
54915492
// < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
54925493
// <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
5494+
// <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
5495+
//
5496+
// These intrinsics are auto-upgraded into non-masked forms:
5497+
// < 4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128
5498+
// (<8 x i16>, <8 x i16>, <4 x i32>, i8)
5499+
// < 8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256
5500+
// (<16 x i16>, <16 x i16>, <8 x i32>, i8)
5501+
// <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512
5502+
// (<32 x i16>, <32 x i16>, <16 x i32>, i16)
5503+
// < 8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128
5504+
// (<16 x i8>, <16 x i8>, <8 x i16>, i8)
5505+
// <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256
5506+
// (<32 x i8>, <32 x i8>, <16 x i16>, i16)
5507+
// <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512
5508+
// (<64 x i8>, <64 x i8>, <32 x i16>, i32)
54935509
case Intrinsic::x86_sse2_pmadd_wd:
54945510
case Intrinsic::x86_avx2_pmadd_wd:
5511+
case Intrinsic::x86_avx512_pmaddw_d_512:
54955512
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
54965513
case Intrinsic::x86_avx2_pmadd_ub_sw:
5514+
case Intrinsic::x86_avx512_pmaddubs_w_512:
54975515
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
54985516
break;
54995517

llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
; - llvm.x86.avx512.dbpsadbw.512
88
; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512
99
; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512
10-
; - llvm.x86.avx512.pmaddubs.w.512
11-
; - llvm.x86.avx512.pmaddw.d.512
1210
;
1311
; Heuristically handled:
1412
; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8
@@ -4931,18 +4929,21 @@ define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %
49314929
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
49324930
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
49334931
; CHECK-NEXT: call void @llvm.donothing()
4934-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <64 x i8> [[TMP1]] to i512
4935-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
4936-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
4937-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
4938-
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4939-
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4940-
; CHECK: 5:
4941-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
4942-
; CHECK-NEXT: unreachable
4943-
; CHECK: 6:
4944-
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
4945-
; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
4932+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <64 x i8> [[TMP1]], zeroinitializer
4933+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <64 x i8> [[TMP2]], zeroinitializer
4934+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer
4935+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer
4936+
; CHECK-NEXT: [[TMP17:%.*]] = and <64 x i1> [[TMP3]], [[TMP4]]
4937+
; CHECK-NEXT: [[TMP8:%.*]] = and <64 x i1> [[TMP5]], [[TMP4]]
4938+
; CHECK-NEXT: [[TMP9:%.*]] = and <64 x i1> [[TMP3]], [[TMP6]]
4939+
; CHECK-NEXT: [[TMP10:%.*]] = or <64 x i1> [[TMP17]], [[TMP8]]
4940+
; CHECK-NEXT: [[TMP11:%.*]] = or <64 x i1> [[TMP10]], [[TMP9]]
4941+
; CHECK-NEXT: [[TMP12:%.*]] = sext <64 x i1> [[TMP11]] to <64 x i8>
4942+
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <64 x i8> [[TMP12]] to <32 x i16>
4943+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <32 x i16> [[TMP13]], zeroinitializer
4944+
; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP14]] to <32 x i16>
4945+
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]])
4946+
; CHECK-NEXT: store <32 x i16> [[TMP16]], ptr @__msan_retval_tls, align 8
49464947
; CHECK-NEXT: ret <32 x i16> [[TMP7]]
49474948
;
49484949
%res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
@@ -4956,22 +4957,25 @@ define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x
49564957
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
49574958
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
49584959
; CHECK-NEXT: call void @llvm.donothing()
4959-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <64 x i8> [[TMP1]] to i512
4960-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
4961-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
4962-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
4963-
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4964-
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
4965-
; CHECK: 7:
4966-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
4967-
; CHECK-NEXT: unreachable
4968-
; CHECK: 8:
4969-
; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
4960+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <64 x i8> [[TMP1]], zeroinitializer
4961+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <64 x i8> [[TMP2]], zeroinitializer
4962+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <64 x i8> [[X0:%.*]], zeroinitializer
4963+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <64 x i8> [[X1:%.*]], zeroinitializer
4964+
; CHECK-NEXT: [[TMP19:%.*]] = and <64 x i1> [[TMP5]], [[TMP6]]
4965+
; CHECK-NEXT: [[TMP20:%.*]] = and <64 x i1> [[TMP7]], [[TMP6]]
4966+
; CHECK-NEXT: [[TMP21:%.*]] = and <64 x i1> [[TMP5]], [[TMP8]]
4967+
; CHECK-NEXT: [[TMP22:%.*]] = or <64 x i1> [[TMP19]], [[TMP20]]
4968+
; CHECK-NEXT: [[TMP23:%.*]] = or <64 x i1> [[TMP22]], [[TMP21]]
4969+
; CHECK-NEXT: [[TMP24:%.*]] = sext <64 x i1> [[TMP23]] to <64 x i8>
4970+
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <64 x i8> [[TMP24]] to <32 x i16>
4971+
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <32 x i16> [[TMP17]], zeroinitializer
4972+
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP25]] to <32 x i16>
4973+
; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[X0]], <64 x i8> [[X1]])
49704974
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
49714975
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
4972-
; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
4976+
; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP18]], <32 x i16> [[TMP4]]
49734977
; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[X2:%.*]]
4974-
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
4978+
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP18]]
49754979
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
49764980
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
49774981
; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[X2]]
@@ -4989,18 +4993,21 @@ define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %
49894993
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
49904994
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
49914995
; CHECK-NEXT: call void @llvm.donothing()
4992-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
4993-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
4994-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
4995-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
4996-
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4997-
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4998-
; CHECK: 5:
4999-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
5000-
; CHECK-NEXT: unreachable
5001-
; CHECK: 6:
5002-
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]])
5003-
; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4996+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
4997+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
4998+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer
4999+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer
5000+
; CHECK-NEXT: [[TMP17:%.*]] = and <32 x i1> [[TMP3]], [[TMP4]]
5001+
; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i1> [[TMP5]], [[TMP4]]
5002+
; CHECK-NEXT: [[TMP9:%.*]] = and <32 x i1> [[TMP3]], [[TMP6]]
5003+
; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i1> [[TMP17]], [[TMP8]]
5004+
; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i1> [[TMP10]], [[TMP9]]
5005+
; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
5006+
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <32 x i16> [[TMP12]] to <16 x i32>
5007+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i32> [[TMP13]], zeroinitializer
5008+
; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP14]] to <16 x i32>
5009+
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]])
5010+
; CHECK-NEXT: store <16 x i32> [[TMP16]], ptr @__msan_retval_tls, align 8
50045011
; CHECK-NEXT: ret <16 x i32> [[TMP7]]
50055012
;
50065013
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
@@ -5014,22 +5021,25 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i
50145021
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
50155022
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
50165023
; CHECK-NEXT: call void @llvm.donothing()
5017-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
5018-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5019-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
5020-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5021-
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5022-
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
5023-
; CHECK: 7:
5024-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
5025-
; CHECK-NEXT: unreachable
5026-
; CHECK: 8:
5027-
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]])
5024+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
5025+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
5026+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[X0:%.*]], zeroinitializer
5027+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[X1:%.*]], zeroinitializer
5028+
; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[TMP5]], [[TMP6]]
5029+
; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP7]], [[TMP6]]
5030+
; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[TMP5]], [[TMP8]]
5031+
; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP19]], [[TMP20]]
5032+
; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], [[TMP21]]
5033+
; CHECK-NEXT: [[TMP24:%.*]] = sext <32 x i1> [[TMP23]] to <32 x i16>
5034+
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <32 x i16> [[TMP24]] to <16 x i32>
5035+
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i32> [[TMP17]], zeroinitializer
5036+
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i32>
5037+
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[X0]], <32 x i16> [[X1]])
50285038
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
50295039
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5030-
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
5040+
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP18]], <16 x i32> [[TMP4]]
50315041
; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]]
5032-
; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
5042+
; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP18]]
50335043
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
50345044
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
50355045
; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]

0 commit comments

Comments
 (0)