Skip to content

Commit 638bd11

Browse files
authored
[msan] Handle SSE/AVX pshuf intrinsic by applying to shadow (llvm#153895)
llvm.x86.sse.pshuf.w(<1 x i64>, i8) and llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) are currently handled strictly, which is suboptimal. llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) and llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) are currently heuristically handled using maybeHandleSimpleNomemIntrinsic, which is incorrect. Since the second argument is the shuffle order, we instrument all these intrinsics using `handleIntrinsicByApplyingToShadow(..., /*trailingVerbatimArgs=*/1)` (llvm#114490).
1 parent a44bd15 commit 638bd11

File tree

7 files changed

+79
-62
lines changed

7 files changed

+79
-62
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3237,6 +3237,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
32373237
///
32383238
/// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched by
32393239
/// this handler. See horizontalReduce().
3240+
///
3241+
/// TODO: permutation intrinsics are also often incorrectly matched.
32403242
[[maybe_unused]] bool
32413243
maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
32423244
unsigned int trailingFlags) {
@@ -5719,6 +5721,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
57195721
handleAVXVpermi2var(I);
57205722
break;
57215723

5724+
// Packed Shuffle
5725+
// llvm.x86.sse.pshuf.w(<1 x i64>, i8)
5726+
// llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>)
5727+
// llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
5728+
// llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
5729+
// llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
5730+
//
5731+
// The following intrinsics are auto-upgraded:
5732+
// llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
5733+
// llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
5734+
// llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
5735+
case Intrinsic::x86_avx2_pshuf_b:
5736+
case Intrinsic::x86_sse_pshuf_w:
5737+
case Intrinsic::x86_ssse3_pshuf_b_128:
5738+
case Intrinsic::x86_ssse3_pshuf_b:
5739+
case Intrinsic::x86_avx512_pshuf_b_512:
5740+
handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(),
5741+
/*trailingVerbatimArgs=*/1);
5742+
break;
5743+
57225744
case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
57235745
handleAVX512VectorConvertFPToInt(I);
57245746
break;

llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -767,8 +767,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
767767
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
768768
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
769769
; CHECK-NEXT: call void @llvm.donothing()
770-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
771-
; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
770+
; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]])
771+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP3]]
772+
; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]])
772773
; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
773774
; CHECK-NEXT: ret <32 x i8> [[RES]]
774775
;

llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512
2323
; - llvm.x86.avx512.permvar.hi.512
2424
; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512
25-
; - llvm.x86.avx512.pshuf.b.512
2625
; - llvm.x86.avx512.psllv.w.512, llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512
2726

2827
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -1968,8 +1967,9 @@ define <64 x i8> @test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1,
19681967
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
19691968
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
19701969
; CHECK-NEXT: call void @llvm.donothing()
1971-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
1972-
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
1970+
; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
1971+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP4]]
1972+
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
19731973
; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
19741974
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
19751975
;
@@ -1984,8 +1984,9 @@ define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8>
19841984
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
19851985
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
19861986
; CHECK-NEXT: call void @llvm.donothing()
1987-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
1988-
; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
1987+
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
1988+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP13]]
1989+
; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
19891990
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
19901991
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1>
19911992
; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]]

llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512
1818
; - llvm.x86.avx512.permvar.hi.512
1919
; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512
20-
; - llvm.x86.avx512.pshuf.b.512
2120
; - llvm.x86.avx512.psllv.w.512
2221
; - llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512
2322

@@ -1714,8 +1713,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) #
17141713
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
17151714
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
17161715
; CHECK-NEXT: call void @llvm.donothing()
1717-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
1718-
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
1716+
; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
1717+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP3]]
1718+
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
17191719
; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
17201720
; CHECK-NEXT: ret <64 x i8> [[RES]]
17211721
;
@@ -1730,8 +1730,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %
17301730
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
17311731
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
17321732
; CHECK-NEXT: call void @llvm.donothing()
1733-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
1734-
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
1733+
; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
1734+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP10]]
1735+
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
17351736
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
17361737
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
17371738
; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]]
@@ -1755,8 +1756,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8>
17551756
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
17561757
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
17571758
; CHECK-NEXT: call void @llvm.donothing()
1758-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
1759-
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
1759+
; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
1760+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP9]]
1761+
; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
17601762
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
17611763
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
17621764
; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer

llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
; Handled strictly:
55
; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2
66
; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2
7-
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
8-
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
97
; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5
108
; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5
119
; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5
@@ -2792,19 +2790,17 @@ define i64 @test21(<1 x i64> %a) #0 {
27922790
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
27932791
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
27942792
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2795-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2796-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2797-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2798-
; CHECK: 6:
2799-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2800-
; CHECK-NEXT: unreachable
2801-
; CHECK: 7:
2802-
; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2793+
; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
2794+
; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]]
2795+
; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
28032796
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2797+
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
28042798
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
2799+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
28052800
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2806-
; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
2807-
; CHECK-NEXT: ret i64 [[TMP5]]
2801+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
2802+
; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2803+
; CHECK-NEXT: ret i64 [[TMP15]]
28082804
;
28092805
entry:
28102806
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -2826,19 +2822,17 @@ define i32 @test21_2(<1 x i64> %a) #0 {
28262822
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
28272823
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
28282824
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2829-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2830-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2831-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2832-
; CHECK: 6:
2833-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2834-
; CHECK-NEXT: unreachable
2835-
; CHECK: 7:
2836-
; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2825+
; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
2826+
; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]]
2827+
; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
28372828
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2829+
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
28382830
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
2831+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <2 x i32>
28392832
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
2840-
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
2841-
; CHECK-NEXT: ret i32 [[TMP5]]
2833+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
2834+
; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8
2835+
; CHECK-NEXT: ret i32 [[TMP15]]
28422836
;
28432837
entry:
28442838
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -3249,7 +3243,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
32493243
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
32503244
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
32513245
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3252-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3246+
; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]])
3247+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]]
32533248
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
32543249
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
32553250
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>

llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
807807
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
808808
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
809809
; CHECK-NEXT: call void @llvm.donothing()
810-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
811-
; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
810+
; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]])
811+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP4]]
812+
; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]])
812813
; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
813814
; CHECK-NEXT: ret <32 x i8> [[RES]]
814815
;

llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
; Handled strictly:
55
; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2
66
; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2
7-
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
8-
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
97
; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5
108
; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5
119
; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5
@@ -2863,19 +2861,17 @@ define i64 @test21(<1 x i64> %a) #0 {
28632861
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
28642862
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
28652863
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2866-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2867-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2868-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
2869-
; CHECK: 7:
2870-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2871-
; CHECK-NEXT: unreachable
2872-
; CHECK: 8:
2873-
; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2864+
; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
2865+
; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]]
2866+
; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
28742867
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2868+
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16>
28752869
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
2870+
; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
28762871
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2877-
; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
2878-
; CHECK-NEXT: ret i64 [[TMP5]]
2872+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x i64> [[TMP15]], i32 0
2873+
; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2874+
; CHECK-NEXT: ret i64 [[TMP12]]
28792875
;
28802876
entry:
28812877
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -2898,19 +2894,17 @@ define i32 @test21_2(<1 x i64> %a) #0 {
28982894
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
28992895
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
29002896
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2901-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2902-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2903-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
2904-
; CHECK: 7:
2905-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2906-
; CHECK-NEXT: unreachable
2907-
; CHECK: 8:
2908-
; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2897+
; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
2898+
; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]]
2899+
; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
29092900
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2901+
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16>
29102902
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
2903+
; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <2 x i32>
29112904
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
2912-
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
2913-
; CHECK-NEXT: ret i32 [[TMP5]]
2905+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
2906+
; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8
2907+
; CHECK-NEXT: ret i32 [[TMP12]]
29142908
;
29152909
entry:
29162910
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -3333,7 +3327,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
33333327
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
33343328
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
33353329
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3336-
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3330+
; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]])
3331+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]]
33373332
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
33383333
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
33393334
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>

0 commit comments

Comments
 (0)