Skip to content

Commit fd03942

Browse files
committed
Revert "Address Vitaly's feedback - split into base change"
This reverts commit 32b6431.
1 parent 32b6431 commit fd03942

File tree

7 files changed

+180
-156
lines changed

7 files changed

+180
-156
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 36 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -2624,7 +2624,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26242624

26252625
FixedVectorType *ParamType =
26262626
cast<FixedVectorType>(I.getArgOperand(0)->getType());
2627-
assert(I.arg_size() != 2 || I.getArgOperand(0)->getType() == I.getArgOperand(1)->getType());
2627+
if (I.arg_size() == 2)
2628+
assert(I.getArgOperand(0)->getType() == I.getArgOperand(1)->getType());
26282629

26292630
[[maybe_unused]] FixedVectorType *ReturnType =
26302631
cast<FixedVectorType>(I.getType());
@@ -4188,87 +4189,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
41884189
setOriginForNaryOp(I);
41894190
}
41904191

4191-
void handleAVXHorizontalAddSubIntrinsic(IntrinsicInst &I) {
4192-
// Approximation only:
4193-
// output = horizontal_add/sub(A, B)
4194-
// => shadow[output] = horizontal_add(shadow[A], shadow[B])
4195-
//
4196-
// We always use horizontal add instead of subtract, because subtracting
4197-
// a fully uninitialized shadow would result in a fully initialized shadow.
4198-
//
4199-
// - If we add two adjacent zero (initialized) shadow values, the
4200-
// result will always be zero, i.e., no false positives.
4201-
// - If we add two shadows, one of which is uninitialized, the
4202-
// result will always be non-zero i.e., no false negatives.
4203-
// - However, we can have false negatives if we do an addition that wraps
4204-
// to zero; we consider this an acceptable tradeoff for performance.
4205-
//
4206-
// To make shadow propagation precise, we want the equivalent of
4207-
// "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
4208-
4209-
Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID();
4210-
4211-
switch (I.getIntrinsicID()) {
4212-
case Intrinsic::x86_sse3_hsub_ps:
4213-
shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4214-
break;
4215-
4216-
case Intrinsic::x86_sse3_hsub_pd:
4217-
shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4218-
break;
4219-
4220-
case Intrinsic::x86_ssse3_phsub_d:
4221-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4222-
break;
4223-
4224-
case Intrinsic::x86_ssse3_phsub_d_128:
4225-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4226-
break;
4227-
4228-
case Intrinsic::x86_ssse3_phsub_w:
4229-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4230-
break;
4231-
4232-
case Intrinsic::x86_ssse3_phsub_w_128:
4233-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4234-
break;
4235-
4236-
case Intrinsic::x86_ssse3_phsub_sw:
4237-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4238-
break;
4239-
4240-
case Intrinsic::x86_ssse3_phsub_sw_128:
4241-
shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4242-
break;
4243-
4244-
case Intrinsic::x86_avx_hsub_pd_256:
4245-
shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4246-
break;
4247-
4248-
case Intrinsic::x86_avx_hsub_ps_256:
4249-
shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4250-
break;
4251-
4252-
case Intrinsic::x86_avx2_phsub_d:
4253-
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4254-
break;
4255-
4256-
case Intrinsic::x86_avx2_phsub_w:
4257-
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4258-
break;
4259-
4260-
case Intrinsic::x86_avx2_phsub_sw:
4261-
shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4262-
break;
4263-
4264-
default:
4265-
break;
4266-
}
4267-
4268-
return handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID,
4269-
/*trailingVerbatimArgs*/ 0);
4270-
}
4271-
42724192
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
42734193
/// and vst{2,3,4}lane).
42744194
///
@@ -4815,33 +4735,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
48154735
handleVtestIntrinsic(I);
48164736
break;
48174737

4818-
case Intrinsic::x86_sse3_hadd_ps:
4819-
case Intrinsic::x86_sse3_hadd_pd:
4820-
case Intrinsic::x86_ssse3_phadd_d:
4821-
case Intrinsic::x86_ssse3_phadd_d_128:
4738+
// Packed Horizontal Add/Subtract
48224739
case Intrinsic::x86_ssse3_phadd_w:
48234740
case Intrinsic::x86_ssse3_phadd_w_128:
4741+
case Intrinsic::x86_avx2_phadd_w:
4742+
case Intrinsic::x86_ssse3_phsub_w:
4743+
case Intrinsic::x86_ssse3_phsub_w_128:
4744+
case Intrinsic::x86_avx2_phsub_w: {
4745+
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16);
4746+
break;
4747+
}
4748+
4749+
// Packed Horizontal Add/Subtract
4750+
case Intrinsic::x86_ssse3_phadd_d:
4751+
case Intrinsic::x86_ssse3_phadd_d_128:
4752+
case Intrinsic::x86_avx2_phadd_d:
4753+
case Intrinsic::x86_ssse3_phsub_d:
4754+
case Intrinsic::x86_ssse3_phsub_d_128:
4755+
case Intrinsic::x86_avx2_phsub_d: {
4756+
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/32);
4757+
break;
4758+
}
4759+
4760+
// Packed Horizontal Add/Subtract and Saturate
48244761
case Intrinsic::x86_ssse3_phadd_sw:
48254762
case Intrinsic::x86_ssse3_phadd_sw_128:
4763+
case Intrinsic::x86_avx2_phadd_sw:
4764+
case Intrinsic::x86_ssse3_phsub_sw:
4765+
case Intrinsic::x86_ssse3_phsub_sw_128:
4766+
case Intrinsic::x86_avx2_phsub_sw: {
4767+
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16);
4768+
break;
4769+
}
4770+
4771+
// Packed Single/Double Precision Floating-Point Horizontal Add
4772+
case Intrinsic::x86_sse3_hadd_ps:
4773+
case Intrinsic::x86_sse3_hadd_pd:
48264774
case Intrinsic::x86_avx_hadd_pd_256:
48274775
case Intrinsic::x86_avx_hadd_ps_256:
4828-
case Intrinsic::x86_avx2_phadd_d:
4829-
case Intrinsic::x86_avx2_phadd_w:
4830-
case Intrinsic::x86_avx2_phadd_sw:
48314776
case Intrinsic::x86_sse3_hsub_ps:
48324777
case Intrinsic::x86_sse3_hsub_pd:
4833-
case Intrinsic::x86_ssse3_phsub_d:
4834-
case Intrinsic::x86_ssse3_phsub_d_128:
4835-
case Intrinsic::x86_ssse3_phsub_w:
4836-
case Intrinsic::x86_ssse3_phsub_w_128:
4837-
case Intrinsic::x86_ssse3_phsub_sw:
4838-
case Intrinsic::x86_ssse3_phsub_sw_128:
48394778
case Intrinsic::x86_avx_hsub_pd_256:
4840-
case Intrinsic::x86_avx_hsub_ps_256:
4841-
case Intrinsic::x86_avx2_phsub_d:
4842-
case Intrinsic::x86_avx2_phsub_w:
4843-
case Intrinsic::x86_avx2_phsub_sw: {
4844-
handleAVXHorizontalAddSubIntrinsic(I);
4779+
case Intrinsic::x86_avx_hsub_ps_256: {
4780+
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/std::nullopt);
48454781
break;
48464782
}
48474783

llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -435,10 +435,9 @@ define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1
435435
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
436436
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
437437
; CHECK-NEXT: call void @llvm.donothing()
438-
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
439-
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
440-
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
441-
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
438+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
439+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
440+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP3]], [[TMP4]]
442441
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
443442
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
444443
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -454,10 +453,9 @@ define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) #
454453
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
455454
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
456455
; CHECK-NEXT: call void @llvm.donothing()
457-
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
458-
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
459-
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
460-
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
456+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
457+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
458+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP3]], [[TMP4]]
461459
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
462460
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
463461
; CHECK-NEXT: ret <8 x float> [[RES1]]
@@ -473,10 +471,9 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1
473471
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
474472
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
475473
; CHECK-NEXT: call void @llvm.donothing()
476-
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
477-
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
478-
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
479-
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
474+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
475+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
476+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP3]], [[TMP4]]
480477
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
481478
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
482479
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -492,10 +489,9 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #
492489
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
493490
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
494491
; CHECK-NEXT: call void @llvm.donothing()
495-
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
496-
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
497-
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
498-
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
492+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
493+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
494+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP3]], [[TMP4]]
499495
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
500496
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
501497
; CHECK-NEXT: ret <8 x float> [[RES1]]

llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,9 @@ define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
569569
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
570570
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
571571
; CHECK-NEXT: call void @llvm.donothing()
572-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
572+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
573+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
574+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP9]], [[TMP10]]
573575
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
574576
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
575577
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -585,7 +587,9 @@ define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
585587
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
586588
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
587589
; CHECK-NEXT: call void @llvm.donothing()
588-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
590+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
591+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
592+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]]
589593
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
590594
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
591595
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -601,7 +605,9 @@ define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
601605
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
602606
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
603607
; CHECK-NEXT: call void @llvm.donothing()
604-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
608+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
609+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
610+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]]
605611
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
606612
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
607613
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -617,7 +623,9 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
617623
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
618624
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
619625
; CHECK-NEXT: call void @llvm.donothing()
620-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
626+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
627+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
628+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP9]], [[TMP10]]
621629
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
622630
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
623631
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -633,7 +641,9 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
633641
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
634642
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
635643
; CHECK-NEXT: call void @llvm.donothing()
636-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
644+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
645+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
646+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]]
637647
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
638648
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
639649
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -649,7 +659,9 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
649659
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
650660
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
651661
; CHECK-NEXT: call void @llvm.donothing()
652-
; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
662+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
663+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
664+
; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]]
653665
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
654666
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
655667
; CHECK-NEXT: ret <16 x i16> [[RES]]

0 commit comments

Comments
 (0)