@@ -3429,8 +3429,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
34293429 return ShadowType;
34303430 }
34313431
3432- // / Doubles the length of a vector shadow (filled with zeros) if necessary to
3433- // / match the length of the shadow for the instruction.
3432+ // / Doubles the length of a vector shadow (extending with zeros) if necessary
3433+ // / to match the length of the shadow for the instruction.
3434+ // / If scalar types of the vectors are different, it will use the type of the
3435+ // / input vector.
34343436 // / This is more type-safe than CreateShadowCast().
34353437 Value *maybeExtendVectorShadowWithZeros (Value *Shadow, IntrinsicInst &I) {
34363438 IRBuilder<> IRB (&I);
@@ -3440,10 +3442,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
34403442 Value *FullShadow = getCleanShadow (&I);
34413443 assert (cast<FixedVectorType>(Shadow->getType ())->getNumElements () <=
34423444 cast<FixedVectorType>(FullShadow->getType ())->getNumElements ());
3443- assert (cast<FixedVectorType>(Shadow->getType ())->getScalarType () ==
3444- cast<FixedVectorType>(FullShadow->getType ())->getScalarType ());
34453445
3446- if (Shadow->getType () == FullShadow->getType ()) {
3446+ if (cast<FixedVectorType>(Shadow->getType ())->getNumElements () ==
3447+ cast<FixedVectorType>(FullShadow->getType ())->getNumElements ()) {
34473448 FullShadow = Shadow;
34483449 } else {
34493450 // TODO: generalize beyond 2x?
@@ -4528,55 +4529,93 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
45284529 return isFixedFPVectorTy (V->getType ());
45294530 }
45304531
4531- // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
4532- // (<16 x float> a, <16 x i32> writethru, i16 mask,
4533- // i32 rounding)
4532+ // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
4533+ // (<16 x float> a, <16 x i32> writethru, i16 mask,
4534+ // i32 rounding)
4535+ //
4536+ // Inconveniently, some similar intrinsics have a different operand order:
4537+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
4538+ // (<16 x float> a, i32 rounding, <16 x i16> writethru,
4539+ // i16 mask)
4540+ //
4541+ // If the return type has more elements than A, the excess elements are
4542+ // zeroed (and the corresponding shadow is initialized).
4543+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
4544+ // (<4 x float> a, i32 rounding, <8 x i16> writethru,
4545+ // i8 mask)
45344546 //
45354547 // dst[i] = mask[i] ? convert(a[i]) : writethru[i]
45364548 // dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
45374549 // where all_or_nothing(x) is fully uninitialized if x has any
45384550 // uninitialized bits
4539- void handleAVX512VectorConvertFPToInt (IntrinsicInst &I) {
4551+ void handleAVX512VectorConvertFPToInt (IntrinsicInst &I, bool LastMask ) {
45404552 IRBuilder<> IRB (&I);
45414553
45424554 assert (I.arg_size () == 4 );
45434555 Value *A = I.getOperand (0 );
4544- Value *WriteThrough = I.getOperand (1 );
4545- Value *Mask = I.getOperand (2 );
4546- Value *RoundingMode = I.getOperand (3 );
4556+ Value *WriteThrough;
4557+ Value *Mask;
4558+ Value *RoundingMode;
4559+ if (LastMask) {
4560+ WriteThrough = I.getOperand (2 );
4561+ Mask = I.getOperand (3 );
4562+ RoundingMode = I.getOperand (1 );
4563+ } else {
4564+ WriteThrough = I.getOperand (1 );
4565+ Mask = I.getOperand (2 );
4566+ RoundingMode = I.getOperand (3 );
4567+ }
45474568
45484569 assert (isFixedFPVector (A));
45494570 assert (isFixedIntVector (WriteThrough));
45504571
45514572 unsigned ANumElements =
45524573 cast<FixedVectorType>(A->getType ())->getNumElements ();
4553- assert (ANumElements ==
4554- cast<FixedVectorType>(WriteThrough->getType ())->getNumElements ());
4574+ unsigned WriteThruNumElements =
4575+ cast<FixedVectorType>(WriteThrough->getType ())->getNumElements ();
4576+ assert (ANumElements == WriteThruNumElements ||
4577+ ANumElements * 2 == WriteThruNumElements);
45554578
45564579 assert (Mask->getType ()->isIntegerTy ());
4557- assert (Mask->getType ()->getScalarSizeInBits () == ANumElements);
4580+ unsigned MaskNumElements = Mask->getType ()->getScalarSizeInBits ();
4581+ assert (ANumElements == MaskNumElements ||
4582+ ANumElements * 2 == MaskNumElements);
4583+
4584+ assert (WriteThruNumElements == MaskNumElements);
4585+
45584586 insertCheckShadowOf (Mask, &I);
45594587
45604588 assert (RoundingMode->getType ()->isIntegerTy ());
4561- // Only four bits of the rounding mode are used, though it's very
4589+ // Only some bits of the rounding mode are used, though it's very
45624590 // unusual to have uninitialized bits there (more commonly, it's a
45634591 // constant).
45644592 insertCheckShadowOf (RoundingMode, &I);
45654593
45664594 assert (I.getType () == WriteThrough->getType ());
45674595
4596+ Value *AShadow = getShadow (A);
4597+ AShadow = maybeExtendVectorShadowWithZeros (AShadow, I);
4598+
4599+ if (ANumElements * 2 == MaskNumElements) {
4600+ // Ensure that the irrelevant bits of the mask are zero, hence selecting
4601+ // from the zeroed shadow instead of the writethrough's shadow.
4602+ Mask = IRB.CreateTrunc (Mask, IRB.getIntNTy (ANumElements));
4603+ Mask = IRB.CreateZExt (Mask, IRB.getIntNTy (MaskNumElements));
4604+ }
4605+
45684606 // Convert i16 mask to <16 x i1>
45694607 Mask = IRB.CreateBitCast (
4570- Mask, FixedVectorType::get (IRB.getInt1Ty (), ANumElements ));
4608+ Mask, FixedVectorType::get (IRB.getInt1Ty (), MaskNumElements ));
45714609
4572- Value *AShadow = getShadow (A);
4573- // / For scalars:
4574- // / Since they are converting from floating-point, the output is:
4610+ // / For floating-point to integer conversion, the output is:
45754611 // / - fully uninitialized if *any* bit of the input is uninitialized
45764612 // / - fully initialized if all bits of the input are initialized
45774613 // / We apply the same principle on a per-element basis for vectors.
4578- AShadow = IRB.CreateSExt (IRB.CreateICmpNE (AShadow, getCleanShadow (A)),
4579- getShadowTy (A));
4614+ // /
4615+ // / We use the scalar width of the return type instead of A's.
4616+ AShadow = IRB.CreateSExt (
4617+ IRB.CreateICmpNE (AShadow, getCleanShadow (AShadow->getType ())),
4618+ getShadowTy (&I));
45804619
45814620 Value *WriteThroughShadow = getShadow (WriteThrough);
45824621 Value *Shadow = IRB.CreateSelect (Mask, AShadow, WriteThroughShadow);
@@ -5920,11 +5959,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
59205959 /* trailingVerbatimArgs=*/ 1 );
59215960 break ;
59225961
5962+ // Convert Packed Single Precision Floating-Point Values
5963+ // to Packed Signed Doubleword Integer Values
5964+ //
5965+ // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
5966+ // (<16 x float>, <16 x i32>, i16, i32)
59235967 case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
5924- handleAVX512VectorConvertFPToInt (I);
5968+ handleAVX512VectorConvertFPToInt (I, /* LastMask= */ false );
59255969 break ;
59265970 }
59275971
5972+ // Convert Single-Precision FP Value to 16-bit FP Value
5973+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
5974+ // (<16 x float>, i32, <16 x i16>, i16)
5975+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
5976+ // (<4 x float>, i32, <8 x i16>, i8)
5977+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
5978+ // (<8 x float>, i32, <8 x i16>, i8)
5979+ case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
5980+ case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
5981+ case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
5982+ handleAVX512VectorConvertFPToInt (I, /* LastMask=*/ true );
5983+ break ;
5984+
59285985 // AVX512 PMOV: Packed MOV, with truncation
59295986 // Precisely handled by applying the same intrinsic to the shadow
59305987 case Intrinsic::x86_avx512_mask_pmov_dw_512:
0 commit comments