-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[msan] Handle AVX512 vector down convert (non-mem) intrinsics #147606
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
047a1b1
29e474f
c2ee02c
f4236ab
f779e0b
e90c6e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4592,6 +4592,86 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| ConstantInt::get(IRB.getInt32Ty(), 0)); | ||
| } | ||
|
|
||
| // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.512 | ||
| // | ||
| // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512 | ||
| // (<8 x i64>, <16 x i8>, i8) | ||
| // A WriteThru Mask | ||
| // | ||
| // call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512 | ||
| // (<16 x i32>, <16 x i8>, i16) | ||
| // | ||
| // Dst[i] = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i] | ||
| // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i]) : WriteThru_shadow[i] | ||
| // | ||
| // If Dst has more elements than A, the excess elements are zeroed (and the | ||
| // corresponding shadow is initialized). | ||
| // | ||
| // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise | ||
| // and is much faster than this handler. | ||
| void handleAVX512VectorDownConvert(IntrinsicInst &I) { | ||
| IRBuilder<> IRB(&I); | ||
|
|
||
| assert(I.arg_size() == 3); | ||
| Value *A = I.getOperand(0); | ||
| Value *WriteThrough = I.getOperand(1); | ||
| Value *Mask = I.getOperand(2); | ||
|
|
||
| assert(isa<FixedVectorType>(A->getType())); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not necessarily in this PR, but maybe we want this because this is asserted all over the place?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ack |
||
| assert(A->getType()->isIntOrIntVectorTy()); | ||
|
|
||
| assert(isa<FixedVectorType>(WriteThrough->getType())); | ||
| assert(WriteThrough->getType()->isIntOrIntVectorTy()); | ||
|
|
||
| unsigned ANumElements = | ||
| cast<FixedVectorType>(A->getType())->getNumElements(); | ||
| unsigned OutputNumElements = | ||
| cast<FixedVectorType>(WriteThrough->getType())->getNumElements(); | ||
| assert(ANumElements == OutputNumElements || | ||
| ANumElements * 2 == OutputNumElements); | ||
|
|
||
| assert(Mask->getType()->isIntegerTy()); | ||
| assert(Mask->getType()->getScalarSizeInBits() == ANumElements); | ||
|
|
||
| assert(I.getType() == WriteThrough->getType()); | ||
|
|
||
| // Widen the mask, if necessary, to have one bit per element of the output | ||
| // vector. | ||
| // We want the extra bits to have '1's, so that the CreateSelect will | ||
| // select the values from AShadow instead of WriteThroughShadow ("maskless" | ||
| // versions of the intrinsics are sometimes implemented using an all-1's | ||
| // mask and an undefined value for WriteThroughShadow). We accomplish this | ||
| // by using bitwise NOT before and after the ZExt. | ||
| if (ANumElements != OutputNumElements) { | ||
| Mask = IRB.CreateNot(Mask); | ||
| Mask = IRB.CreateZExt(Mask, Type::getIntNTy(*MS.C, OutputNumElements), "_ms_widen_mask"); | ||
| Mask = IRB.CreateNot(Mask); | ||
| } | ||
| Mask = IRB.CreateBitCast( | ||
| Mask, FixedVectorType::get(IRB.getInt1Ty(), OutputNumElements)); | ||
|
|
||
| Value *AShadow = getShadow(A); | ||
|
|
||
| // The return type might have more elements than the input. | ||
| // Temporarily shrink the return type's number of elements. | ||
| VectorType *ShadowType = maybeShrinkVectorShadowType(A, I); | ||
|
|
||
| // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation. | ||
| // This handler treats them all as truncation. | ||
| // | ||
| // TODO: use GetMinMaxUnsigned() to handle saturation precisely. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add whether the imprecision leads to false positives or negatives.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done: 29e474f |
||
| AShadow = IRB.CreateTrunc(AShadow, ShadowType, "_ms_trunc_shadow"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is there so much whitespace?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reduced, but leaving enough to honor our SpaceX colleagues |
||
|
|
||
| AShadow = maybeExtendVectorShadowWithZeros(AShadow, I); | ||
|
|
||
| Value *WriteThroughShadow = getShadow(WriteThrough); | ||
|
|
||
| Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow); | ||
|
|
||
| setShadow(&I, Shadow); | ||
| setOriginForNaryOp(I); | ||
| } | ||
|
|
||
| // For sh.* compiler intrinsics: | ||
| // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round | ||
| // (<8 x half>, <8 x half>, <8 x half>, i8, i32) | ||
|
|
@@ -5412,6 +5492,66 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| break; | ||
| } | ||
|
|
||
| // AVX512 PMOV: Packed MOV, with truncation | ||
| // Precisely handled by applying the same intrinsic to the shadow | ||
| case Intrinsic::x86_avx512_mask_pmov_dw_512: | ||
| case Intrinsic::x86_avx512_mask_pmov_db_512: | ||
| case Intrinsic::x86_avx512_mask_pmov_qb_512: | ||
| case Intrinsic::x86_avx512_mask_pmov_qw_512: { | ||
| // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 were removed in | ||
| // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22 | ||
| handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(), | ||
| /*trailingVerbatimArgs=*/1); | ||
| break; | ||
| } | ||
|
|
||
| // AVX512 PMVOV{S,US}: Packed MOV, with signed/unsigned saturation | ||
| // Approximately handled using the corresponding truncation intrinsic | ||
| // TODO: improve handleAVX512VectorDownConvert to precisely model saturation | ||
| case Intrinsic::x86_avx512_mask_pmovs_dw_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_dw_512: { | ||
| handleIntrinsicByApplyingToShadow(I, | ||
| Intrinsic::x86_avx512_mask_pmov_dw_512, | ||
| /* trailingVerbatimArgs=*/1); | ||
| break; | ||
| } | ||
|
|
||
| case Intrinsic::x86_avx512_mask_pmovs_db_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_db_512: { | ||
| handleIntrinsicByApplyingToShadow(I, | ||
| Intrinsic::x86_avx512_mask_pmov_db_512, | ||
| /* trailingVerbatimArgs=*/1); | ||
| break; | ||
| } | ||
|
|
||
| case Intrinsic::x86_avx512_mask_pmovs_qb_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_qb_512: { | ||
| handleIntrinsicByApplyingToShadow(I, | ||
| Intrinsic::x86_avx512_mask_pmov_qb_512, | ||
| /* trailingVerbatimArgs=*/1); | ||
| break; | ||
| } | ||
|
|
||
| case Intrinsic::x86_avx512_mask_pmovs_qw_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_qw_512: { | ||
| handleIntrinsicByApplyingToShadow(I, | ||
| Intrinsic::x86_avx512_mask_pmov_qw_512, | ||
| /* trailingVerbatimArgs=*/1); | ||
| break; | ||
| } | ||
|
|
||
| case Intrinsic::x86_avx512_mask_pmovs_qd_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_qd_512: | ||
| case Intrinsic::x86_avx512_mask_pmovs_wb_512: | ||
| case Intrinsic::x86_avx512_mask_pmovus_wb_512: { | ||
| // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 do not exist, we | ||
| // cannot use handleIntrinsicByApplyingToShadow. Instead, we call the | ||
| // slow-path handler. | ||
| handleAVX512VectorDownConvert(I); | ||
| break; | ||
| } | ||
|
|
||
| // AVX512 FP16 Arithmetic | ||
| case Intrinsic::x86_avx512fp16_mask_add_sh_round: | ||
| case Intrinsic::x86_avx512fp16_mask_sub_sh_round: | ||
| case Intrinsic::x86_avx512fp16_mask_mul_sh_round: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we need to shadow check the mask?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done