[msan][NFCI] Generalize handleAVX512VectorGenericMaskedFP() operands (#159966)

thurstond · web-flow · commit 475e0ee7fa44 · 2025-09-24T16:32:40.000-07:00
This generalizes handleAVX512VectorGenericMaskedFP() (introduced in #158397), to potentially handle intrinsics that have A/WriteThru/Mask in an operand order that is different to AVX512/AVX10 rcp and rsqrt. Any operands other than A and WriteThru must be fully initialized. For example, the generalized handler could be applied in follow-up work to many of the AVX512 rndscale intrinsics: ``` <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32) <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) A Imm WriteThru Mask Rounding <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8) <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8) <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8) <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8) A Imm WriteThru Mask ```
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4926,36 +4926,56 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   //       <2 x double> @llvm.x86.avx512.rcp14.pd.128
   //                        (<2 x double>, <2 x double>, i8)
   //
+  //       <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
+  //                        (<8 x double>, i32, <8 x double>, i8,  i32)
+  //                         A             Imm  WriteThru     Mask Rounding
+  //
+  // All operands other than A and WriteThru (e.g., Mask, Imm, Rounding) must
+  // be fully initialized.
+  //
   // Dst[i]        = Mask[i] ? some_op(A[i]) : WriteThru[i]
   // Dst_shadow[i] = Mask[i] ? all_or_nothing(A_shadow[i]) : WriteThru_shadow[i]
-  void handleAVX512VectorGenericMaskedFP(IntrinsicInst &I) {
+  void handleAVX512VectorGenericMaskedFP(IntrinsicInst &I, unsigned AIndex,
+                                         unsigned WriteThruIndex,
+                                         unsigned MaskIndex) {
     IRBuilder<> IRB(&I);
 
-    assert(I.arg_size() == 3);
-    Value *A = I.getOperand(0);
-    Value *WriteThrough = I.getOperand(1);
-    Value *Mask = I.getOperand(2);
+    unsigned NumArgs = I.arg_size();
+    assert(AIndex < NumArgs);
+    assert(WriteThruIndex < NumArgs);
+    assert(MaskIndex < NumArgs);
+    assert(AIndex != WriteThruIndex);
+    assert(AIndex != MaskIndex);
+    assert(WriteThruIndex != MaskIndex);
+
+    Value *A = I.getOperand(AIndex);
+    Value *WriteThru = I.getOperand(WriteThruIndex);
+    Value *Mask = I.getOperand(MaskIndex);
 
     assert(isFixedFPVector(A));
-    assert(isFixedFPVector(WriteThrough));
+    assert(isFixedFPVector(WriteThru));
 
     [[maybe_unused]] unsigned ANumElements =
         cast<FixedVectorType>(A->getType())->getNumElements();
     unsigned OutputNumElements =
-        cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+        cast<FixedVectorType>(WriteThru->getType())->getNumElements();
     assert(ANumElements == OutputNumElements);
 
-    assert(Mask->getType()->isIntegerTy());
-    // Some bits of the mask might be unused, but check them all anyway
-    // (typically the mask is an integer constant).
-    insertCheckShadowOf(Mask, &I);
+    for (unsigned i = 0; i < NumArgs; ++i) {
+      if (i != AIndex && i != WriteThruIndex) {
+        // Imm, Mask, Rounding etc. are "control" data, hence we require that
+        // they be fully initialized.
+        assert(I.getOperand(i)->getType()->isIntegerTy());
+        insertCheckShadowOf(I.getOperand(i), &I);
+      }
+    }
 
     // The mask has 1 bit per element of A, but a minimum of 8 bits.
     if (Mask->getType()->getScalarSizeInBits() == 8 && ANumElements < 8)
       Mask = IRB.CreateTrunc(Mask, Type::getIntNTy(*MS.C, ANumElements));
     assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
 
-    assert(I.getType() == WriteThrough->getType());
+    assert(I.getType() == WriteThru->getType());
 
     Mask = IRB.CreateBitCast(
         Mask, FixedVectorType::get(IRB.getInt1Ty(), OutputNumElements));
@@ -4966,9 +4986,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow)),
                              AShadow->getType());
 
-    Value *WriteThroughShadow = getShadow(WriteThrough);
+    Value *WriteThruShadow = getShadow(WriteThru);
 
-    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThruShadow);
     setShadow(&I, Shadow);
 
     setOriginForNaryOp(I);
@@ -6202,7 +6222,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_512:
     case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_256:
     case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_128:
-      handleAVX512VectorGenericMaskedFP(I);
+      handleAVX512VectorGenericMaskedFP(I, /*AIndex=*/0, /*WriteThruIndex=*/1,
+                                        /*MaskIndex=*/2);
       break;
 
     // AVX512/AVX10 Reciprocal Square Root
@@ -6253,7 +6274,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::x86_avx512fp16_mask_rcp_ph_512:
     case Intrinsic::x86_avx512fp16_mask_rcp_ph_256:
     case Intrinsic::x86_avx512fp16_mask_rcp_ph_128:
-      handleAVX512VectorGenericMaskedFP(I);
+      handleAVX512VectorGenericMaskedFP(I, /*AIndex=*/0, /*WriteThruIndex=*/1,
+                                        /*MaskIndex=*/2);
       break;
 
     // AVX512 FP16 Arithmetic