Additional explicit SIMD16 checks have been implemented for

ekochetk · igcbot · commit 8a8d752708b5 · 2025-05-28T10:29:58.000+02:00
IGCVectorizer

IGCVectorizer only supports SIMD16 as of now; explicit checks and
asserts have been added.
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
@@ -4650,6 +4650,7 @@ void EmitPass::FPTrunc(const SSource sources[2], const DstModifier& modifier) {
     if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) &&
             sources[0].value->getType()->isVectorTy()) {
 
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         unsigned VectorSize = getVectorSize(sources[0].value);
 
         // float is 4 bytes --> divide by 4
@@ -4689,6 +4690,7 @@ void EmitPass::Add(const SSource sources[2], const DstModifier& modifier)
             sources[0].value->getType()->isVectorTy() &&
             sources[1].value->getType()->isVectorTy()) {
 
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         unsigned VectorSize = getVectorSize(sources[0].value);
 
         for (unsigned i = 0; i < VectorSize; ++i) {
@@ -4728,6 +4730,7 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier& modifier)
             sources[0].value->getType()->isVectorTy() &&
             sources[1].value->getType()->isVectorTy()) {
 
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         unsigned VectorSize = getVectorSize(sources[0].value);
 
         for (unsigned i = 0; i < VectorSize; ++i) {
@@ -4785,6 +4788,8 @@ void EmitPass::Div(const SSource sources[2], const DstModifier& modifier)
             sources[0].value->getType()->isVectorTy() &&
             sources[1].value->getType()->isVectorTy()) {
 
+        // Guard: the vector emission path below is only supported for SIMD16.
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         unsigned VectorSize = getVectorSize(sources[0].value);
 
         for (unsigned i = 0; i < VectorSize; ++i) {
@@ -4812,6 +4817,7 @@ void EmitPass::Inv(const SSource sources[2], const DstModifier& modifier) {
             sources[0].value->getType()->isVectorTy() &&
             sources[1].value->getType()->isVectorTy()) {
 
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         unsigned VectorSize = getVectorSize(sources[0].value);
 
         CVariable* src[1];
@@ -4874,6 +4880,7 @@ void EmitPass::FDiv(const SSource sources[2], const DstModifier& modifier)
             sources[0].value->getType()->isVectorTy() &&
             sources[1].value->getType()->isVectorTy()) {
 
+        IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16, "As of now Vector Emission is only supported for SIMD16");
         if (isVectorOfOnes(sources[0].value)) Inv(sources, modifier);
         else Div(sources,modifier);
 
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp b/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp
@@ -765,7 +765,24 @@ void IGCVectorizer::collectInstructionToProcess(VecArr &ToProcess,
     }
 }
 
+// Returns true only when F has a FunctionsInfo metadata entry whose sub-group
+// SIMD size equals 16. Also stores the MetaDataUtils pointer in MDUtils.
+bool IGCVectorizer::checkIfSIMD16(llvm::Function &F) {
+    MDUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
+
+    // No FunctionsInfo entry was found for F: report "not SIMD16".
+    if (MDUtils->findFunctionsInfoItem(&F) == MDUtils->end_FunctionsInfo())
+        return false;
+
+    IGC::IGCMD::FunctionInfoMetaDataHandle FuncInfo = MDUtils->getFunctionsInfoItem(&F);
+    return FuncInfo->getSubGroupSize()->getSIMDSize() == 16;
+}
+
 bool IGCVectorizer::runOnFunction(llvm::Function &F) {
+
+    // DPAS only allowed in simd16 mode + helps to reduce untested cases
+    if (!checkIfSIMD16(F)) return false;
+
     M = F.getParent();
     CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
     initializeLogFile(F);
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.h b/IGC/Compiler/CISACodeGen/IGCVectorizer.h
@@ -51,6 +51,7 @@ class IGCVectorizer : public llvm::FunctionPass {
     };
 
     CodeGenContext *CGCtx = nullptr;
+    IGCMD::MetaDataUtils* MDUtils = nullptr;
 
     // when we vectorize, we build a new vector chain,
     // this map contains associations between scalar and vector
@@ -67,6 +68,7 @@ class IGCVectorizer : public llvm::FunctionPass {
     std::string LogStr;
     llvm::raw_string_ostream OutputLogStream = raw_string_ostream(LogStr);
     Module* M = nullptr;
+    bool checkIfSIMD16(llvm::Function &F);
     void initializeLogFile(Function& F);
     void writeLog();
 
@@ -104,6 +106,7 @@ class IGCVectorizer : public llvm::FunctionPass {
     virtual bool runOnFunction(llvm::Function &F) override;
     virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
         AU.addRequired<CodeGenContextWrapper>();
+        AU.addRequired<MetaDataUtilsWrapper>();
     }
     IGCVectorizer();
     IGCVectorizer(const std::string& FileName);
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-binary-fmul.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-binary-fmul.ll
@@ -1,6 +1,3 @@
-; UNSUPPORTED: system-windows
-; REQUIRES: pvc-supported, regkeys
-
 ; RUN: igc_opt -S  --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
 
 ; CHECK: %vectorized_phi
@@ -12,7 +9,7 @@
 ; CHECK: %vector5 = insertelement <8 x float> %vector4
 ; CHECK: %vector6 = insertelement <8 x float> %vector5
 ; CHECK: %vector7 = insertelement <8 x float> %vector6
-; CHECK: %vectorized_binary = fmul <8 x float> %vector7, %vectorized_phi
+; CHECK: %vectorized_binary = fmul fast <8 x float> %vector7, %vectorized_phi
 ; CHECK: call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %vectorized_binary
 
 ; ModuleID = 'reduced.ll'
@@ -21,7 +18,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "spir64-unknown-unknown"
 
 ; Function Attrs: convergent nounwind
-define spir_kernel void @_attn_fwd() #0 {
+define spir_kernel void @quux() {
   br label %._crit_edge
 
 ._crit_edge:                                      ; preds = %._crit_edge, %0
@@ -83,3 +80,9 @@ uselistorder float (float)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
 attributes #0 = { convergent nounwind }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-incorrect.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-incorrect.ll
@@ -106,3 +106,8 @@ declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64, i32, i32, i32, i32
 
 declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
 
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-phi.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-phi.ll
@@ -83,3 +83,9 @@ declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64, i32, i32, i32, i32
 
 declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
 
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
+
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-unsupported-swizzle.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-dpas-unsupported-swizzle.ll
@@ -109,3 +109,9 @@ declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i3
 
 ; uselistorder directives
 uselistorder <8 x float> (<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)* @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32, { 1, 0 }
+
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-inv.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-inv.ll
@@ -1,6 +1,3 @@
-; UNSUPPORTED: system-windows
-; REQUIRES: regkeys
-
 ; RUN: igc_opt -S  --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
 
 ; CHECK: %vectorized_binary = fdiv fast <8 x float>
@@ -11,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "spir64-unknown-unknown"
 
 ; Function Attrs: convergent nounwind
-define spir_kernel void @_attn_fwd() #0 {
+define spir_kernel void @quux() #0 {
   br label %._crit_edge
 
 ._crit_edge:                                      ; preds = %._crit_edge, %0
@@ -73,3 +70,9 @@ uselistorder float (float)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
 attributes #0 = { convergent nounwind }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-not-inv.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-not-inv.ll
@@ -1,6 +1,3 @@
-; UNSUPPORTED: system-windows
-; REQUIRES: regkeys
-
 ; RUN: igc_opt -S  --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
 
 ; CHECK: %vectorized_binary = fdiv fast <8 x float>
@@ -11,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "spir64-unknown-unknown"
 
 ; Function Attrs: convergent nounwind
-define spir_kernel void @_attn_fwd() #0 {
+define spir_kernel void @quux() #0 {
   br label %._crit_edge
 
 ._crit_edge:                                      ; preds = %._crit_edge, %0
@@ -73,3 +70,9 @@ uselistorder float (float)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
 attributes #0 = { convergent nounwind }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!igc.functions = !{!0}
+!0 = !{void ()* @quux, !1}
+!1 = !{!2, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}