@@ -51,6 +51,13 @@ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_US = "__built
// Builtin-name constants (static llvm::StringRef members of SubGroupFuncsResolution).
// NOTE(review): each literal below begins with a space inside the quotes; this looks
// like an extraction artifact rather than intended text - confirm against upstream.

// Names of the (non-clustered) broadcast builtins.
5151const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_F = " __builtin_IB_simd_broadcast_f" ;
5252const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_H = " __builtin_IB_simd_broadcast_h" ;
5353const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_BROADCAST_DF = " __builtin_IB_simd_broadcast_df" ;
// Names of the clustered-broadcast builtins. visitCallInst compares the callee name
// against each of these seven constants and, on a match, replaces the call with a
// GenISA_WaveClusteredBroadcast intrinsic call.
// The _b/_c/_us/_f/_h/_df suffixes presumably select the operand type - TODO confirm.
54+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST = " __builtin_IB_simd_clustered_broadcast" ;
55+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_B = " __builtin_IB_simd_clustered_broadcast_b" ;
56+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_C = " __builtin_IB_simd_clustered_broadcast_c" ;
57+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_US = " __builtin_IB_simd_clustered_broadcast_us" ;
58+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_F = " __builtin_IB_simd_clustered_broadcast_f" ;
59+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_H = " __builtin_IB_simd_clustered_broadcast_h" ;
60+ const llvm::StringRef SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_DF = " __builtin_IB_simd_clustered_broadcast_df" ;
// Names of the global-memory SIMD block-read builtins (1-, 2- and 4-wide variants by name).
5461const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_1_GBL = " __builtin_IB_simd_block_read_1_global" ;
5562const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_2_GBL = " __builtin_IB_simd_block_read_2_global" ;
5663const llvm::StringRef SubGroupFuncsResolution::SIMD_BLOCK_READ_4_GBL = " __builtin_IB_simd_block_read_4_global" ;
@@ -680,6 +687,41 @@ void SubGroupFuncsResolution::visitCallInst(CallInst& CI)
680687 CI.replaceAllUsesWith (simdBroadcast);
681688 CI.eraseFromParent ();
682689 }
690+ else if (funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST) ||
691+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_US) ||
692+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_F) ||
693+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_H) ||
694+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_C) ||
695+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_B) ||
696+ funcName.equals (SubGroupFuncsResolution::SUB_GROUP_CLUSTERED_BROADCAST_DF)
697+ )
698+ {
699+ // Creates intrinsics that will be lowered in the CodeGen and will handle the sub_group_clustered_broadcast function
700+ IRBuilder<> IRB (&CI);
701+ Value* args[4 ];
702+ args[0 ] = CI.getArgOperand (0 );
703+ args[1 ] = CI.getArgOperand (1 );
704+ args[2 ] = CI.getArgOperand (2 );
705+ args[3 ] = IRB.getInt32 (0 );
706+
707+ if (!isa<ConstantInt>(args[1 ]))
708+ {
709+ m_pCtx->EmitError (" cluster_size argument in clustered_broadcast must be constant." , &CI);
710+ return ;
711+ }
712+ if (!isa<ConstantInt>(args[2 ]))
713+ {
714+ m_pCtx->EmitError (" in_cluster_lane argument in clustered_broadcast must be constant." , &CI);
715+ return ;
716+ }
717+
718+ Function* simdClusteredBroadcastFunc = GenISAIntrinsic::getDeclaration (CI.getCalledFunction ()->getParent (),
719+ GenISAIntrinsic::GenISA_WaveClusteredBroadcast, args[0 ]->getType ());
720+ Instruction* simdClusteredBroadcast = CallInst::Create (simdClusteredBroadcastFunc, args, " simdClusteredBroadcast" , &CI);
721+ updateDebugLoc (&CI, simdClusteredBroadcast);
722+ CI.replaceAllUsesWith (simdClusteredBroadcast);
723+ CI.eraseFromParent ();
724+ }
683725 else if (funcName.equals (SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN) ||
684726 funcName.equals (SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN_US) ||
685727 funcName.equals (SubGroupFuncsResolution::SUB_GROUP_SHUFFLE_DOWN_UC))
0 commit comments