@@ -2802,9 +2802,31 @@ static void emitAppendBufOp(
28022802 resource = addResource (compiler , & atomicCounterResource );
28032803 }
28042804
2805- SpvOp op = instr -> opcode == IL_OP_APPEND_BUF_ALLOC ? SpvOpAtomicIIncrement
2806- : SpvOpAtomicIDecrement ;
2807-
2805+ bool useSubgroupOps = compiler -> kernel -> shaderType == IL_SHADER_COMPUTE ;
2806+ IlcSpvId electBlockBeginId ;
2807+ IlcSpvId electBlockEndId ;
2808+ IlcSpvId preElectBlockLabelId ;
2809+ IlcSpvId workgroupScopeId ;
2810+ IlcSpvId laneCountId , laneIndexId ;
2811+ if (useSubgroupOps ) {
2812+ preElectBlockLabelId = getTopBlockLabel (compiler );
2813+ electBlockBeginId = ilcSpvAllocId (compiler -> module );
2814+ electBlockEndId = ilcSpvAllocId (compiler -> module );
2815+
2816+ ilcSpvPutCapability (compiler -> module , SpvCapabilityGroupNonUniform );
2817+ ilcSpvPutCapability (compiler -> module , SpvCapabilityGroupNonUniformBallot );
2818+ workgroupScopeId = ilcSpvPutConstant (compiler -> module , compiler -> intId , SpvScopeWorkgroup );
2819+
2820+ IlcSpvId ballotId = ilcSpvPutGroupNonUniformBallot (compiler -> module , compiler -> uint4Id , workgroupScopeId , ilcSpvPutConstantTrue (compiler -> module , compiler -> boolId ));
2821+ laneCountId = ilcSpvPutGroupNonUniformBallotBitCount (compiler -> module , compiler -> uintId , workgroupScopeId , SpvGroupOperationReduce , ballotId );
2822+ laneIndexId = ilcSpvPutGroupNonUniformBallotBitCount (compiler -> module , compiler -> uintId , workgroupScopeId , SpvGroupOperationExclusiveScan , ballotId );
2823+ IlcSpvId electionCondId = ilcSpvPutGroupNonUniformElect (compiler -> module , compiler -> boolId , workgroupScopeId );
2824+ ilcSpvPutSelectionMerge (compiler -> module , electBlockEndId );
2825+ ilcSpvPutBranchConditional (compiler -> module , electionCondId , electBlockBeginId , electBlockEndId );
2826+ ilcSpvPutLabel (compiler -> module , electBlockBeginId );
2827+ } else {
2828+ laneCountId = ilcSpvPutConstant (compiler -> module , compiler -> uintId , 1u );
2829+ }
28082830 IlcSpvId ptrTypeId = ilcSpvPutPointerType (compiler -> module , SpvStorageClassStorageBuffer ,
28092831 compiler -> uintId );
28102832 IlcSpvId zeroId = ilcSpvPutConstant (compiler -> module , compiler -> intId , ZERO_LITERAL );
@@ -2816,10 +2838,27 @@ static void emitAppendBufOp(
28162838 IlcSpvId semanticsId = ilcSpvPutConstant (compiler -> module , compiler -> intId ,
28172839 SpvMemorySemanticsAcquireReleaseMask |
28182840 SpvMemorySemanticsUniformMemoryMask );
2819- IlcSpvId readId = ilcSpvPutAtomicOp (compiler -> module , op , compiler -> uintId , ptrId ,
2820- scopeId , semanticsId , 0 );
2821- IlcSpvId resId = emitVectorGrow (compiler , readId , compiler -> uintId , 1 );
2841+ IlcSpvId readId ;
2842+ SpvOp op = instr -> opcode == IL_OP_APPEND_BUF_ALLOC ? SpvOpAtomicIAdd : SpvOpAtomicISub ;
2843+
2844+ readId = ilcSpvPutAtomicOp (compiler -> module , op , compiler -> uintId , ptrId ,
2845+ scopeId , semanticsId , laneCountId );
28222846
2847+ if (useSubgroupOps ) {
2848+ ilcSpvPutBranch (compiler -> module , electBlockEndId );
2849+ ilcSpvPutLabel (compiler -> module , electBlockEndId );
2850+
2851+ IlcSpvId constUndefId = ilcSpvPutConstantUndef (compiler -> module , compiler -> uintId );
2852+ IlcSpvId phiLabels [4 ] = {
2853+ readId , electBlockBeginId ,
2854+ constUndefId , preElectBlockLabelId ,
2855+ };
2856+ readId = ilcSpvPutPhi (compiler -> module , compiler -> uintId , 4 , phiLabels );
2857+ readId = ilcSpvPutGroupNonUniformBroadcastFirst (compiler -> module , compiler -> uintId , workgroupScopeId , readId );
2858+ readId = ilcSpvPutOp2 (compiler -> module , instr -> opcode == IL_OP_APPEND_BUF_ALLOC ? SpvOpIAdd : SpvOpISub , compiler -> uintId , readId , laneIndexId );
2859+ }
2860+
2861+ IlcSpvId resId = emitVectorGrow (compiler , readId , compiler -> uintId , 1 );
28232862 storeDestination (compiler , dst , resId , compiler -> uint4Id );
28242863}
28252864
0 commit comments