@@ -81,7 +81,7 @@ ArgumentEncodingContext::encodeVertexBuffers(uint32_t slot_mask, uint64_t offset
       continue;
     }
     auto length = buffer->length();
-    auto [buffer_alloc, buffer_offset] = access(buffer, DXMT_ENCODER_RESOURCE_ACESS_READ);
+    auto [buffer_alloc, buffer_offset] = access<true>(buffer, DXMT_ENCODER_RESOURCE_ACESS_READ);
     entries[index].buffer_handle = buffer_alloc->gpuAddress() + buffer_offset + state.offset;
     entries[index].stride = state.stride;
     entries[index++].length = length > state.offset ? length - state.offset : 0;
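Vertex buffers are fetched by the vertex stage, so this `access` call is tagged with a compile-time `true`. Judging by the rest of the diff, the flag tells the tracker that any fence this access produces must be satisfied before the pass's vertex/mesh/object stages rather than deferred to the fragment stage. A minimal sketch of that idea, with every name other than `fence_wait`/`fence_wait_vertex` invented for illustration (this is not DXMT's actual `access()` implementation):

// Sketch only: a boolean template parameter decides which of the encoder's
// two wait sets a recorded dependency lands in.
#include <cstdint>
#include <set>

using FenceId = uint64_t;

struct EncoderFenceState {
  std::set<FenceId> fence_wait;        // presumably waited at the fragment stage
  std::set<FenceId> fence_wait_vertex; // presumably waited before the vertex/mesh/object stages

  // The flag is a template parameter, so the branch is resolved at compile time,
  // matching how access<true> / access<AtVertexStage> is instantiated per call site.
  template <bool AtVertexStage>
  void recordDependency(FenceId producer_fence) {
    if constexpr (AtVertexStage)
      fence_wait_vertex.insert(producer_fence);
    else
      fence_wait.insert(producer_fence);
  }
};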
@@ -143,7 +143,10 @@ template void ArgumentEncodingContext::encodeConstantBuffers<PipelineStage::Pixe
 template <PipelineStage stage, PipelineKind kind>
 void
 ArgumentEncodingContext::encodeConstantBuffers(const MTL_SHADER_REFLECTION *reflection, const MTL_SM50_SHADER_ARGUMENT *constant_buffers, uint64_t offset) {
-  uint64_t *encoded_buffer = getMappedArgumentBuffer<uint64_t, stage == PipelineStage::Compute>(offset);
+  uint64_t *encoded_buffer = getMappedArgumentBuffer<uint64_t, stage == PipelineStage::Compute>(offset);
+
+  constexpr bool AtVertexStage = stage == PipelineStage::Vertex || stage == PipelineStage::Domain ||
+                                 stage == PipelineStage::Hull || stage == PipelineStage::Geometry;
 
   for (unsigned i = 0; i < reflection->NumConstantBuffers; i++) {
     auto &arg = constant_buffers[i];
@@ -158,7 +161,7 @@ ArgumentEncodingContext::encodeConstantBuffers(const MTL_SHADER_REFLECTION *refl
       }
       auto argbuf = cbuf.buffer;
       // FIXME: did we intended to use the whole buffer?
-      auto [argbuf_alloc, argbuf_offset] = access(argbuf, DXMT_ENCODER_RESOURCE_ACESS_READ);
+      auto [argbuf_alloc, argbuf_offset] = access<AtVertexStage>(argbuf, DXMT_ENCODER_RESOURCE_ACESS_READ);
       encoded_buffer[arg.StructurePtrOffset] = argbuf_alloc->gpuAddress() + argbuf_offset + cbuf.offset;
       makeResident<stage, kind>(argbuf.ptr());
       break;
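`AtVertexStage` classifies the shader stage at compile time: Vertex, Domain, Hull and Geometry all run before rasterization (DXMT presumably maps them onto Metal's vertex/mesh/object side), so their resource reads count as vertex-stage accesses, while Pixel and Compute resolve to `false` (for Compute the stage split is meaningless anyway). A self-contained restatement of that classification; the helper name and the enumerator ordering below are assumptions made for the example:

// Sketch: the same compile-time classification as a reusable helper.
enum class PipelineStage { Vertex, Hull, Domain, Geometry, Pixel, Compute };

template <PipelineStage stage>
constexpr bool is_pre_rasterization_stage() {
  return stage == PipelineStage::Vertex || stage == PipelineStage::Domain ||
         stage == PipelineStage::Hull || stage == PipelineStage::Geometry;
}

static_assert(is_pre_rasterization_stage<PipelineStage::Geometry>());
static_assert(!is_pre_rasterization_stage<PipelineStage::Pixel>());
static_assert(!is_pre_rasterization_stage<PipelineStage::Compute>());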
@@ -246,6 +249,9 @@ ArgumentEncodingContext::encodeShaderResources(
 
   auto &UAVBindingSet = stage == PipelineStage::Compute ? cs_uav_ : om_uav_;
 
+  constexpr bool AtVertexStage = stage == PipelineStage::Vertex || stage == PipelineStage::Domain ||
+                                 stage == PipelineStage::Hull || stage == PipelineStage::Geometry;
+
   for (unsigned i = 0; i < BindingCount; i++) {
     auto &arg = arguments[i];
     switch (arg.Type) {
@@ -272,7 +278,7 @@ ArgumentEncodingContext::encodeShaderResources(
 
       if (arg.Flags & MTL_SM50_SHADER_ARGUMENT_BUFFER) {
         if (srv.buffer.ptr()) {
-          auto [srv_alloc, offset] = access(srv.buffer, srv.slice.byteOffset, srv.slice.byteLength, DXMT_ENCODER_RESOURCE_ACESS_READ);
+          auto [srv_alloc, offset] = access<AtVertexStage>(srv.buffer, srv.slice.byteOffset, srv.slice.byteLength, DXMT_ENCODER_RESOURCE_ACESS_READ);
           encoded_buffer[arg.StructurePtrOffset] = srv_alloc->gpuAddress() + offset + srv.slice.byteOffset;
           encoded_buffer[arg.StructurePtrOffset + 1] = srv.slice.byteLength;
           makeResident<stage, kind>(srv.buffer.ptr());
@@ -283,7 +289,7 @@ ArgumentEncodingContext::encodeShaderResources(
       } else if (arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE) {
         if (srv.buffer.ptr()) {
           assert(arg.Flags & MTL_SM50_SHADER_ARGUMENT_TBUFFER_OFFSET);
-          auto [view, offset] = access(srv.buffer, srv.viewId, DXMT_ENCODER_RESOURCE_ACESS_READ);
+          auto [view, offset] = access<AtVertexStage>(srv.buffer, srv.viewId, DXMT_ENCODER_RESOURCE_ACESS_READ);
           encoded_buffer[arg.StructurePtrOffset] = view.gpu_resource_id;
           encoded_buffer[arg.StructurePtrOffset + 1] =
               ((uint64_t)srv.slice.elementCount << 32) | (uint64_t)(srv.slice.firstElement + offset);
@@ -292,7 +298,7 @@ ArgumentEncodingContext::encodeShaderResources(
           assert(arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE_MINLOD_CLAMP);
           auto viewIdChecked = srv.texture->checkViewUseArray(srv.viewId, arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE_ARRAY);
           encoded_buffer[arg.StructurePtrOffset] =
-              access(srv.texture, viewIdChecked, DXMT_ENCODER_RESOURCE_ACESS_READ).gpu_resource_id;
+              access<AtVertexStage>(srv.texture, viewIdChecked, DXMT_ENCODER_RESOURCE_ACESS_READ).gpu_resource_id;
           encoded_buffer[arg.StructurePtrOffset + 1] = TextureMetadata(srv.texture->arrayLength(viewIdChecked), 0);
           makeResident<stage, kind>(srv.texture.ptr(), viewIdChecked);
         } else {
@@ -312,7 +318,7 @@ ArgumentEncodingContext::encodeShaderResources(
 
       if (arg.Flags & MTL_SM50_SHADER_ARGUMENT_BUFFER) {
         if (uav.buffer.ptr()) {
-          auto [uav_alloc, offset] = access(uav.buffer, uav.slice.byteOffset, uav.slice.byteLength, access_flags);
+          auto [uav_alloc, offset] = access<AtVertexStage>(uav.buffer, uav.slice.byteOffset, uav.slice.byteLength, access_flags);
           encoded_buffer[arg.StructurePtrOffset] = uav_alloc->gpuAddress() + offset + uav.slice.byteOffset;
           encoded_buffer[arg.StructurePtrOffset + 1] = uav.slice.byteLength;
           makeResident<stage, kind>(uav.buffer.ptr(), read, write);
@@ -323,15 +329,15 @@ ArgumentEncodingContext::encodeShaderResources(
       } else if (arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE) {
         if (uav.buffer.ptr()) {
           assert(arg.Flags & MTL_SM50_SHADER_ARGUMENT_TBUFFER_OFFSET);
-          auto [view, offset] = access(uav.buffer, uav.viewId, DXMT_ENCODER_RESOURCE_ACESS_READ);
+          auto [view, offset] = access<AtVertexStage>(uav.buffer, uav.viewId, DXMT_ENCODER_RESOURCE_ACESS_READ);
           encoded_buffer[arg.StructurePtrOffset] = view.gpu_resource_id;
           encoded_buffer[arg.StructurePtrOffset + 1] =
               ((uint64_t)uav.slice.elementCount << 32) | (uint64_t)(uav.slice.firstElement + offset);
           makeResident<stage, kind>(uav.buffer.ptr(), uav.viewId, read, write);
         } else if (uav.texture.ptr()) {
           assert(arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE_MINLOD_CLAMP);
           auto viewIdChecked = uav.texture->checkViewUseArray(uav.viewId, arg.Flags & MTL_SM50_SHADER_ARGUMENT_TEXTURE_ARRAY);
-          encoded_buffer[arg.StructurePtrOffset] = access(uav.texture, viewIdChecked, access_flags).gpu_resource_id;
+          encoded_buffer[arg.StructurePtrOffset] = access<AtVertexStage>(uav.texture, viewIdChecked, access_flags).gpu_resource_id;
           encoded_buffer[arg.StructurePtrOffset + 1] = TextureMetadata(uav.texture->arrayLength(viewIdChecked), 0);
           makeResident<stage, kind>(uav.texture.ptr(), viewIdChecked, read, write);
         } else {
@@ -341,7 +347,7 @@ ArgumentEncodingContext::encodeShaderResources(
       }
       if (arg.Flags & MTL_SM50_SHADER_ARGUMENT_UAV_COUNTER) {
        if (uav.counter) {
-          auto [counter_alloc, offset] = access(uav.counter, 0, 4, DXMT_ENCODER_RESOURCE_ACESS_READ | DXMT_ENCODER_RESOURCE_ACESS_WRITE);
+          auto [counter_alloc, offset] = access<AtVertexStage>(uav.counter, 0, 4, DXMT_ENCODER_RESOURCE_ACESS_READ | DXMT_ENCODER_RESOURCE_ACESS_WRITE);
          encoded_buffer[arg.StructurePtrOffset + 2] = counter_alloc->gpuAddress() + offset;
          makeResident<stage, kind>(uav.counter.ptr(), true, true);
        } else {
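Every `access(...)` call site in `encodeShaderResources` now carries the same `AtVertexStage` flag, whether the binding is an SRV buffer, a tbuffer view, a texture, a UAV, or a UAV counter, so a resource bound to a pre-rasterization stage always ends up in the vertex-stage wait set regardless of resource kind. The other half of the decision is which of the producer's two fences the consumer waits on; the sketch below shows one plausible way the two choices combine. Only `fence_wait`, `fence_wait_vertex` and `encoder_id_vertex` come from the diff; the `wrote_at_fragment` heuristic and all other names are assumptions.

// Hypothetical sketch of the dependency-recording decision the per-stage
// tracking implies: pick the producer fence to wait on, then pick which of
// the consumer's two wait sets records it.
#include <cstdint>
#include <set>

using FenceId = uint64_t;

struct ProducerPass {
  FenceId id;                // signaled after its fragment stage
  FenceId encoder_id_vertex; // signaled after its vertex/mesh/object stages
  bool wrote_at_fragment;    // e.g. render-target or fragment-stage UAV write
};

struct ConsumerPass {
  std::set<FenceId> fence_wait;        // waited at the fragment stage
  std::set<FenceId> fence_wait_vertex; // waited before the vertex stage
};

template <bool AtVertexStage>
void recordHazard(ConsumerPass &c, const ProducerPass &p) {
  // Wait on the producer's full-pass fence only if its fragment work matters;
  // a producer that touched the resource purely before rasterization can be
  // released earlier through its vertex fence.
  FenceId needed = p.wrote_at_fragment ? p.id : p.encoder_id_vertex;
  if constexpr (AtVertexStage)
    c.fence_wait_vertex.insert(needed); // the consumer's vertex stage blocks on it
  else
    c.fence_wait.insert(needed);        // the wait can be deferred to fragment
}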
@@ -572,6 +578,7 @@ ArgumentEncodingContext::startRenderPass(
   assert(!encoder_current);
   auto encoder_info = allocate<RenderEncoderData>();
   encoder_info->type = EncoderType::Render;
+  encoder_info->encoder_id_vertex = nextEncoderId();
   encoder_info->id = nextEncoderId();
   WMT::InitializeRenderPassInfo(encoder_info->info);
   encoder_info->cmd_head.type = WMTRenderCommandNop;
@@ -587,6 +594,7 @@ ArgumentEncodingContext::startRenderPass(
   encoder_current = encoder_info;
 
   fence_alias_map_.unalias(encoder_info->id & kFenceIdMask);
+  fence_alias_map_.unalias(encoder_info->encoder_id_vertex & kFenceIdMask);
   currentFrameStatistics().render_pass_count++;
 
   vro_state_.beginEncoder();
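`startRenderPass` now draws two ids from `nextEncoderId()`: `encoder_id_vertex` for the pass's pre-rasterization work and `id` for the pass as a whole, and it un-aliases both in the fence alias map. A reconstructed excerpt of the fields this change relies on; member names mirror the diff, while the concrete types are stand-ins since only the operations on them are visible here:

// Reconstructed excerpt, not the real dxmt header.
#include <cstdint>
#include <set>

enum class EncoderType { Render /* , Compute, Blit, ... */ };
using FenceSet = std::set<uint64_t>; // stand-in for DXMT's fence-set type

struct RenderEncoderDataSketch {
  EncoderType type = EncoderType::Render;
  uint64_t id = 0;                // fence id, presumably signaled after the fragment stage
  uint64_t encoder_id_vertex = 0; // new: fence id signaled after vertex/mesh/object stages
  FenceSet fence_wait;            // fences this pass waits on at the fragment stage
  FenceSet fence_wait_vertex;     // new: fences its vertex stage must wait on
  // ... render pass info, command list head, allocated argument buffer, etc.
};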
@@ -757,9 +765,13 @@ ArgumentEncodingContext::flushCommands(WMT::CommandBuffer cmdbuf, uint64_t seqId
       }
       auto gpu_buffer_ = data->allocated_argbuf;
       auto encoder = cmdbuf.renderCommandEncoder(data->info);
-      data->fence_wait.forEach(fence_alias_map_, [&](FenceId id) {
-        encoder.waitForFence(fence_pool_[id], WMTRenderStageVertex | WMTRenderStageMesh | WMTRenderStageObject);
-      });
+      data->fence_wait.forEach(
+          data->fence_wait_vertex, fence_alias_map_,
+          [&](FenceId id) {
+            encoder.waitForFence(fence_pool_[id], WMTRenderStageVertex | WMTRenderStageMesh | WMTRenderStageObject);
+          },
+          [&](FenceId id) { encoder.waitForFence(fence_pool_[id], WMTRenderStageFragment); }
+      );
       encoder.setVertexBuffer(gpu_buffer_, 0, 16);
       encoder.setVertexBuffer(gpu_buffer_, 0, 29);
       encoder.setVertexBuffer(gpu_buffer_, 0, 30);
@@ -832,6 +844,10 @@ ArgumentEncodingContext::flushCommands(WMT::CommandBuffer cmdbuf, uint64_t seqId
         );
       }
       encoder.encodeCommands(&data->cmd_head);
+      encoder.updateFence(
+          fence_pool_[data->encoder_id_vertex & kFenceIdMask],
+          WMTRenderStageVertex | WMTRenderStageMesh | WMTRenderStageObject
+      );
      encoder.updateFence(fence_pool_[data->id & kFenceIdMask], WMTRenderStageFragment);
      encoder.endEncoding();
      data->~RenderEncoderData();
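Taken together, the two hunks in `flushCommands` split both sides of the synchronization: waits recorded for the vertex stage presumably block the vertex/mesh/object stages while the remaining entries are deferred to the fragment stage, and the encoder now signals its vertex fence as soon as pre-rasterization work finishes, before signaling the main fence after the fragment stage. The toy sketch below shows that pairing against an invented encoder type; the real code goes through `FenceSet::forEach`, the fence alias map, and `WMT` stage masks.

// Illustration of the presumed wait/signal pairing; every type, constant and
// function name here is a stand-in.
#include <cstdint>
#include <set>

using FenceId = uint64_t;
constexpr uint32_t kStagePreRaster = 0x1; // stands in for Vertex | Mesh | Object
constexpr uint32_t kStageFragment  = 0x2; // stands in for Fragment

struct ToyEncoder {
  void waitForFence(FenceId, uint32_t /*before_stages*/) { /* record the wait */ }
  void updateFence(FenceId, uint32_t /*after_stages*/) { /* record the signal */ }
};

void emitPassFences(ToyEncoder &enc, FenceId vertex_fence, FenceId main_fence,
                    const std::set<FenceId> &wait_fragment,
                    const std::set<FenceId> &wait_vertex) {
  // Dependencies the vertex stage needs must be satisfied up front...
  for (FenceId id : wait_vertex)
    enc.waitForFence(id, kStagePreRaster);
  // ...while fragment-only dependencies let this pass's vertex work overlap
  // the producer's fragment work.
  for (FenceId id : wait_fragment)
    enc.waitForFence(id, kStageFragment);

  // Signal twice: once when pre-rasterization work is done, once at the end.
  enc.updateFence(vertex_fence, kStagePreRaster);
  enc.updateFence(main_fence, kStageFragment);
}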
@@ -1092,7 +1108,7 @@ ArgumentEncodingContext::checkEncoderRelation(EncoderData *former, EncoderData *
     auto r1 = reinterpret_cast<RenderEncoderData *>(latter);
     auto r0 = reinterpret_cast<RenderEncoderData *>(former);
 
-    if (isEncoderSignatureMatched(r0, r1)) {
+    if (isEncoderSignatureMatched(r0, r1) && !r1->fence_wait_vertex.contains(r0->id & kFenceIdMask)) {
       for (unsigned i = 0; i < r0->render_target_count; i++) {
         auto &a0 = r0->info.colors[i];
         auto &a1 = r1->info.colors[i];
@@ -1133,6 +1149,9 @@ ArgumentEncodingContext::checkEncoderRelation(EncoderData *former, EncoderData *
     r1->fence_wait.remove(r0->id & kFenceIdMask);
     r1->fence_wait.merge(r0->fence_wait);
     fence_alias_map_.alias(r0->id & kFenceIdMask, r1->id & kFenceIdMask);
+    r1->fence_wait_vertex.remove(r0->encoder_id_vertex & kFenceIdMask);
+    r1->fence_wait_vertex.merge(r0->fence_wait_vertex);
+    fence_alias_map_.alias(r0->encoder_id_vertex & kFenceIdMask, r1->encoder_id_vertex & kFenceIdMask);
 
     currentFrameStatistics().render_pass_optimized++;
     r0->~RenderEncoderData();
@@ -1151,6 +1170,23 @@ ArgumentEncodingContext::hasDataDependency(EncoderData *latter, EncoderData *for
   /**
   `former` is guaranteed unaliased
   */
+  if (latter->type == EncoderType::Render) {
+    auto render_latter = reinterpret_cast<RenderEncoderData *>(latter);
+    if (former->type == EncoderType::Render) {
+      auto render_former = reinterpret_cast<RenderEncoderData *>(former);
+      return render_latter->fence_wait.contains(render_former->id & kFenceIdMask) ||
+             render_latter->fence_wait_vertex.contains(render_former->id & kFenceIdMask) ||
+             render_latter->fence_wait.contains(render_former->encoder_id_vertex & kFenceIdMask) ||
+             render_latter->fence_wait_vertex.contains(render_former->encoder_id_vertex & kFenceIdMask);
+    }
+    return render_latter->fence_wait.contains(former->id & kFenceIdMask) ||
+           render_latter->fence_wait_vertex.contains(former->id & kFenceIdMask);
+  }
+  if (former->type == EncoderType::Render) {
+    auto render_former = reinterpret_cast<RenderEncoderData *>(former);
+    return latter->fence_wait.contains(render_former->id & kFenceIdMask) ||
+           latter->fence_wait.contains(render_former->encoder_id_vertex & kFenceIdMask);
+  }
   return latter->fence_wait.contains(former->id & kFenceIdMask);
 }
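With two fence ids per render pass, the dependency test has to consider every combination: the latter pass may wait on either of the former's fences, from either of its own wait sets. The same rule drives the new guard in `checkEncoderRelation`: two passes are no longer merged when the latter's vertex stage waits on the former's fragment-stage fence, since a single merged encoder could not preserve that ordering. A toy restatement with plain sets follows; the real code masks ids with `kFenceIdMask` and resolves aliases first, and `ToyRenderPass` is an invented type whose members mirror the diff.

// Toy restatement of the render-to-render case of hasDataDependency.
#include <cassert>
#include <cstdint>
#include <set>

struct ToyRenderPass {
  uint64_t id = 0;                      // fragment-stage fence id
  uint64_t encoder_id_vertex = 0;       // vertex-stage fence id
  std::set<uint64_t> fence_wait;        // waited at the fragment stage
  std::set<uint64_t> fence_wait_vertex; // waited before the vertex stage
};

bool hasDependency(const ToyRenderPass &latter, const ToyRenderPass &former) {
  // Any wait on either of the former's fences, from either set, is a dependency.
  return latter.fence_wait.count(former.id) || latter.fence_wait_vertex.count(former.id) ||
         latter.fence_wait.count(former.encoder_id_vertex) ||
         latter.fence_wait_vertex.count(former.encoder_id_vertex);
}

int main() {
  ToyRenderPass former, latter;
  former.id = 7;
  former.encoder_id_vertex = 6;
  latter.id = 9;
  latter.encoder_id_vertex = 8;
  // The latter pass's vertex stage reads something the former produces at fragment time:
  latter.fence_wait_vertex.insert(former.id);
  assert(hasDependency(latter, former));
  // This is also exactly the case the new checkEncoderRelation guard refuses to merge.
  return 0;
}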
11561192
0 commit comments