@@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                            void *ras_error_status);
@@ -828,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
         if (action == PREEMPT_QUEUES_NO_UNMAP) {
-                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-                amdgpu_ring_write(kiq_ring, seq);
+                amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+                amdgpu_ring_write(kiq_ring, 0);
+                amdgpu_ring_write(kiq_ring, 0);
+
         } else {
                 amdgpu_ring_write(kiq_ring, 0);
                 amdgpu_ring_write(kiq_ring, 0);
@@ -5204,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
         control |= ib->length_dw | (vmid << 24);
 
-        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+        if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                 control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+                if (flags & AMDGPU_IB_PREEMPTED)
+                        control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-                        gfx_v9_0_ring_emit_de_meta(ring);
+                        gfx_v9_0_ring_emit_de_meta(ring,
+                                                   (!amdgpu_sriov_vf(ring->adev) &&
+                                                    flags & AMDGPU_IB_PREEMPTED) ?
+                                                   true : false);
         }
 
         amdgpu_ring_write(ring, header);
@@ -5263,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+        bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+        uint32_t dw2 = 0;
 
         /* RELEASE_MEM - flush caches, send int */
         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-        amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-                                               EOP_TC_NC_ACTION_EN) :
-                                              (EOP_TCL1_ACTION_EN |
-                                               EOP_TC_ACTION_EN |
-                                               EOP_TC_WB_ACTION_EN |
-                                               EOP_TC_MD_ACTION_EN)) |
-                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-                                 EVENT_INDEX(5)));
+
+        if (writeback) {
+                dw2 = EOP_TC_NC_ACTION_EN;
+        } else {
+                dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+                      EOP_TC_MD_ACTION_EN;
+        }
+        dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+               EVENT_INDEX(5);
+        if (exec)
+                dw2 |= EOP_EXEC;
+
+        amdgpu_ring_write(ring, dw2);
         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
 
         /*
@@ -5378,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
         amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+        struct amdgpu_device *adev = ring->adev;
         struct v9_ce_ib_state ce_payload = {0};
-        uint64_t csa_addr;
+        uint64_t offset, ce_payload_gpu_addr;
+        void *ce_payload_cpu_addr;
         int cnt;
 
         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-        csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+        if (ring->is_mes_queue) {
+                offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                  gfx[0].gfx_meta_data) +
+                        offsetof(struct v9_gfx_meta_data, ce_payload);
+                ce_payload_gpu_addr =
+                        amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+                ce_payload_cpu_addr =
+                        amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+        } else {
+                offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+                ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+                ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+        }
 
         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                                  WRITE_DATA_DST_SEL(8) |
                                  WR_CONFIRM) |
                                  WRITE_DATA_CACHE_POLICY(0));
-        amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-        amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+        amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+        amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+        if (resume)
+                amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+                                           sizeof(ce_payload) >> 2);
+        else
+                amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+                                           sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+        int i, r = 0;
+        struct amdgpu_device *adev = ring->adev;
+        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+        struct amdgpu_ring *kiq_ring = &kiq->ring;
+        unsigned long flags;
+
+        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+                return -EINVAL;
+
+        spin_lock_irqsave(&kiq->ring_lock, flags);
+
+        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+                spin_unlock_irqrestore(&kiq->ring_lock, flags);
+                return -ENOMEM;
+        }
+
+        /* assert preemption condition */
+        amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+        ring->trail_seq += 1;
+        amdgpu_ring_alloc(ring, 13);
+        gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+                                 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+        /* reset the CP_VMID_PREEMPT after trailing fence */
+        amdgpu_ring_emit_wreg(ring,
+                              SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+                              0x0);
+
+        /* assert IB preemption, emit the trailing fence */
+        kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+                                   ring->trail_fence_gpu_addr,
+                                   ring->trail_seq);
+
+        amdgpu_ring_commit(kiq_ring);
+        spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+        /* poll the trailing fence */
+        for (i = 0; i < adev->usec_timeout; i++) {
+                if (ring->trail_seq ==
+                    le32_to_cpu(*ring->trail_fence_cpu_addr))
+                        break;
+                udelay(1);
+        }
+
+        if (i >= adev->usec_timeout) {
+                r = -EINVAL;
+                DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+        }
+
+        amdgpu_ring_commit(ring);
+
+        /* deassert preemption condition */
+        amdgpu_ring_set_preempt_cond_exec(ring, true);
+        return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 {
+        struct amdgpu_device *adev = ring->adev;
         struct v9_de_ib_state de_payload = {0};
-        uint64_t csa_addr, gds_addr;
+        uint64_t offset, gds_addr, de_payload_gpu_addr;
+        void *de_payload_cpu_addr;
         int cnt;
 
-        csa_addr = amdgpu_csa_vaddr(ring->adev);
-        gds_addr = csa_addr + 4096;
+        if (ring->is_mes_queue) {
+                offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                  gfx[0].gfx_meta_data) +
+                        offsetof(struct v9_gfx_meta_data, de_payload);
+                de_payload_gpu_addr =
+                        amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+                de_payload_cpu_addr =
+                        amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+                offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                  gfx[0].gds_backup) +
+                        offsetof(struct v9_gfx_meta_data, de_payload);
+                gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+        } else {
+                offset = offsetof(struct v9_gfx_meta_data, de_payload);
+                de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+                de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+                gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+                                 AMDGPU_CSA_SIZE - adev->gds.gds_size,
+                                 PAGE_SIZE);
+        }
+
         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
@@ -5414,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
                                  WRITE_DATA_DST_SEL(8) |
                                  WR_CONFIRM) |
                                  WRITE_DATA_CACHE_POLICY(0));
-        amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-        amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-        amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+        amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+        amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+        if (resume)
+                amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+                                           sizeof(de_payload) >> 2);
+        else
+                amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+                                           sizeof(de_payload) >> 2);
 }
 
 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
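Editor's note on the preemption flow added above (a summary plus a hedged sketch, not part of the patch): gfx_v9_0_ring_preempt_ib() asserts the preemption condition by clearing the ring's conditional-execution word, emits a trailing fence with AMDGPU_FENCE_FLAG_EXEC, asks the KIQ to preempt the gfx queue via PREEMPT_QUEUES_NO_UNMAP, polls trail_fence_cpu_addr until the CP acknowledges, and finally deasserts the condition. When the preempted work is resubmitted with AMDGPU_IB_PREEMPTED, the CE/DE metadata is emitted with resume = true, i.e. replayed from the CPU-visible CSA copy that holds the saved state rather than from a zeroed payload. The cond-exec helper used above is assumed to be the existing amdgpu_ring_set_preempt_cond_exec() from amdgpu_ring.h, roughly:

        /* Assumed helper (amdgpu_ring.h), shown for context only: it writes the
         * ring's conditional-execution word that COND_EXEC packets test, so the
         * CP skips (false) or executes (true) the guarded packets. */
        static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
                                                             bool cond_exec)
        {
                *ring->cond_exe_cpu_addr = cond_exec;
        }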
@@ -5432,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
         uint32_t dw2 = 0;
 
-        if (amdgpu_sriov_vf(ring->adev))
-                gfx_v9_0_ring_emit_ce_meta(ring);
+        gfx_v9_0_ring_emit_ce_meta(ring,
+                                   (!amdgpu_sriov_vf(ring->adev) &&
+                                    flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6760,6 +6883,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+        .preempt_ib = gfx_v9_0_ring_preempt_ib,
         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
         .emit_wreg = gfx_v9_0_ring_emit_wreg,
         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
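Editor's note (not part of the patch): with .preempt_ib wired into gfx_v9_0_ring_funcs_gfx, the common amdgpu_ring_preempt_ib() helper is assumed to dispatch to gfx_v9_0_ring_preempt_ib() through this ring-funcs table, for example from the amdgpu_preempt_ib debugfs hook. A hypothetical caller is sketched below for illustration only; every name except gfx_v9_0_ring_preempt_ib is an assumption about the surrounding amdgpu code:

        /* Hypothetical example: request mid-command-buffer preemption on gfx ring 0. */
        static int example_trigger_gfx_preemption(struct amdgpu_device *adev)
        {
                struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];

                if (!ring->funcs->preempt_ib)
                        return -EOPNOTSUPP;     /* preemption not wired for this ring */

                /* amdgpu_ring_preempt_ib() is assumed to expand to
                 * ring->funcs->preempt_ib(ring), i.e. gfx_v9_0_ring_preempt_ib() here. */
                return amdgpu_ring_preempt_ib(ring);
        }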