@@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					    void *ras_error_status);
@@ -828,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
	if (action == PREEMPT_QUEUES_NO_UNMAP) {
-		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, seq);
+		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
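In the PREEMPT_QUEUES_NO_UNMAP case the UNMAP_QUEUES packet above now carries the ring's current write pointer, wrapped to the ring buffer with buf_mask, instead of a fence address and sequence number; the trailing fence is emitted on the gfx ring itself by gfx_v9_0_ring_preempt_ib() further down. A minimal standalone sketch of why the mask is applied, with a hypothetical ring size and plain C types rather than the real amdgpu_ring layout:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical ring with a power-of-two buffer of 1024 dwords. */
struct ring {
	uint64_t wptr;     /* monotonically increasing write pointer */
	uint64_t buf_mask; /* ring size in dwords minus one          */
};

int main(void)
{
	struct ring r = { .wptr = 0x12345, .buf_mask = 1024 - 1 };

	/* The packet only needs the offset inside the ring buffer,
	 * so the monotonic pointer is wrapped with the mask. */
	printf("restart offset = 0x%llx\n",
	       (unsigned long long)(r.wptr & r.buf_mask));
	return 0;
}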
@@ -5204,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
	control |= ib->length_dw | (vmid << 24);
 
-	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+		if (flags & AMDGPU_IB_PREEMPTED)
+			control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-			gfx_v9_0_ring_emit_de_meta(ring);
+			gfx_v9_0_ring_emit_de_meta(ring,
+						   (!amdgpu_sriov_vf(ring->adev) &&
+						   flags & AMDGPU_IB_PREEMPTED) ?
+						   true : false);
	}
 
	amdgpu_ring_write(ring, header);
@@ -5263,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+	uint32_t dw2 = 0;
 
	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-					       EOP_TC_NC_ACTION_EN) :
-					      (EOP_TCL1_ACTION_EN |
-					       EOP_TC_ACTION_EN |
-					       EOP_TC_WB_ACTION_EN |
-					       EOP_TC_MD_ACTION_EN)) |
-				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-				 EVENT_INDEX(5)));
+
+	if (writeback) {
+		dw2 = EOP_TC_NC_ACTION_EN;
+	} else {
+		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+				EOP_TC_MD_ACTION_EN;
+	}
+	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+			EVENT_INDEX(5);
+	if (exec)
+		dw2 |= EOP_EXEC;
+
+	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
 
	/*
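The RELEASE_MEM cache-action dword is now assembled in a local dw2 so that the EOP_EXEC bit can be ORed in when the caller passes AMDGPU_FENCE_FLAG_EXEC, which the preemption path uses for its trailing fence. A standalone sketch of the same flag-to-dword construction; the bit values below are placeholders invented for illustration, not the real EOP_*/EVENT_* encodings from soc15d.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder bit positions; the real encodings live in soc15d.h. */
#define TC_NC_ACTION   (1u << 0)
#define TCL1_ACTION    (1u << 1)
#define TC_ACTION      (1u << 2)
#define TC_MD_ACTION   (1u << 3)
#define TC_WB_ACTION   (1u << 4)
#define EXEC_BIT       (1u << 5)
#define FLUSH_EVENT    (0x14u << 8)  /* EVENT_TYPE(...) stand-in */
#define EVENT_IDX_5    (5u << 16)    /* EVENT_INDEX(5) stand-in  */

static uint32_t build_release_mem_dw2(bool writeback, bool exec)
{
	uint32_t dw2;

	/* writeback-only fences skip the full cache invalidate */
	dw2 = writeback ? TC_NC_ACTION
			: (TCL1_ACTION | TC_ACTION | TC_MD_ACTION);
	dw2 |= TC_WB_ACTION | FLUSH_EVENT | EVENT_IDX_5;
	if (exec)
		dw2 |= EXEC_BIT; /* trailing fence signals at execution */
	return dw2;
}

int main(void)
{
	printf("dw2 = 0x%08x\n", build_release_mem_dw2(false, true));
	return 0;
}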
@@ -5378,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
	struct v9_ce_ib_state ce_payload = {0};
-	uint64_t csa_addr;
+	uint64_t offset, ce_payload_gpu_addr;
+	void *ce_payload_cpu_addr;
	int cnt;
 
	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ce_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	}
 
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+					   sizeof(ce_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+					   sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	ring->trail_seq += 1;
+	amdgpu_ring_alloc(ring, 13);
+	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+	/*reset the CP_VMID_PREEMPT after trailing fence*/
+	amdgpu_ring_emit_wreg(ring,
+			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+			      0x0);
+
+	/* assert IB preemption, emit the trailing fence */
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+				   ring->trail_fence_gpu_addr,
+				   ring->trail_seq);
+
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*ring->trail_fence_cpu_addr))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+	}
+
+	amdgpu_ring_commit(ring);
+
+	/* deassert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
	struct v9_de_ib_state de_payload = {0};
-	uint64_t csa_addr, gds_addr;
+	uint64_t offset, gds_addr, de_payload_gpu_addr;
+	void *de_payload_cpu_addr;
	int cnt;
 
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
-	gds_addr = csa_addr + 4096;
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		de_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gds_backup) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
+				 PAGE_SIZE);
+	}
+
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
@@ -5414,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+					   sizeof(de_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+					   sizeof(de_payload) >> 2);
 }
 
 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
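Taken together, gfx_v9_0_ring_preempt_ib() clears the ring's conditional-execute gate, emits a trailing fence with AMDGPU_FENCE_FLAG_EXEC, asks the KIQ to preempt the queue with PREEMPT_QUEUES_NO_UNMAP, and busy-polls the trailing fence for up to adev->usec_timeout microseconds, while the CE/DE metadata writers replay the saved CSA payload when resume is true. A toy model of that poll-with-timeout handshake in plain C, where a POSIX thread stands in for the CP firmware (this is an illustration, not amdgpu code):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Shared word that models the trailing-fence location in memory. */
static volatile uint32_t trail_fence;

static void *cp_firmware(void *arg)
{
	uint32_t seq = *(uint32_t *)arg;

	usleep(200);        /* pretend the preempt request takes a while */
	trail_fence = seq;  /* "signal" the trailing fence               */
	return NULL;
}

int main(void)
{
	uint32_t trail_seq = 1;
	int i, usec_timeout = 100000, r = 0;
	pthread_t t;

	pthread_create(&t, NULL, cp_firmware, &trail_seq);

	/* Bounded poll, mirroring the udelay(1) loop in the patch. */
	for (i = 0; i < usec_timeout; i++) {
		if (trail_fence == trail_seq)
			break;
		usleep(1);
	}
	if (i >= usec_timeout)
		r = -1;

	pthread_join(t, NULL);
	printf("preempt %s after ~%d us\n", r ? "timed out" : "completed", i);
	return 0;
}

Build the model with cc -pthread; the point is only the bounded-poll pattern, not the real KIQ packet traffic.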
@@ -5432,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
	uint32_t dw2 = 0;
 
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v9_0_ring_emit_ce_meta(ring);
+	gfx_v9_0_ring_emit_ce_meta(ring,
+				   (!amdgpu_sriov_vf(ring->adev) &&
+				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6760,6 +6883,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.preempt_ib = gfx_v9_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
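Registering .preempt_ib in gfx_v9_0_ring_funcs_gfx is what lets the generic ring layer reach the new routine through its usual function-pointer indirection (the amdgpu_ring_preempt_ib() wrapper, as far as I can tell). A stripped-down model of that dispatch pattern, with stand-in types rather than the real amdgpu structures:

#include <stdio.h>

struct ring;

/* Per-IP callback table, modeled on amdgpu_ring_funcs. */
struct ring_funcs {
	int (*preempt_ib)(struct ring *ring); /* optional hook */
};

struct ring {
	const struct ring_funcs *funcs;
	int idx;
};

static int gfx_preempt_ib(struct ring *ring)
{
	printf("preempting ring %d\n", ring->idx);
	return 0;
}

static const struct ring_funcs gfx_ring_funcs = {
	.preempt_ib = gfx_preempt_ib,
};

int main(void)
{
	struct ring ring = { .funcs = &gfx_ring_funcs, .idx = 0 };

	/* Generic code only calls the hook when the IP block provides it. */
	if (ring.funcs->preempt_ib)
		ring.funcs->preempt_ib(&ring);
	return 0;
}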