Skip to content

Commit 0c97a19

Browse files
jiadozhu authored and alexdeucher committed
drm/amdgpu: Add software ring callbacks for gfx9 (v8)
Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Cc: Christian Koenig <[email protected]>
Cc: Luben Tuikov <[email protected]>
Cc: Andrey Grodzovsky <[email protected]>
Cc: Michel Dänzer <[email protected]>
Cc: Likun Gao <[email protected]>
Signed-off-by: Jiadong.Zhu <[email protected]>
Acked-by: Luben Tuikov <[email protected]>
Acked-by: Huang Rui <[email protected]>
Acked-by: Christian König <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent ded946f commit 0c97a19

File tree

5 files changed

+136
-1
lines changed

5 files changed

+136
-1
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ struct amdgpu_gfx {
354354

355355
bool is_poweron;
356356

357+
struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
357358
struct amdgpu_ring_mux muxer;
358359
};
359360

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ struct amdgpu_vm;
3939
#define AMDGPU_MAX_RINGS 28
4040
#define AMDGPU_MAX_HWIP_RINGS 8
4141
#define AMDGPU_MAX_GFX_RINGS 2
42+
#define AMDGPU_MAX_SW_GFX_RINGS 2
4243
#define AMDGPU_MAX_COMPUTE_RINGS 8
4344
#define AMDGPU_MAX_VCE_RINGS 3
4445
#define AMDGPU_MAX_UVD_ENC_RINGS 2

drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@
2929

3030
#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
3131

32+
/*
 * Static description of each software gfx ring, indexed by software ring
 * index: entry 0 is "gfx_low" at AMDGPU_RING_PRIO_DEFAULT, entry 1 is
 * "gfx_high" at AMDGPU_RING_PRIO_2.
 *
 * The table is read only through amdgpu_sw_ring_name() and
 * amdgpu_sw_ring_priority(), which bounds-check the index against
 * ARRAY_SIZE(sw_ring_info).
 *
 * hw_pio holds the priority value returned by amdgpu_sw_ring_priority().
 * NOTE(review): the field name looks like a typo of "hw_prio" - confirm
 * before renaming, since amdgpu_sw_ring_priority() refers to it by name.
 */
static const struct ring_info {
33+
unsigned int hw_pio;
34+
const char *ring_name;
35+
} sw_ring_info[] = {
36+
{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
37+
{ AMDGPU_RING_PRIO_2, "gfx_high"},
38+
};
39+
3240
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
3341
unsigned int entry_size)
3442
{
@@ -219,3 +227,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
219227
{
220228
WARN_ON(!ring->is_sw_ring);
221229
}
230+
231+
/*
 * amdgpu_sw_ring_name - look up the name of a software gfx ring
 * @idx: index into sw_ring_info[]
 *
 * Returns the ring_name string of sw_ring_info[idx] when idx is below
 * ARRAY_SIZE(sw_ring_info), otherwise NULL.
 *
 * NOTE(review): gfx_v9_0_sw_init() passes the return value directly to
 * sprintf() as the format argument, so a NULL return (or a name that
 * contained '%') would be unsafe at that call site.
 * NOTE(review): idx is a signed int compared against ARRAY_SIZE(); this
 * presumably rejects negative values through unsigned conversion - confirm
 * against the ARRAY_SIZE definition.
 */
const char *amdgpu_sw_ring_name(int idx)
232+
{
233+
return idx < ARRAY_SIZE(sw_ring_info) ?
234+
sw_ring_info[idx].ring_name : NULL;
235+
}
236+
237+
/*
 * amdgpu_sw_ring_priority - look up the priority of a software gfx ring
 * @idx: index into sw_ring_info[]
 *
 * Returns the hw_pio value of sw_ring_info[idx] when idx is below
 * ARRAY_SIZE(sw_ring_info); any other index falls back to
 * AMDGPU_RING_PRIO_DEFAULT. gfx_v9_0_sw_init() feeds the result to
 * amdgpu_ring_init() as the ring's hardware priority.
 */
unsigned int amdgpu_sw_ring_priority(int idx)
238+
{
239+
return idx < ARRAY_SIZE(sw_ring_info) ?
240+
sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
241+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
7373
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
7474
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
7575

76+
/*
 * Per-index lookups into the static software ring table defined in
 * amdgpu_ring_mux.c. amdgpu_sw_ring_name() returns NULL and
 * amdgpu_sw_ring_priority() returns AMDGPU_RING_PRIO_DEFAULT when idx is
 * outside that table.
 */
const char *amdgpu_sw_ring_name(int idx);
77+
unsigned int amdgpu_sw_ring_priority(int idx);
7678
#endif

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
#include "amdgpu_ras.h"
4949

50+
#include "amdgpu_ring_mux.h"
5051
#include "gfx_v9_4.h"
5152
#include "gfx_v9_0.h"
5253
#include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
5657
#include "asic_reg/gc/gc_9_0_default.h"
5758

5859
#define GFX9_NUM_GFX_RINGS 1
60+
#define GFX9_NUM_SW_GFX_RINGS 2
5961
#define GFX9_MEC_HPD_SIZE 4096
6062
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
6163
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2103,6 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle)
21032105
struct amdgpu_ring *ring;
21042106
struct amdgpu_kiq *kiq;
21052107
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2108+
unsigned int hw_prio;
21062109

21072110
switch (adev->ip_versions[GC_HWIP][0]) {
21082111
case IP_VERSION(9, 0, 1):
@@ -2186,13 +2189,51 @@ static int gfx_v9_0_sw_init(void *handle)
21862189
sprintf(ring->name, "gfx_%d", i);
21872190
ring->use_doorbell = true;
21882191
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2192+
2193+
/* disable scheduler on the real ring */
2194+
ring->no_scheduler = true;
21892195
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
21902196
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
21912197
AMDGPU_RING_PRIO_DEFAULT, NULL);
21922198
if (r)
21932199
return r;
21942200
}
21952201

2202+
/* set up the software rings */
2203+
if (adev->gfx.num_gfx_rings) {
2204+
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2205+
ring = &adev->gfx.sw_gfx_ring[i];
2206+
ring->ring_obj = NULL;
2207+
sprintf(ring->name, amdgpu_sw_ring_name(i));
2208+
ring->use_doorbell = true;
2209+
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2210+
ring->is_sw_ring = true;
2211+
hw_prio = amdgpu_sw_ring_priority(i);
2212+
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2213+
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2214+
NULL);
2215+
if (r)
2216+
return r;
2217+
ring->wptr = 0;
2218+
}
2219+
2220+
/* init the muxer and add software rings */
2221+
r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2222+
GFX9_NUM_SW_GFX_RINGS);
2223+
if (r) {
2224+
DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2225+
return r;
2226+
}
2227+
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2228+
r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2229+
&adev->gfx.sw_gfx_ring[i]);
2230+
if (r) {
2231+
DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2232+
return r;
2233+
}
2234+
}
2235+
}
2236+
21962237
/* set up the compute queues - allocate horizontally across pipes */
21972238
ring_id = 0;
21982239
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2243,6 +2284,12 @@ static int gfx_v9_0_sw_fini(void *handle)
22432284
int i;
22442285
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
22452286

2287+
if (adev->gfx.num_gfx_rings) {
2288+
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2289+
amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2290+
amdgpu_ring_mux_fini(&adev->gfx.muxer);
2291+
}
2292+
22462293
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
22472294
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
22482295
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -5712,7 +5759,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
57125759

57135760
switch (me_id) {
57145761
case 0:
5715-
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5762+
/* Fence signals are handled on the software rings*/
5763+
if (adev->gfx.num_gfx_rings) {
5764+
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5765+
amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5766+
}
57165767
break;
57175768
case 1:
57185769
case 2:
@@ -6717,6 +6768,61 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
67176768
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
67186769
};
67196770

6771+
/*
 * Ring callback table for the gfx9 software gfx rings.
 *
 * gfx_v9_0_set_ring_funcs() installs this table on each of the
 * GFX9_NUM_SW_GFX_RINGS entries of adev->gfx.sw_gfx_ring[] when
 * adev->gfx.num_gfx_rings is non-zero.
 *
 * The read/write pointer accessors (get_rptr, get_wptr, set_wptr) and
 * insert_nop use the amdgpu_sw_ring_* helpers, and pad_ib uses
 * amdgpu_ring_generic_pad_ib; the packet emission, test and recovery
 * callbacks are the gfx_v9 ring functions.
 */
static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6772+
.type = AMDGPU_RING_TYPE_GFX,
6773+
.align_mask = 0xff,
6774+
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6775+
.support_64bit_ptrs = true,
6776+
.secure_submission_supported = true,
6777+
.vmhub = AMDGPU_GFXHUB_0,
6778+
.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6779+
.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6780+
.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6781+
.emit_frame_size = /* totally 242 maximum if 16 IBs */
6782+
5 + /* COND_EXEC */
6783+
7 + /* PIPELINE_SYNC */
6784+
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6785+
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6786+
2 + /* VM_FLUSH */
6787+
8 + /* FENCE for VM_FLUSH */
6788+
20 + /* GDS switch */
6789+
4 + /* double SWITCH_BUFFER,
6790+
* the first COND_EXEC jump to the place just
6791+
* prior to this double SWITCH_BUFFER
6792+
*/
6793+
5 + /* COND_EXEC */
6794+
7 + /* HDP_flush */
6795+
4 + /* VGT_flush */
6796+
14 + /* CE_META */
6797+
31 + /* DE_META */
6798+
3 + /* CNTX_CTRL */
6799+
5 + /* HDP_INVL */
6800+
8 + 8 + /* FENCE x2 */
6801+
2 + /* SWITCH_BUFFER */
6802+
7, /* gfx_v9_0_emit_mem_sync */
6803+
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6804+
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6805+
.emit_fence = gfx_v9_0_ring_emit_fence,
6806+
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6807+
.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6808+
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6809+
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6810+
.test_ring = gfx_v9_0_ring_test_ring,
6811+
.test_ib = gfx_v9_0_ring_test_ib,
6812+
.insert_nop = amdgpu_sw_ring_insert_nop,
6813+
.pad_ib = amdgpu_ring_generic_pad_ib,
6814+
.emit_switch_buffer = gfx_v9_ring_emit_sb,
6815+
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6816+
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6817+
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6818+
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6819+
.emit_wreg = gfx_v9_0_ring_emit_wreg,
6820+
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6821+
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6822+
.soft_recovery = gfx_v9_0_ring_soft_recovery,
6823+
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6824+
};
6825+
67206826
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
67216827
.type = AMDGPU_RING_TYPE_COMPUTE,
67226828
.align_mask = 0xff,
@@ -6794,6 +6900,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
67946900
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
67956901
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
67966902

6903+
if (adev->gfx.num_gfx_rings) {
6904+
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6905+
adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
6906+
}
6907+
67976908
for (i = 0; i < adev->gfx.num_compute_rings; i++)
67986909
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
67996910
}

0 commit comments

Comments
 (0)