
Commit 56fc141

fanhuang authored and Alex Deucher committed
drm/amdgpu/vcn: sriov support for vcn_v5_0_1
initialization table handshake with mmsch

Signed-off-by: fanhuang <[email protected]>
Acked-by: Alex Deucher <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 7ce3166 commit 56fc141
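
Context for the change, since the commit message is terse: under SR-IOV the VF driver cannot program most VCN registers directly. It instead serializes the register programming into an init table in a shared buffer and hands that table to the MMSCH (multimedia scheduler) firmware, which replays it on the guest's behalf and reports status through a mailbox. A simplified sketch of the direct-write command pattern used to build the table follows; the real MMSCH_V5_0_INSERT_DIRECT_WT() macro in mmsch_v5_0.h does equivalent bookkeeping, and the struct layout here is illustrative, not the header's exact definition:

/* Illustrative only: how one MMSCH "direct write" command is
 * appended to the init table.  Field names are simplified. */
struct mmsch_cmd_direct_write {
	uint32_t cmd_header;	/* MMSCH_COMMAND__DIRECT_REG_WRITE */
	uint32_t reg_offset;	/* SOC15 register to program */
	uint32_t reg_value;	/* value MMSCH writes for the guest */
};

static uint32_t *append_direct_write(uint32_t *table_loc,
				     uint32_t *table_size,
				     uint32_t reg, uint32_t value)
{
	struct mmsch_cmd_direct_write cmd = {
		.cmd_header = MMSCH_COMMAND__DIRECT_REG_WRITE,
		.reg_offset = reg,
		.reg_value  = value,
	};

	memcpy(table_loc, &cmd, sizeof(cmd));
	*table_size += sizeof(cmd) / 4;		/* sizes kept in dwords */
	return table_loc + sizeof(cmd) / 4;
}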

File tree

1 file changed (+248, −17)

drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c

Lines changed: 248 additions & 17 deletions
@@ -30,6 +30,7 @@
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
 #include "vcn_v4_0_3.h"
+#include "mmsch_v5_0.h"
 
 #include "vcn/vcn_5_0_0_offset.h"
 #include "vcn/vcn_5_0_0_sh_mask.h"
@@ -39,6 +40,7 @@
 
 #include <drm/drm_drv.h>
 
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev);
 static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
@@ -126,7 +128,14 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
 
 		ring = &adev->vcn.inst[i].ring_enc[0];
 		ring->use_doorbell = true;
-		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * vcn_inst;
+		if (!amdgpu_sriov_vf(adev))
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				11 * vcn_inst;
+		else
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				32 * vcn_inst;
 
 		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
 		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
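
The only functional change in sw_init is the doorbell stride: bare metal packs unified-ring doorbells 11 apart per physical VCN instance, while the VF layout spaces them 32 apart. A hypothetical helper capturing the same math as the branch above:

/* Hypothetical helper, equivalent to the if/else in the hunk above:
 * same base, different per-instance stride under SR-IOV. */
static u32 vcn_v5_0_1_doorbell_index(struct amdgpu_device *adev, int vcn_inst)
{
	u32 base = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
	u32 stride = amdgpu_sriov_vf(adev) ? 32 : 11;

	return base + stride * vcn_inst;
}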
@@ -143,6 +152,12 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
 	adev->vcn.supported_reset =
 		amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
 
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
+		if (r)
+			return r;
+	}
+
 	vcn_v5_0_0_alloc_ip_dump(adev);
 
 	return amdgpu_vcn_sysfs_reset_mask_init(adev);
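
The mm_table allocated here is the buffer that vcn_v5_0_1_start_sriov() later fills with the init table; the matching amdgpu_virt_free_mm_table() call lands in sw_fini in the next hunk. Roughly, from memory of the generic helper in amdgpu_virt.c (details such as the exact domain flags may differ), it boils down to pinning one page and recording both views of it:

/* Sketch of what the mm_table allocation amounts to: one kernel-owned
 * page with a GPU address (consumed by MMSCH) and a CPU mapping
 * (filled by the driver).  Not the verbatim helper body. */
r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
			    AMDGPU_GEM_DOMAIN_VRAM,
			    &adev->virt.mm_table.bo,
			    &adev->virt.mm_table.gpu_addr,
			    (void **)&adev->virt.mm_table.cpu_addr);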
@@ -172,6 +187,9 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
 		drm_dev_exit(idx);
 	}
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
 		r = amdgpu_vcn_suspend(adev, i);
 		if (r)
@@ -204,24 +222,38 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
 	struct amdgpu_ring *ring;
 	int i, r, vcn_inst;
 
-	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
-		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		vcn_inst = GET_INST(VCN, i);
-		ring = &adev->vcn.inst[i].ring_enc[0];
-
-		if (ring->use_doorbell)
-			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-				((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-				 11 * vcn_inst),
-				adev->vcn.inst[i].aid_id);
-
-		/* Re-init fw_shared, if required */
-		vcn_v5_0_1_fw_shared_init(adev, i);
-
-		r = amdgpu_ring_test_helper(ring);
+	if (amdgpu_sriov_vf(adev)) {
+		r = vcn_v5_0_1_start_sriov(adev);
 		if (r)
 			return r;
+
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			ring = &adev->vcn.inst[i].ring_enc[0];
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v5_0_1_unified_ring_set_wptr(ring);
+			ring->sched.ready = true;
+		}
+	} else {
+		if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+			adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			vcn_inst = GET_INST(VCN, i);
+			ring = &adev->vcn.inst[i].ring_enc[0];
+
+			if (ring->use_doorbell)
+				adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+					((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					 11 * vcn_inst),
+					adev->vcn.inst[i].aid_id);
+
+			/* Re-init fw_shared, if required */
+			vcn_v5_0_1_fw_shared_init(adev, i);
+
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				return r;
+		}
 	}
 
 	return 0;
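
Worth noting why the SR-IOV branch sets ring->sched.ready by hand instead of calling amdgpu_ring_test_helper(): the helper derives sched.ready from a register-level ring test the VF is not allowed to run. Simplified from memory of amdgpu_ring.c (the real helper also logs the result):

/* Simplified: the bare-metal path's helper.  On a VF the test would
 * touch privileged registers, so the new code marks the ring ready
 * directly after the MMSCH handshake succeeds. */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r = amdgpu_ring_test_ring(ring);

	ring->sched.ready = !r;		/* ready only if the test passed */
	return r;
}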
@@ -663,6 +695,195 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
 	return 0;
 }
 
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+	int i, vcn_inst;
+	struct amdgpu_ring *ring_enc;
+	uint64_t cache_addr;
+	uint64_t rb_enc_addr;
+	uint64_t ctx_addr;
+	uint32_t param, resp, expected;
+	uint32_t offset, cache_size;
+	uint32_t tmp, timeout;
+
+	struct amdgpu_mm_table *table = &adev->virt.mm_table;
+	uint32_t *table_loc;
+	uint32_t table_size;
+	uint32_t size, size_dw;
+	uint32_t init_status;
+	uint32_t enabled_vcn;
+
+	struct mmsch_v5_0_cmd_direct_write
+		direct_wt = { {0} };
+	struct mmsch_v5_0_cmd_direct_read_modify_write
+		direct_rd_mod_wt = { {0} };
+	struct mmsch_v5_0_cmd_end end = { {0} };
+	struct mmsch_v5_0_init_header header;
+
+	volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+	direct_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_WRITE;
+	direct_rd_mod_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+	end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		vcn_inst = GET_INST(VCN, i);
+
+		vcn_v5_0_1_fw_shared_init(adev, vcn_inst);
+
+		memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+		header.version = MMSCH_VERSION;
+		header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+		table_loc = (uint32_t *)table->cpu_addr;
+		table_loc += header.total_size;
+
+		table_size = 0;
+
+		MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+			offset = 0;
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_VCPU_CACHE_OFFSET0), 0);
+		} else {
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+				lower_32_bits(adev->vcn.inst[i].gpu_addr));
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+				upper_32_bits(adev->vcn.inst[i].gpu_addr));
+			offset = cache_size;
+			MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+				regUVD_VCPU_CACHE_OFFSET0),
+				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		}
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE0),
+			cache_size);
+
+		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_OFFSET1), 0);
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+			AMDGPU_VCN_STACK_SIZE;
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_OFFSET2), 0);
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+		rb_setup = &fw_shared->rb_setup;
+
+		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+		ring_enc->wptr = 0;
+		rb_enc_addr = ring_enc->gpu_addr;
+
+		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+		rb_setup->rb_size = ring_enc->ring_size / 4;
+		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+		MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+			regUVD_VCPU_NONCACHE_SIZE0),
+			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+		MMSCH_V5_0_INSERT_END();
+
+		header.vcn0.init_status = 0;
+		header.vcn0.table_offset = header.total_size;
+		header.vcn0.table_size = table_size;
+		header.total_size += table_size;
+
+		/* Send init table to mmsch */
+		size = sizeof(struct mmsch_v5_0_init_header);
+		table_loc = (uint32_t *)table->cpu_addr;
+		memcpy((void *)table_loc, &header, size);
+
+		ctx_addr = table->gpu_addr;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+		size = header.total_size;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+		param = 0x00000001;
+		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+		tmp = 0;
+		timeout = 1000;
+		resp = 0;
+		expected = MMSCH_VF_MAILBOX_RESP__OK;
+		while (resp != expected) {
+			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+			if (resp != 0)
+				break;
+
+			udelay(10);
+			tmp = tmp + 10;
+			if (tmp >= timeout) {
+				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+					" waiting for regMMSCH_VF_MAILBOX_RESP "\
+					"(expected=0x%08x, readback=0x%08x)\n",
+					tmp, expected, resp);
+				return -EBUSY;
+			}
+		}
+
+		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+		init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status;
+		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+		    && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+		}
+	}
+
+	return 0;
+}
+
 /**
  * vcn_v5_0_1_start - VCN start
  *
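
One structural point worth calling out in the function above: the header copied to the head of the table doubles as the status channel. MMSCH writes the engine's init_status back into it, which is why the tail of the loop re-reads table_loc after the mailbox poll. The layout assumed by the code, reconstructed from the fields it touches (the real struct in mmsch_v5_0.h may carry more members):

/* Assumed shape of the init header; sizes/offsets are in dwords.
 * vcn0.init_status is written back by MMSCH and compared against
 * MMSCH_VF_ENGINE_STATUS__PASS. */
struct mmsch_v5_0_init_header {
	uint32_t version;	/* MMSCH_VERSION */
	uint32_t total_size;	/* header + table, in dwords */
	struct {
		uint32_t init_status;	/* filled in by MMSCH */
		uint32_t table_offset;	/* dword offset of the table */
		uint32_t table_size;	/* dword count of the table */
	} vcn0;
};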
@@ -1103,8 +1324,18 @@ static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
 static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
 				   enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = vinst->adev;
 	int ret = 0;
 
+	/* for SRIOV, guest should not control VCN Power-gating
+	 * MMSCH FW should control Power-gating and clock-gating
+	 * guest should avoid touching CGC and PG
+	 */
+	if (amdgpu_sriov_vf(adev)) {
+		vinst->cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
 	if (state == vinst->cur_state)
 		return 0;
 