
Commit 077bd80

Merge tag 'drm-msm-next-2022-11-28' of https://gitlab.freedesktop.org/drm/msm into drm-next
msm-next for v6.2 (the gpu/gem bits)

- Remove exclusive-fence hack that caused over-synchronization
- Fix speed-bin detection vs. probe-defer
- Enable clamp_to_idle on 7c3
- Improved hangcheck detection

Signed-off-by: Dave Airlie <[email protected]>
From: Rob Clark <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGvT1h_S4d=YRgphgR8i7aMaxQaNW8mru7QaoUo9uiUk2A@mail.gmail.com
2 parents: 92e11dd + d73b1d0

12 files changed, 150 insertions(+), 66 deletions(-)

drivers/gpu/drm/msm/adreno/a4xx_gpu.c

Lines changed: 1 addition & 2 deletions

@@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) {
 
 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
-		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
 
 	return 0;
 }
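Most hunks in this pull follow one pattern: gpu_read64()/gpu_write64() no longer take a separate HI register argument, so the LO offset alone identifies the 64-bit register pair. Below is a rough, standalone illustration of that calling convention, assuming the HI half sits at the register immediately after LO; the mock_* helpers and the regs[] array are invented for this sketch and are not the driver's msm_gpu.h code.

#include <stdint.h>
#include <stdio.h>

/* Mock 32-bit register file standing in for GPU MMIO space (illustrative only). */
static uint32_t regs[16];

/* Stand-in for the single-argument gpu_read64(): the HI half is assumed
 * to live at the register immediately after LO, so only LO is passed. */
static uint64_t mock_read64(uint32_t lo_reg)
{
	return (uint64_t)regs[lo_reg] | ((uint64_t)regs[lo_reg + 1] << 32);
}

/* Stand-in for gpu_write64(), using the same LO/LO+1 convention. */
static void mock_write64(uint32_t lo_reg, uint64_t value)
{
	regs[lo_reg] = (uint32_t)value;
	regs[lo_reg + 1] = (uint32_t)(value >> 32);
}

int main(void)
{
	mock_write64(4, 0x0000000100100000ULL);	/* e.g. a 64-bit iova */
	printf("0x%016llx\n", (unsigned long long)mock_read64(4));
	return 0;
}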

drivers/gpu/drm/msm/adreno/a5xx_gpu.c

Lines changed: 10 additions & 17 deletions

@@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
 		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 	}
 
-	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
-		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
+	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
 
-	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
-		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
+	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
 
 	return 0;
 }
@@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Put the GPU into 64 bit by default */
@@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		return ret;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/*
 	 * If the microcode supports the WHERE_AM_I opcode then we can use that
@@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
-			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+			shadowptr(a5xx_gpu, gpu->rb[0]));
 	} else if (gpu->nr_rings > 1) {
 		/* Disable preemption if WHERE_AM_I isn't available */
 		a5xx_preempt_fini(gpu);
@@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
-		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 
 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
-		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
 
 	return 0;
 }
@@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
 	if (IS_ERR_OR_NULL(dumper->ptr))
 		return -EINVAL;
 
-	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
 
@@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
 {
 	u64 busy_cycles;
 
-	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
 	*out_sample_rate = clk_get_rate(gpu->core_clk);
 
 	return busy_cycles;

drivers/gpu/drm/msm/adreno/a5xx_preempt.c

Lines changed: 1 addition & 3 deletions

@@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
 
 	/* Set the address of the incoming preemption record */
 	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
 		a5xx_gpu->preempt_iova[ring->id]);
 
 	a5xx_gpu->next_ring = ring;
@@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
 	}
 
 	/* Write a 0 to signal that we aren't switching pagetables */
-	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);
 
 	/* Reset the preemption state */
 	set_preempt_state(a5xx_gpu, PREEMPT_NONE);

drivers/gpu/drm/msm/adreno/a6xx_gpu.c

Lines changed: 54 additions & 30 deletions

@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	OUT_RING(ring, submit->seqno);
 
 	trace_msm_gpu_submit_flush(submit,
-		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 
 	a6xx_flush(gpu, ring);
 }
@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
 		}
 	}
 
-	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
-		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
+	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 	return 0;
 }
@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Turn on 64 bit addressing for all blocks */
@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
 
 	if (!adreno_is_a650_family(adreno_gpu)) {
 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 
 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
 			0x00100000 + adreno_gpu->gmem - 1);
 	}
 
@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
 		goto out;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/* Targets that support extended APRIV can use the RPTR shadow from
 	 * hardware but all the other ones need to disable the feature. Targets
@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
-			REG_A6XX_CP_RB_RPTR_ADDR_HI,
 			shadowptr(a6xx_gpu, gpu->rb[0]));
 	}
 
@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
-		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 	/* Force the GPU power on so we can read this register */
 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
-	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-		REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
 
 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
@@ -1824,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct msm_cp_state cp_state = {
+		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+	};
+	bool progress;
+
+	/*
+	 * Adjust the remaining data to account for what has already been
+	 * fetched from memory, but not yet consumed by the SQE.
+	 *
+	 * This is not *technically* correct, the amount buffered could
+	 * exceed the IB size due to hw prefetching ahead, but:
+	 *
+	 * (1) We aren't trying to find the exact position, just whether
+	 *     progress has been made
+	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+	 *     to prevent prefetching into an unrelated submit.  (And
+	 *     either way, at some point the ROQ will be full.)
+	 */
+	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
+
+	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+	ring->last_cp_state = cp_state;
+
+	return progress;
+}
+
 static u32 a618_get_speed_bin(u32 fuse)
 {
 	if (fuse == 0)
@@ -1879,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 	if (val == UINT_MAX) {
 		DRM_DEV_ERROR(dev,
-			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
 			fuse);
 		return UINT_MAX;
 	}
@@ -1889,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 {
-	u32 supp_hw = UINT_MAX;
+	u32 supp_hw;
 	u32 speedbin;
 	int ret;
 
@@ -1901,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 	if (ret == -ENOENT) {
 		return 0;
 	} else if (ret) {
-		DRM_DEV_ERROR(dev,
-			"failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
-			ret);
-		goto done;
+		dev_err_probe(dev, ret,
+			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
+		return ret;
 	}
 
 	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
 
-done:
 	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
 	if (ret)
 		return ret;
@@ -1942,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.create_address_space = a6xx_create_address_space,
 		.create_private_address_space = a6xx_create_private_address_space,
 		.get_rptr = a6xx_get_rptr,
+		.progress = a6xx_progress,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
@@ -1978,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
 		adreno_gpu->base.hw_apriv = true;
 
-	/*
-	 * For now only clamp to idle freq for devices where this is known not
-	 * to cause power supply issues:
-	 */
-	if (info && (info->revn == 618))
-		gpu->clamp_to_idle = true;
-
 	a6xx_llc_slices_init(pdev, a6xx_gpu);
 
 	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
@@ -1999,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		return ERR_PTR(ret);
 	}
 
+	/*
+	 * For now only clamp to idle freq for devices where this is known not
+	 * to cause power supply issues:
+	 */
+	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
+		gpu->clamp_to_idle = true;
+
 	/* Check if there is a GMU phandle and set it up */
 	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
 
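The speed-bin hunks above are the probe-defer fix called out in the merge message: instead of logging an error and continuing with a dummy supp_hw value, the failure is now reported through dev_err_probe() and propagated. dev_err_probe() prints loudly for real errors but, for -EPROBE_DEFER, only records the deferral reason and hands the error code back unchanged. A minimal sketch of that pattern follows; example_read_fuse() is a hypothetical placeholder for the actual fuse/nvmem read, not the driver's API.

#include <linux/device.h>
#include <linux/errno.h>

/* Hypothetical stand-in for the real fuse/nvmem read (illustration only). */
static int example_read_fuse(struct device *dev, u32 *val)
{
	return -EPROBE_DEFER;	/* pretend the nvmem provider isn't bound yet */
}

static int example_read_speedbin(struct device *dev, u32 *speedbin)
{
	int ret = example_read_fuse(dev, speedbin);

	/*
	 * dev_err_probe() logs an error for "real" failures, but for
	 * -EPROBE_DEFER it only records the reason and stays quiet,
	 * then returns the error code so it can be passed straight up.
	 */
	if (ret)
		return dev_err_probe(dev, ret, "failed to read speed-bin\n");

	return 0;
}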

drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c

Lines changed: 1 addition & 2 deletions

@@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 	/* Make sure all pending memory writes are posted */
 	wmb();
 
-	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 

drivers/gpu/drm/msm/msm_drv.c

Lines changed: 0 additions & 1 deletion

@@ -418,7 +418,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
 	priv->dev = ddev;
 
 	priv->wq = alloc_ordered_workqueue("msm", 0);
-	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
 
 	INIT_LIST_HEAD(&priv->objects);
 	mutex_init(&priv->obj_lock);

drivers/gpu/drm/msm/msm_drv.h

Lines changed: 7 additions & 1 deletion

@@ -224,7 +224,13 @@ struct msm_drm_private {
 
 	struct drm_atomic_state *pm_state;
 
-	/* For hang detection, in ms */
+	/**
+	 * hangcheck_period: For hang detection, in ms
+	 *
+	 * Note that in practice, a submit/job will get at least two hangcheck
+	 * periods, due to checking for progress being implemented as simply
+	 * "have the CP position registers changed since last time?"
+	 */
 	unsigned int hangcheck_period;
 
 	/**
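The kdoc note above is the observable effect of the new progress callback: the hangcheck timer only declares a hang once the same fence has been pending and the CP state has stopped changing, so a stuck-looking job effectively gets at least two periods before recovery kicks in. A rough, standalone model of that retire-or-extend decision is sketched below; the toy_* struct and function are made up for illustration and are not msm_gpu.c code.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the per-ring state a hangcheck timer might look at. */
struct toy_ring {
	unsigned int completed_fence;	/* last fence the GPU retired */
	unsigned int submitted_fence;	/* last fence handed to the GPU */
	unsigned int hangcheck_fence;	/* fence observed at the previous tick */
	bool made_progress;		/* e.g. CP position changed since last tick */
};

/* Returns true if this tick should declare a hang. */
static bool toy_hangcheck_tick(struct toy_ring *ring)
{
	if (ring->completed_fence == ring->submitted_fence)
		return false;			/* nothing pending */

	if (ring->completed_fence != ring->hangcheck_fence ||
	    ring->made_progress) {
		/* fence advanced or CP still moving: re-arm instead of recovering */
		ring->hangcheck_fence = ring->completed_fence;
		return false;
	}

	return true;				/* same fence, no progress: hang */
}

int main(void)
{
	struct toy_ring ring = { .completed_fence = 1, .submitted_fence = 2,
				 .hangcheck_fence = 1, .made_progress = true };

	printf("tick 1: hang? %d\n", toy_hangcheck_tick(&ring));	/* 0: still making progress */
	ring.made_progress = false;
	printf("tick 2: hang? %d\n", toy_hangcheck_tick(&ring));	/* 1: same fence, no progress */
	return 0;
}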

drivers/gpu/drm/msm/msm_gem_shrinker.c

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@
 /* Default disabled for now until it has some more testing on the different
  * iommu combinations that can be paired with the driver:
  */
-static bool enable_eviction = false;
+static bool enable_eviction = true;
 MODULE_PARM_DESC(enable_eviction, "Enable swappable GEM buffers");
 module_param(enable_eviction, bool, 0600);
 

drivers/gpu/drm/msm/msm_gem_submit.c

Lines changed: 1 addition & 2 deletions

@@ -334,8 +334,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 		if (ret)
 			return ret;
 
-		/* exclusive fences must be ordered */
-		if (no_implicit && !write)
+		if (no_implicit)
 			continue;
 
 		ret = drm_sched_job_add_implicit_dependencies(&submit->base,
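This is the exclusive-fence hack removal from the merge message: previously, even with MSM_SUBMIT_NO_IMPLICIT, buffers the submit wrote to still picked up implicit dependencies, which over-synchronized userspace that manages its own fencing; now NO_IMPLICIT skips implicit sync entirely. A toy, standalone model of that decision (not the driver code) for comparison:

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the dependency decision in submit_fence_sync(): old_rule
 * reproduces the removed "exclusive fences must be ordered" special case,
 * the new rule honors NO_IMPLICIT unconditionally.
 */
static bool adds_implicit_dep(bool no_implicit, bool write, bool old_rule)
{
	if (old_rule)
		return !(no_implicit && !write);
	return !no_implicit;
}

int main(void)
{
	/* NO_IMPLICIT + written buffer: old rule still synchronized, new rule doesn't. */
	printf("old: %d, new: %d\n",
	       adds_implicit_dep(true, true, true),
	       adds_implicit_dep(true, true, false));
	return 0;
}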
