@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	OUT_RING(ring, submit->seqno);
 
 	trace_msm_gpu_submit_flush(submit,
-		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 
 	a6xx_flush(gpu, ring);
 }
@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
 		}
 	}
 
-	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
-		REG_A6XX_CP_SQE_INSTR_BASE + 1, a6xx_gpu->sqe_iova);
+	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 	return 0;
 }
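
Throughout this file the separate *_HI register argument is dropped from gpu_read64()/gpu_write64(). Below is a minimal sketch of what the single-register helpers are assumed to look like, based on the "REG_A6XX_CP_SQE_INSTR_BASE + 1" pattern removed above (i.e. the upper 32 bits live at the next register offset); the in-tree helpers in msm_gpu.h may differ in detail.

/* Hedged sketch, not the in-tree implementation: treat a 64-bit value as two
 * adjacent 32-bit registers, low word first, high word at reg + 1. */
static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
	gpu_write(gpu, reg, lower_32_bits(val));
	gpu_write(gpu, reg + 1, upper_32_bits(val));
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
{
	return (u64)gpu_read(gpu, reg) |
	       ((u64)gpu_read(gpu, reg + 1) << 32);
}
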
@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Turn on 64 bit addressing for all blocks */
@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
 
 	if (!adreno_is_a650_family(adreno_gpu)) {
 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 
 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
 			0x00100000 + adreno_gpu->gmem - 1);
 	}
 
@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
 		goto out;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/* Targets that support extended APRIV can use the RPTR shadow from
 	 * hardware but all the other ones need to disable the feature. Targets
@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
-			REG_A6XX_CP_RB_RPTR_ADDR_HI,
 			shadowptr(a6xx_gpu, gpu->rb[0]));
 	}
 
@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
-		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 	/* Force the GPU power on so we can read this register */
 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
-	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-		REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
 
 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
@@ -1824,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct msm_cp_state cp_state = {
+		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+		.ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+		.ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+	};
+	bool progress;
+
+	/*
+	 * Adjust the remaining data to account for what has already been
+	 * fetched from memory, but not yet consumed by the SQE.
+	 *
+	 * This is not *technically* correct, the amount buffered could
+	 * exceed the IB size due to hw prefetching ahead, but:
+	 *
+	 * (1) We aren't trying to find the exact position, just whether
+	 *     progress has been made
+	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+	 *     to prevent prefetching into an unrelated submit.  (And
+	 *     either way, at some point the ROQ will be full.)
+	 */
+	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
+
+	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+	ring->last_cp_state = cp_state;
+
+	return progress;
+}
+
 static u32 a618_get_speed_bin(u32 fuse)
 {
 	if (fuse == 0)
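
a6xx_progress() snapshots the CP's IB1/IB2 base and remaining-size registers, adds the ROQ fill level from the CSQ stat registers, and reports whether that snapshot changed since the last hangcheck tick. A simplified sketch of how the core hangcheck path could consume the new ->progress hook follows; the retry counter field and threshold name are illustrative assumptions, not confirmed in-tree identifiers.

/* Hedged sketch of a hangcheck consumer: only treat the ring as hung once the
 * fence has stalled AND a6xx_progress() stops reporting CP state changes.
 * DRM_MSM_HANGCHECK_PROGRESS_RETRIES and hangcheck_progress_retries are
 * assumed names for illustration. */
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3

static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
		return false;

	if (!gpu->funcs->progress || !gpu->funcs->progress(gpu, ring))
		return false;

	/* CP state changed since the last tick: give it more time */
	ring->hangcheck_progress_retries++;
	return true;
}
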
@@ -1879,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 	if (val == UINT_MAX) {
 		DRM_DEV_ERROR(dev,
-			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
 			fuse);
 		return UINT_MAX;
 	}
@@ -1889,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 {
-	u32 supp_hw = UINT_MAX;
+	u32 supp_hw;
 	u32 speedbin;
 	int ret;
 
@@ -1901,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 	if (ret == -ENOENT) {
 		return 0;
 	} else if (ret) {
-		DRM_DEV_ERROR(dev,
-			"failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
-			ret);
-		goto done;
+		dev_err_probe(dev, ret,
+			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
+		return ret;
 	}
 
 	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
 
-done:
 	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
 	if (ret)
 		return ret;
@@ -1942,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.create_address_space = a6xx_create_address_space,
 		.create_private_address_space = a6xx_create_private_address_space,
 		.get_rptr = a6xx_get_rptr,
+		.progress = a6xx_progress,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
@@ -1978,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
 		adreno_gpu->base.hw_apriv = true;
 
-	/*
-	 * For now only clamp to idle freq for devices where this is known not
-	 * to cause power supply issues:
-	 */
-	if (info && (info->revn == 618))
-		gpu->clamp_to_idle = true;
-
 	a6xx_llc_slices_init(pdev, a6xx_gpu);
 
 	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
@@ -1999,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		return ERR_PTR(ret);
 	}
 
+	/*
+	 * For now only clamp to idle freq for devices where this is known not
+	 * to cause power supply issues:
+	 */
+	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
+		gpu->clamp_to_idle = true;
+
 	/* Check if there is a GMU phandle and set it up */
 	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
 