Skip to content

Commit ff4a7f6

Browse files
superna9999robclark
authored andcommitted
drm/msm: adreno: add plumbing to generate bandwidth vote table for GMU
The Adreno GPU Management Unit (GMU) can also scale DDR Bandwidth along with the Frequency and Power Domain level, but by default we let the OPP core scale the interconnect DDR path. While scaling via the interconnect path was sufficient, newer GPUs like the A750 require specific vote parameters and bandwidth to achieve full functionality. In order to calculate vote values used by the GPU Management Unit (GMU), we need to parse all the possible OPP Bandwidths and create a vote value to be sent to the appropriate Bus Control Modules (BCMs) declared in the GPU info struct. This vote value is called IB, while on the other side the GMU also takes another vote called AB which is a 16-bit quantized value of the floor bandwidth against the maximum supported bandwidth. The AB vote will be calculated later when setting the frequency. The vote array will then be used to dynamically generate the GMU bw_table sent during the GMU power-up. Reviewed-by: Akhil P Oommen <[email protected]> Signed-off-by: Neil Armstrong <[email protected]> Reviewed-by: Konrad Dybcio <[email protected]> Patchwork: https://patchwork.freedesktop.org/patch/629395/ Signed-off-by: Rob Clark <[email protected]>
1 parent 5b06195 commit ff4a7f6

File tree

3 files changed

+161
-0
lines changed

3 files changed

+161
-0
lines changed

drivers/gpu/drm/msm/adreno/a6xx_gmu.c

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/pm_domain.h>
1010
#include <linux/pm_opp.h>
1111
#include <soc/qcom/cmd-db.h>
12+
#include <soc/qcom/tcs.h>
1213
#include <drm/drm_gem.h>
1314

1415
#include "a6xx_gpu.h"
@@ -1294,6 +1295,104 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
12941295
return 0;
12951296
}
12961297

1298+
/**
1299+
* struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
1300+
* @unit: divisor used to convert bytes/sec bw value to an RPMh msg
1301+
* @width: multiplier used to convert bytes/sec bw value to an RPMh msg
1302+
* @vcd: virtual clock domain that this bcm belongs to
1303+
* @reserved: reserved field
*
* The pointer returned by cmd_db_read_aux_data() for a BCM is interpreted
* through this layout. @unit and @width are stored little-endian and are
* converted with le32_to_cpu()/le16_to_cpu() before being used in
* calculations.
1304+
*/
1305+
struct bcm_db {
1306+
__le32 unit;
1307+
__le16 width;
1308+
u8 vcd;
1309+
u8 reserved;
1310+
};
1311+
1312+
static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu,
1313+
const struct a6xx_info *info,
1314+
struct a6xx_gmu *gmu)
1315+
{
1316+
const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
1317+
unsigned int bcm_index, bw_index, bcm_count = 0;
1318+
1319+
/* Retrieve BCM data from cmd-db */
1320+
for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
1321+
const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
1322+
size_t count;
1323+
1324+
/* Stop at NULL terminated bcm entry */
1325+
if (!bcm->name)
1326+
break;
1327+
1328+
bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count);
1329+
if (IS_ERR(bcm_data[bcm_index]))
1330+
return PTR_ERR(bcm_data[bcm_index]);
1331+
1332+
if (!count) {
1333+
dev_err(gmu->dev, "invalid BCM '%s' aux data size\n",
1334+
bcm->name);
1335+
return -EINVAL;
1336+
}
1337+
1338+
bcm_count++;
1339+
}
1340+
1341+
/* Generate BCM votes values for each bandwidth & BCM */
1342+
for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
1343+
u32 *data = gmu->gpu_ib_votes[bw_index];
1344+
u32 bw = gmu->gpu_bw_table[bw_index];
1345+
1346+
/* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */
1347+
for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) {
1348+
const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
1349+
bool commit = false;
1350+
u64 peak;
1351+
u32 vote;
1352+
1353+
if (bcm_index == bcm_count - 1 ||
1354+
(bcm_data[bcm_index + 1] &&
1355+
bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd))
1356+
commit = true;
1357+
1358+
if (!bw) {
1359+
data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0);
1360+
continue;
1361+
}
1362+
1363+
if (bcm->fixed) {
1364+
u32 perfmode = 0;
1365+
1366+
/* GMU on A6xx votes perfmode on all valid bandwidth */
1367+
if (!adreno_is_a7xx(adreno_gpu) ||
1368+
(bcm->perfmode_bw && bw >= bcm->perfmode_bw))
1369+
perfmode = bcm->perfmode;
1370+
1371+
data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode);
1372+
continue;
1373+
}
1374+
1375+
/* Multiply the bandwidth by the width of the connection */
1376+
peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width);
1377+
do_div(peak, bcm->buswidth);
1378+
1379+
/* Input bandwidth value is in KBps, scale the value to BCM unit */
1380+
peak *= 1000;
1381+
do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit));
1382+
1383+
vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK);
1384+
1385+
/* GMUs on A7xx votes on both x & y */
1386+
if (adreno_is_a7xx(adreno_gpu))
1387+
data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
1388+
else
1389+
data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote);
1390+
}
1391+
}
1392+
1393+
return 0;
1394+
}
1395+
12971396
/* Return the 'arc-level' for the given frequency */
12981397
static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
12991398
unsigned long freq)
@@ -1397,12 +1496,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
13971496
* The GMU votes with the RPMh for itself and on behalf of the GPU but we need
13981497
* to construct the list of votes on the CPU and send it over. Query the RPMh
13991498
* voltage levels and build the votes
1499+
* The GMU can also vote for DDR interconnects, use the OPP bandwidth entries
1500+
* and BCM parameters to build the votes.
14001501
*/
14011502

14021503
static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
14031504
{
14041505
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
14051506
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1507+
const struct a6xx_info *info = adreno_gpu->info->a6xx;
14061508
struct msm_gpu *gpu = &adreno_gpu->base;
14071509
int ret;
14081510

@@ -1414,6 +1516,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
14141516
ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
14151517
gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");
14161518

1519+
/* Build the interconnect votes */
1520+
if (info->bcms && gmu->nr_gpu_bws > 1)
1521+
ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu);
1522+
14171523
return ret;
14181524
}
14191525

@@ -1449,10 +1555,43 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs,
14491555
return index;
14501556
}
14511557

1558+
static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths,
1559+
u32 size)
1560+
{
1561+
int count = dev_pm_opp_get_opp_count(dev);
1562+
struct dev_pm_opp *opp;
1563+
int i, index = 0;
1564+
unsigned int bandwidth = 1;
1565+
1566+
/*
1567+
* The OPP table doesn't contain the "off" bandwidth level so we need to
1568+
* add 1 to the table size to account for it
1569+
*/
1570+
1571+
if (WARN(count + 1 > size,
1572+
"The GMU bandwidth table is being truncated\n"))
1573+
count = size - 1;
1574+
1575+
/* Set the "off" bandwidth */
1576+
bandwidths[index++] = 0;
1577+
1578+
for (i = 0; i < count; i++) {
1579+
opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
1580+
if (IS_ERR(opp))
1581+
break;
1582+
1583+
dev_pm_opp_put(opp);
1584+
bandwidths[index++] = bandwidth++;
1585+
}
1586+
1587+
return index;
1588+
}
1589+
14521590
static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
14531591
{
14541592
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
14551593
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1594+
const struct a6xx_info *info = adreno_gpu->info->a6xx;
14561595
struct msm_gpu *gpu = &adreno_gpu->base;
14571596

14581597
int ret = 0;
@@ -1479,6 +1618,14 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
14791618

14801619
gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
14811620

1621+
/*
1622+
* The GMU also handles GPU Interconnect Votes so build a list
1623+
* of DDR bandwidths from the GPU OPP table
1624+
*/
1625+
if (info->bcms)
1626+
gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
1627+
gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));
1628+
14821629
/* Build the list of RPMh votes that we'll send to the GMU */
14831630
return a6xx_gmu_rpmh_votes_init(gmu);
14841631
}

drivers/gpu/drm/msm/adreno/a6xx_gmu.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@ struct a6xx_gmu_bo {
2121

2222
#define GMU_MAX_GX_FREQS 16
2323
#define GMU_MAX_CX_FREQS 4
24+
#define GMU_MAX_BCMS 3
25+
26+
/**
 * struct a6xx_bcm - Static description of one BCM the GMU votes on
 * @name: cmd-db resource name used to look up the BCM aux data; an entry
 *        with a NULL name terminates a BCM list
 * @buswidth: divisor applied to the width-scaled bandwidth when the vote of a
 *            non-fixed BCM is computed
 * @fixed: when true no bandwidth-derived vote is computed for this BCM and
 *         only @perfmode may be voted
 * @perfmode: value voted for a fixed BCM when perfmode applies
 * @perfmode_bw: on A7xx, the bandwidth at or above which @perfmode is voted;
 *               0 means perfmode is never voted there
 */
struct a6xx_bcm {
	const char *name;
	unsigned int buswidth;
	bool fixed;
	unsigned int perfmode;
	unsigned int perfmode_bw;
};
2433

2534
/*
2635
* These define the different GMU wake up options - these define how both the
@@ -85,6 +94,10 @@ struct a6xx_gmu {
8594
unsigned long gpu_freqs[GMU_MAX_GX_FREQS];
8695
u32 gx_arc_votes[GMU_MAX_GX_FREQS];
8796

97+
int nr_gpu_bws;
98+
unsigned long gpu_bw_table[GMU_MAX_GX_FREQS];
99+
u32 gpu_ib_votes[GMU_MAX_GX_FREQS][GMU_MAX_BCMS];
100+
88101
int nr_gmu_freqs;
89102
unsigned long gmu_freqs[GMU_MAX_CX_FREQS];
90103
u32 cx_arc_votes[GMU_MAX_CX_FREQS];

drivers/gpu/drm/msm/adreno/a6xx_gpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ struct a6xx_info {
4444
u32 gmu_chipid;
4545
u32 gmu_cgc_mode;
4646
u32 prim_fifo_threshold;
47+
const struct a6xx_bcm *bcms;
4748
};
4849

4950
struct a6xx_gpu {

0 commit comments

Comments
 (0)