Skip to content

Commit af1d321

Browse files
committed
Merge tag 'arm-smmu-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
Arm SMMU updates for 5.15

- SMMUv3
  * Minor optimisation to avoid zeroing struct members on CMD submission
  * Increased use of batched commands to reduce submission latency
  * Refactoring in preparation for ECMDQ support

- SMMUv2
  * Fix races when probing devices with identical StreamIDs
  * Optimise walk cache flushing for Qualcomm implementations
  * Allow deep sleep states for some Qualcomm SoCs with shared clocks
2 parents ff11764 + fac9567 commit af1d321

File tree

5 files changed

+106
-42
lines changed

5 files changed

+106
-42
lines changed

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c

Lines changed: 54 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
335335
return 0;
336336
}
337337

338+
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
339+
{
340+
return &smmu->cmdq;
341+
}
342+
338343
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339-
u32 prod)
344+
struct arm_smmu_queue *q, u32 prod)
340345
{
341-
struct arm_smmu_queue *q = &smmu->cmdq.q;
342346
struct arm_smmu_cmdq_ent ent = {
343347
.opcode = CMDQ_OP_CMD_SYNC,
344348
};
@@ -355,7 +359,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
355359
arm_smmu_cmdq_build_cmd(cmd, &ent);
356360
}
357361

358-
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
362+
static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
363+
struct arm_smmu_queue *q)
359364
{
360365
static const char * const cerror_str[] = {
361366
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -366,7 +371,6 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
366371

367372
int i;
368373
u64 cmd[CMDQ_ENT_DWORDS];
369-
struct arm_smmu_queue *q = &smmu->cmdq.q;
370374
u32 cons = readl_relaxed(q->cons_reg);
371375
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372376
struct arm_smmu_cmdq_ent cmd_sync = {
@@ -413,6 +417,11 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
413417
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
414418
}
415419

420+
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
421+
{
422+
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
423+
}
424+
416425
/*
417426
* Command queue locking.
418427
* This is a form of bastardised rwlock with the following major changes:
@@ -579,7 +588,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
579588
{
580589
unsigned long flags;
581590
struct arm_smmu_queue_poll qp;
582-
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
591+
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
583592
int ret = 0;
584593

585594
/*
@@ -595,7 +604,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
595604

596605
queue_poll_init(smmu, &qp);
597606
do {
598-
llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
607+
llq->val = READ_ONCE(cmdq->q.llq.val);
599608
if (!queue_full(llq))
600609
break;
601610

@@ -614,7 +623,7 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
614623
{
615624
int ret = 0;
616625
struct arm_smmu_queue_poll qp;
617-
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
626+
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
618627
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
619628

620629
queue_poll_init(smmu, &qp);
@@ -637,12 +646,12 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
637646
struct arm_smmu_ll_queue *llq)
638647
{
639648
struct arm_smmu_queue_poll qp;
640-
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
649+
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
641650
u32 prod = llq->prod;
642651
int ret = 0;
643652

644653
queue_poll_init(smmu, &qp);
645-
llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
654+
llq->val = READ_ONCE(cmdq->q.llq.val);
646655
do {
647656
if (queue_consumed(llq, prod))
648657
break;
@@ -732,12 +741,12 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
732741
u32 prod;
733742
unsigned long flags;
734743
bool owner;
735-
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
736-
struct arm_smmu_ll_queue llq = {
737-
.max_n_shift = cmdq->q.llq.max_n_shift,
738-
}, head = llq;
744+
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
745+
struct arm_smmu_ll_queue llq, head;
739746
int ret = 0;
740747

748+
llq.max_n_shift = cmdq->q.llq.max_n_shift;
749+
741750
/* 1. Allocate some space in the queue */
742751
local_irq_save(flags);
743752
llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -772,7 +781,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
772781
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773782
if (sync) {
774783
prod = queue_inc_prod_n(&llq, n);
775-
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
784+
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
776785
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
777786

778787
/*
@@ -845,8 +854,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
845854
return ret;
846855
}
847856

848-
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
849-
struct arm_smmu_cmdq_ent *ent)
857+
static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
858+
struct arm_smmu_cmdq_ent *ent,
859+
bool sync)
850860
{
851861
u64 cmd[CMDQ_ENT_DWORDS];
852862

@@ -856,12 +866,19 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
856866
return -EINVAL;
857867
}
858868

859-
return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
869+
return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
870+
}
871+
872+
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
873+
struct arm_smmu_cmdq_ent *ent)
874+
{
875+
return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
860876
}
861877

862-
static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
878+
static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
879+
struct arm_smmu_cmdq_ent *ent)
863880
{
864-
return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
881+
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
865882
}
866883

867884
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
@@ -929,8 +946,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
929946
.tlbi.asid = asid,
930947
};
931948

932-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
933-
arm_smmu_cmdq_issue_sync(smmu);
949+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
934950
}
935951

936952
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
@@ -939,7 +955,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
939955
size_t i;
940956
unsigned long flags;
941957
struct arm_smmu_master *master;
942-
struct arm_smmu_cmdq_batch cmds = {};
958+
struct arm_smmu_cmdq_batch cmds;
943959
struct arm_smmu_device *smmu = smmu_domain->smmu;
944960
struct arm_smmu_cmdq_ent cmd = {
945961
.opcode = CMDQ_OP_CFGI_CD,
@@ -949,6 +965,8 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
949965
},
950966
};
951967

968+
cmds.num = 0;
969+
952970
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
953971
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
954972
for (i = 0; i < master->num_streams; i++) {
@@ -1211,8 +1229,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
12111229
},
12121230
};
12131231

1214-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1215-
arm_smmu_cmdq_issue_sync(smmu);
1232+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
12161233
}
12171234

12181235
static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
@@ -1747,15 +1764,16 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
17471764
{
17481765
int i;
17491766
struct arm_smmu_cmdq_ent cmd;
1767+
struct arm_smmu_cmdq_batch cmds = {};
17501768

17511769
arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
17521770

17531771
for (i = 0; i < master->num_streams; i++) {
17541772
cmd.atc.sid = master->streams[i].id;
1755-
arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1773+
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
17561774
}
17571775

1758-
return arm_smmu_cmdq_issue_sync(master->smmu);
1776+
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
17591777
}
17601778

17611779
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
@@ -1765,7 +1783,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
17651783
unsigned long flags;
17661784
struct arm_smmu_cmdq_ent cmd;
17671785
struct arm_smmu_master *master;
1768-
struct arm_smmu_cmdq_batch cmds = {};
1786+
struct arm_smmu_cmdq_batch cmds;
17691787

17701788
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
17711789
return 0;
@@ -1789,6 +1807,8 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
17891807

17901808
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
17911809

1810+
cmds.num = 0;
1811+
17921812
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
17931813
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
17941814
if (!master->ats_enabled)
@@ -1823,8 +1843,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
18231843
} else {
18241844
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
18251845
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1826-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1827-
arm_smmu_cmdq_issue_sync(smmu);
1846+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
18281847
}
18291848
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
18301849
}
@@ -1837,7 +1856,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
18371856
struct arm_smmu_device *smmu = smmu_domain->smmu;
18381857
unsigned long end = iova + size, num_pages = 0, tg = 0;
18391858
size_t inv_range = granule;
1840-
struct arm_smmu_cmdq_batch cmds = {};
1859+
struct arm_smmu_cmdq_batch cmds;
18411860

18421861
if (!size)
18431862
return;
@@ -1855,6 +1874,8 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
18551874
num_pages = size >> tg;
18561875
}
18571876

1877+
cmds.num = 0;
1878+
18581879
while (iova < end) {
18591880
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
18601881
/*
@@ -3338,18 +3359,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
33383359

33393360
/* Invalidate any cached configuration */
33403361
cmd.opcode = CMDQ_OP_CFGI_ALL;
3341-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3342-
arm_smmu_cmdq_issue_sync(smmu);
3362+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
33433363

33443364
/* Invalidate any stale TLB entries */
33453365
if (smmu->features & ARM_SMMU_FEAT_HYP) {
33463366
cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3347-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3367+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
33483368
}
33493369

33503370
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3351-
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3352-
arm_smmu_cmdq_issue_sync(smmu);
3371+
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
33533372

33543373
/* Event queue */
33553374
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);

drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
193193
{
194194
struct adreno_smmu_priv *priv;
195195

196+
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
197+
196198
/* Only enable split pagetables for the GPU device (SID 0) */
197199
if (!qcom_adreno_smmu_is_gpu_device(dev))
198200
return 0;
@@ -235,6 +237,14 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
235237
{ }
236238
};
237239

240+
static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
241+
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
242+
{
243+
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
244+
245+
return 0;
246+
}
247+
238248
static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
239249
{
240250
unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
@@ -358,6 +368,7 @@ static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
358368
}
359369

360370
static const struct arm_smmu_impl qcom_smmu_impl = {
371+
.init_context = qcom_smmu_init_context,
361372
.cfg_probe = qcom_smmu_cfg_probe,
362373
.def_domain_type = qcom_smmu_def_domain_type,
363374
.reset = qcom_smmu500_reset,

drivers/iommu/arm/arm-smmu/arm-smmu.c

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,16 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
327327
static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328328
size_t granule, void *cookie)
329329
{
330-
arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
331-
ARM_SMMU_CB_S1_TLBIVA);
332-
arm_smmu_tlb_sync_context(cookie);
330+
struct arm_smmu_domain *smmu_domain = cookie;
331+
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
332+
333+
if (cfg->flush_walk_prefer_tlbiasid) {
334+
arm_smmu_tlb_inv_context_s1(cookie);
335+
} else {
336+
arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
337+
ARM_SMMU_CB_S1_TLBIVA);
338+
arm_smmu_tlb_sync_context(cookie);
339+
}
333340
}
334341

335342
static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
@@ -1478,6 +1485,7 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
14781485
struct iommu_group *group = NULL;
14791486
int i, idx;
14801487

1488+
mutex_lock(&smmu->stream_map_mutex);
14811489
for_each_cfg_sme(cfg, fwspec, i, idx) {
14821490
if (group && smmu->s2crs[idx].group &&
14831491
group != smmu->s2crs[idx].group)
@@ -1486,8 +1494,10 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
14861494
group = smmu->s2crs[idx].group;
14871495
}
14881496

1489-
if (group)
1497+
if (group) {
1498+
mutex_unlock(&smmu->stream_map_mutex);
14901499
return iommu_group_ref_get(group);
1500+
}
14911501

14921502
if (dev_is_pci(dev))
14931503
group = pci_device_group(dev);
@@ -1501,6 +1511,7 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
15011511
for_each_cfg_sme(cfg, fwspec, i, idx)
15021512
smmu->s2crs[idx].group = group;
15031513

1514+
mutex_unlock(&smmu->stream_map_mutex);
15041515
return group;
15051516
}
15061517

@@ -2281,18 +2292,38 @@ static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
22812292

22822293
static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
22832294
{
2295+
int ret;
2296+
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2297+
2298+
ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2299+
if (ret)
2300+
return ret;
2301+
22842302
if (pm_runtime_suspended(dev))
22852303
return 0;
22862304

2287-
return arm_smmu_runtime_resume(dev);
2305+
ret = arm_smmu_runtime_resume(dev);
2306+
if (ret)
2307+
clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2308+
2309+
return ret;
22882310
}
22892311

22902312
static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
22912313
{
2314+
int ret = 0;
2315+
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2316+
22922317
if (pm_runtime_suspended(dev))
2293-
return 0;
2318+
goto clk_unprepare;
2319+
2320+
ret = arm_smmu_runtime_suspend(dev);
2321+
if (ret)
2322+
return ret;
22942323

2295-
return arm_smmu_runtime_suspend(dev);
2324+
clk_unprepare:
2325+
clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2326+
return ret;
22962327
}
22972328

22982329
static const struct dev_pm_ops arm_smmu_pm_ops = {

0 commit comments

Comments (0)