diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0001-mm-damon-sysfs-fix-use-after-free-in-state_show.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0001-mm-damon-sysfs-fix-use-after-free-in-state_show.patch new file mode 100644 index 0000000..78796ea --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0001-mm-damon-sysfs-fix-use-after-free-in-state_show.patch @@ -0,0 +1,69 @@ +From 3933cc784b142a5487abafe4eb36964c17310258 Mon Sep 17 00:00:00 2001 +From: Stanislav Fort +Date: Fri, 5 Sep 2025 13:10:46 +0300 +Subject: [PATCH 01/28] mm/damon/sysfs: fix use-after-free in state_show() + +state_show() reads kdamond->damon_ctx without holding damon_sysfs_lock. +This allows a use-after-free race: + +CPU 0 CPU 1 +----- ----- +state_show() damon_sysfs_turn_damon_on() +ctx = kdamond->damon_ctx; mutex_lock(&damon_sysfs_lock); + damon_destroy_ctx(kdamond->damon_ctx); + kdamond->damon_ctx = NULL; + mutex_unlock(&damon_sysfs_lock); +damon_is_running(ctx); /* ctx is freed */ +mutex_lock(&ctx->kdamond_lock); /* UAF */ + +(The race can also occur with damon_sysfs_kdamonds_rm_dirs() and +damon_sysfs_kdamond_release(), which free or replace the context under +damon_sysfs_lock.) + +Fix by taking damon_sysfs_lock before dereferencing the context, mirroring +the locking used in pid_show(). + +The bug has existed since state_show() first accessed kdamond->damon_ctx. 
+ +Link: https://lkml.kernel.org/r/20250905101046.2288-1-disclosure@aisle.com +Fixes: a61ea561c871 ("mm/damon/sysfs: link DAMON for virtual address spaces monitoring") +Signed-off-by: Stanislav Fort +Reported-by: Stanislav Fort +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +--- + mm/damon/sysfs.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c +index 58145d59881d..9ce2abc64de4 100644 +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -1067,14 +1067,18 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, + { + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); +- struct damon_ctx *ctx = kdamond->damon_ctx; +- bool running; ++ struct damon_ctx *ctx; ++ bool running = false; + +- if (!ctx) +- running = false; +- else ++ if (!mutex_trylock(&damon_sysfs_lock)) ++ return -EBUSY; ++ ++ ctx = kdamond->damon_ctx; ++ if (ctx) + running = damon_sysfs_ctx_running(ctx); + ++ mutex_unlock(&damon_sysfs_lock); ++ + return sysfs_emit(buf, "%s\n", running ? 
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : + damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0002-netfilter-ctnetlink-remove-refcounting-in-expectatio.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0002-netfilter-ctnetlink-remove-refcounting-in-expectatio.patch new file mode 100644 index 0000000..ce1b8fa --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0002-netfilter-ctnetlink-remove-refcounting-in-expectatio.patch @@ -0,0 +1,160 @@ +From f0d92487b02ff52a5fcc42863abf0f0b7910367e Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 1 Aug 2025 17:25:09 +0200 +Subject: [PATCH 02/28] netfilter: ctnetlink: remove refcounting in expectation + dumpers + +Same pattern as previous patch: do not keep the expectation object +alive via refcount, only store a cookie value and then use that +as the skip hint for dump resumption. + +AFAICS this has the same issue as the one resolved in the conntrack +dumper, when we do + if (!refcount_inc_not_zero(&exp->use)) + +to increment the refcount, there is a chance that exp == last, which +causes a double-increment of the refcount and subsequent memory leak. 
+ +Fixes: cf6994c2b981 ("[NETFILTER]: nf_conntrack_netlink: sync expectation dumping with conntrack table dumping") +Fixes: e844a928431f ("netfilter: ctnetlink: allow to dump expectation per master conntrack") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +--- + net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++---------------- + 1 file changed, 17 insertions(+), 24 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c +index 18a91c031554..13836723223e 100644 +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -3146,23 +3146,27 @@ ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) + return 0; + } + #endif +-static int ctnetlink_exp_done(struct netlink_callback *cb) ++ ++static unsigned long ctnetlink_exp_id(const struct nf_conntrack_expect *exp) + { +- if (cb->args[1]) +- nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); +- return 0; ++ unsigned long id = (unsigned long)exp; ++ ++ id += nf_ct_get_id(exp->master); ++ id += exp->class; ++ ++ return id ? 
id : 1; + } + + static int + ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + { + struct net *net = sock_net(skb->sk); +- struct nf_conntrack_expect *exp, *last; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; ++ unsigned long last_id = cb->args[1]; ++ struct nf_conntrack_expect *exp; + + rcu_read_lock(); +- last = (struct nf_conntrack_expect *)cb->args[1]; + for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { + restart: + hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], +@@ -3174,7 +3178,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + continue; + + if (cb->args[1]) { +- if (exp != last) ++ if (ctnetlink_exp_id(exp) != last_id) + continue; + cb->args[1] = 0; + } +@@ -3183,9 +3187,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + exp) < 0) { +- if (!refcount_inc_not_zero(&exp->use)) +- continue; +- cb->args[1] = (unsigned long)exp; ++ cb->args[1] = ctnetlink_exp_id(exp); + goto out; + } + } +@@ -3196,32 +3198,30 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + } + out: + rcu_read_unlock(); +- if (last) +- nf_ct_expect_put(last); +- + return skb->len; + } + + static int + ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + { +- struct nf_conntrack_expect *exp, *last; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nf_conn *ct = cb->data; + struct nf_conn_help *help = nfct_help(ct); + u_int8_t l3proto = nfmsg->nfgen_family; ++ unsigned long last_id = cb->args[1]; ++ struct nf_conntrack_expect *exp; + + if (cb->args[0]) + return 0; + + rcu_read_lock(); +- last = (struct nf_conntrack_expect *)cb->args[1]; ++ + restart: + hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (cb->args[1]) { +- if (exp != last) ++ if 
(ctnetlink_exp_id(exp) != last_id) + continue; + cb->args[1] = 0; + } +@@ -3229,9 +3229,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + exp) < 0) { +- if (!refcount_inc_not_zero(&exp->use)) +- continue; +- cb->args[1] = (unsigned long)exp; ++ cb->args[1] = ctnetlink_exp_id(exp); + goto out; + } + } +@@ -3242,9 +3240,6 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) + cb->args[0] = 1; + out: + rcu_read_unlock(); +- if (last) +- nf_ct_expect_put(last); +- + return skb->len; + } + +@@ -3263,7 +3258,6 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, + struct nf_conntrack_zone zone; + struct netlink_dump_control c = { + .dump = ctnetlink_exp_ct_dump_table, +- .done = ctnetlink_exp_done, + }; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, +@@ -3313,7 +3307,6 @@ static int ctnetlink_get_expect(struct sk_buff *skb, + else { + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, +- .done = ctnetlink_exp_done, + }; + return netlink_dump_start(info->sk, skb, info->nlh, &c); + } +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0003-net-sched-Fix-backlog-accounting-in-qdisc_dequeue_in.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0003-net-sched-Fix-backlog-accounting-in-qdisc_dequeue_in.patch new file mode 100644 index 0000000..90a83fa --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0003-net-sched-Fix-backlog-accounting-in-qdisc_dequeue_in.patch @@ -0,0 +1,313 @@ +From 575a05daff4bf2a131f11e339d650f17ab2d3196 Mon Sep 17 00:00:00 2001 +From: William Liu +Date: Tue, 12 Aug 2025 23:57:57 +0000 +Subject: [PATCH 03/28] net/sched: Fix backlog accounting in + qdisc_dequeue_internal + +This issue applies for the following qdiscs: hhf, fq, fq_codel, and +fq_pie, and occurs in their change handlers when adjusting to the new +limit. 
The problem is the following in the values passed to the +subsequent qdisc_tree_reduce_backlog call given a tbf parent: + + When the tbf parent runs out of tokens, skbs of these qdiscs will + be placed in gso_skb. Their peek handlers are qdisc_peek_dequeued, + which accounts for both qlen and backlog. However, in the case of + qdisc_dequeue_internal, ONLY qlen is accounted for when pulling + from gso_skb. This means that these qdiscs are missing a + qdisc_qstats_backlog_dec when dropping packets to satisfy the + new limit in their change handlers. + + One can observe this issue with the following (with tc patched to + support a limit of 0): + + export TARGET=fq + tc qdisc del dev lo root + tc qdisc add dev lo root handle 1: tbf rate 8bit burst 100b latency 1ms + tc qdisc replace dev lo handle 3: parent 1:1 $TARGET limit 1000 + echo ''; echo 'add child'; tc -s -d qdisc show dev lo + ping -I lo -f -c2 -s32 -W0.001 127.0.0.1 2>&1 >/dev/null + echo ''; echo 'after ping'; tc -s -d qdisc show dev lo + tc qdisc change dev lo handle 3: parent 1:1 $TARGET limit 0 + echo ''; echo 'after limit drop'; tc -s -d qdisc show dev lo + tc qdisc replace dev lo handle 2: parent 1:1 sfq + echo ''; echo 'post graft'; tc -s -d qdisc show dev lo + + The second to last show command shows 0 packets but a positive + number (74) of backlog bytes. The problem becomes clearer in the + last show command, where qdisc_purge_queue triggers + qdisc_tree_reduce_backlog with the positive backlog and causes an + underflow in the tbf parent's backlog (4096 Mb instead of 0). + +To fix this issue, the codepath for all clients of qdisc_dequeue_internal +has been simplified: codel, pie, hhf, fq, fq_pie, and fq_codel. +qdisc_dequeue_internal handles the backlog adjustments for all cases that +do not directly use the dequeue handler. + +The old fq_codel_change limit adjustment loop accumulated the arguments to +the subsequent qdisc_tree_reduce_backlog call through the cstats field. 
+However, this is confusing and error prone as fq_codel_dequeue could also +potentially mutate this field (which qdisc_dequeue_internal calls in the +non gso_skb case), so we have unified the code here with other qdiscs. + +Fixes: 2d3cbfd6d54a ("net_sched: Flush gso_skb list too during ->change()") +Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") +Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") +Signed-off-by: William Liu +Reviewed-by: Savino Dicanosa +Link: https://patch.msgid.link/20250812235725.45243-1-will@willsroot.io +Signed-off-by: Jakub Kicinski +--- + include/net/sch_generic.h | 11 ++++++++--- + net/sched/sch_codel.c | 12 +++++++----- + net/sched/sch_fq.c | 12 +++++++----- + net/sched/sch_fq_codel.c | 12 +++++++----- + net/sched/sch_fq_pie.c | 12 +++++++----- + net/sched/sch_hhf.c | 12 +++++++----- + net/sched/sch_pie.c | 12 +++++++----- + 7 files changed, 50 insertions(+), 33 deletions(-) + +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index fc3ffc0b0574..6a029238fc06 100644 +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -1046,12 +1046,17 @@ static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool dir + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; ++ qdisc_qstats_backlog_dec(sch, skb); + return skb; + } +- if (direct) +- return __qdisc_dequeue_head(&sch->q); +- else ++ if (direct) { ++ skb = __qdisc_dequeue_head(&sch->q); ++ if (skb) ++ qdisc_qstats_backlog_dec(sch, skb); ++ return skb; ++ } else { + return sch->dequeue(sch); ++ } + } + + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c +index afd9805cb68e..655eb64cd176 100644 +--- a/net/sched/sch_codel.c ++++ b/net/sched/sch_codel.c +@@ -100,9 +100,9 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { + static int codel_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack 
*extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct codel_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_CODEL_MAX + 1]; +- unsigned int qlen, dropped = 0; + int err; + + err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, +@@ -141,15 +141,17 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, + WRITE_ONCE(q->params.ecn, + !!nla_get_u32(tb[TCA_CODEL_ECN])); + +- qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); + +- dropped += qdisc_pkt_len(skb); +- qdisc_qstats_backlog_dec(sch, skb); ++ if (!skb) ++ break; ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_qdisc_drop(skb, sch); + } +- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); ++ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return 0; +diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c +index 1af9768cd8ff..df11a54e390c 100644 +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -1001,11 +1001,11 @@ static int fq_load_priomap(struct fq_sched_data *q, + static int fq_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct fq_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_FQ_MAX + 1]; +- int err, drop_count = 0; +- unsigned drop_len = 0; + u32 fq_log; ++ int err; + + err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, + NULL); +@@ -1112,16 +1112,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, + err = fq_resize(sch, fq_log); + sch_tree_lock(sch); + } ++ + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + + if (!skb) + break; +- drop_len += qdisc_pkt_len(skb); ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_kfree_skbs(skb, skb); +- drop_count++; + } +- qdisc_tree_reduce_backlog(sch, drop_count, drop_len); ++ 
qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return err; +diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c +index 551b7cbdae90..5e4c69d4df41 100644 +--- a/net/sched/sch_fq_codel.c ++++ b/net/sched/sch_fq_codel.c +@@ -365,6 +365,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { + static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct fq_codel_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; + u32 quantum = 0; +@@ -442,13 +443,14 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, + q->memory_usage > q->memory_limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + +- q->cstats.drop_len += qdisc_pkt_len(skb); ++ if (!skb) ++ break; ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_kfree_skbs(skb, skb); +- q->cstats.drop_count++; + } +- qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); +- q->cstats.drop_count = 0; +- q->cstats.drop_len = 0; ++ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return 0; +diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c +index 6ed08b705f8a..5881d34d58b4 100644 +--- a/net/sched/sch_fq_pie.c ++++ b/net/sched/sch_fq_pie.c +@@ -285,10 +285,9 @@ static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch) + static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct fq_pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; +- unsigned int len_dropped = 0; +- unsigned int num_dropped = 0; + int err; + + err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); +@@ -366,11 +365,14 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, + while 
(sch->q.qlen > sch->limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + +- len_dropped += qdisc_pkt_len(skb); +- num_dropped += 1; ++ if (!skb) ++ break; ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_kfree_skbs(skb, skb); + } +- qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); ++ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return 0; +diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c +index 5aa434b46707..2d4855e28a28 100644 +--- a/net/sched/sch_hhf.c ++++ b/net/sched/sch_hhf.c +@@ -508,9 +508,9 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { + static int hhf_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct hhf_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_HHF_MAX + 1]; +- unsigned int qlen, prev_backlog; + int err; + u64 non_hh_quantum; + u32 new_quantum = q->quantum; +@@ -561,15 +561,17 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, + usecs_to_jiffies(us)); + } + +- qlen = sch->q.qlen; +- prev_backlog = sch->qstats.backlog; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); + ++ if (!skb) ++ break; ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_kfree_skbs(skb, skb); + } +- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, +- prev_backlog - sch->qstats.backlog); ++ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return 0; +diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c +index db61cbc21b13..a6a5874f4c3a 100644 +--- a/net/sched/sch_pie.c ++++ b/net/sched/sch_pie.c +@@ -138,9 +138,9 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { + static int pie_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) + { ++ unsigned int dropped_pkts = 0, dropped_bytes = 0; + struct 
pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_PIE_MAX + 1]; +- unsigned int qlen, dropped = 0; + int err; + + err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, +@@ -190,15 +190,17 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, + nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR])); + + /* Drop excess packets if new limit is lower */ +- qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); + +- dropped += qdisc_pkt_len(skb); +- qdisc_qstats_backlog_dec(sch, skb); ++ if (!skb) ++ break; ++ ++ dropped_pkts++; ++ dropped_bytes += qdisc_pkt_len(skb); + rtnl_qdisc_drop(skb, sch); + } +- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); ++ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes); + + sch_tree_unlock(sch); + return 0; +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0004-wifi-cfg80211-Add-missing-lock-in-cfg80211_check_and.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0004-wifi-cfg80211-Add-missing-lock-in-cfg80211_check_and.patch new file mode 100644 index 0000000..61e40f9 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0004-wifi-cfg80211-Add-missing-lock-in-cfg80211_check_and.patch @@ -0,0 +1,89 @@ +From 438e77686c5a01d228dbdef1bee7cbeb92c49ea2 Mon Sep 17 00:00:00 2001 +From: Alexander Wetzel +Date: Thu, 17 Jul 2025 18:25:45 +0200 +Subject: [PATCH 04/28] wifi: cfg80211: Add missing lock in + cfg80211_check_and_end_cac() + +Callers of wdev_chandef() must hold the wiphy mutex. + +But the worker cfg80211_propagate_cac_done_wk() never takes the lock. 
+Which triggers the warning below with the mesh_peer_connected_dfs +test from hostapd and not (yet) released mac80211 code changes: + +WARNING: CPU: 0 PID: 495 at net/wireless/chan.c:1552 wdev_chandef+0x60/0x165 +Modules linked in: +CPU: 0 UID: 0 PID: 495 Comm: kworker/u4:2 Not tainted 6.14.0-rc5-wt-g03960e6f9d47 #33 13c287eeabfe1efea01c0bcc863723ab082e17cf +Workqueue: cfg80211 cfg80211_propagate_cac_done_wk +Stack: + 00000000 00000001 ffffff00 6093267c + 00000000 6002ec30 6d577c50 60037608 + 00000000 67e8d108 6063717b 00000000 +Call Trace: + [<6002ec30>] ? _printk+0x0/0x98 + [<6003c2b3>] show_stack+0x10e/0x11a + [<6002ec30>] ? _printk+0x0/0x98 + [<60037608>] dump_stack_lvl+0x71/0xb8 + [<6063717b>] ? wdev_chandef+0x60/0x165 + [<6003766d>] dump_stack+0x1e/0x20 + [<6005d1b7>] __warn+0x101/0x20f + [<6005d3a8>] warn_slowpath_fmt+0xe3/0x15d + [<600b0c5c>] ? mark_lock.part.0+0x0/0x4ec + [<60751191>] ? __this_cpu_preempt_check+0x0/0x16 + [<600b11a2>] ? mark_held_locks+0x5a/0x6e + [<6005d2c5>] ? warn_slowpath_fmt+0x0/0x15d + [<60052e53>] ? unblock_signals+0x3a/0xe7 + [<60052f2d>] ? um_set_signals+0x2d/0x43 + [<60751191>] ? __this_cpu_preempt_check+0x0/0x16 + [<607508b2>] ? lock_is_held_type+0x207/0x21f + [<6063717b>] wdev_chandef+0x60/0x165 + [<605f89b4>] regulatory_propagate_dfs_state+0x247/0x43f + [<60052f00>] ? um_set_signals+0x0/0x43 + [<605e6bfd>] cfg80211_propagate_cac_done_wk+0x3a/0x4a + [<6007e460>] process_scheduled_works+0x3bc/0x60e + [<6007d0ec>] ? move_linked_works+0x4d/0x81 + [<6007d120>] ? assign_work+0x0/0xaa + [<6007f81f>] worker_thread+0x220/0x2dc + [<600786ef>] ? set_pf_worker+0x0/0x57 + [<60087c96>] ? to_kthread+0x0/0x43 + [<6008ab3c>] kthread+0x2d3/0x2e2 + [<6007f5ff>] ? worker_thread+0x0/0x2dc + [<6006c05b>] ? 
calculate_sigpending+0x0/0x56 + [<6003b37d>] new_thread_handler+0x4a/0x64 +irq event stamp: 614611 +hardirqs last enabled at (614621): [<00000000600bc96b>] __up_console_sem+0x82/0xaf +hardirqs last disabled at (614630): [<00000000600bc92c>] __up_console_sem+0x43/0xaf +softirqs last enabled at (614268): [<00000000606c55c6>] __ieee80211_wake_queue+0x933/0x985 +softirqs last disabled at (614266): [<00000000606c52d6>] __ieee80211_wake_queue+0x643/0x985 + +Fixes: 26ec17a1dc5e ("cfg80211: Fix radar event during another phy CAC") +Signed-off-by: Alexander Wetzel +Link: https://patch.msgid.link/20250717162547.94582-1-Alexander@wetzel-home.de +Signed-off-by: Johannes Berg +--- + net/wireless/reg.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/wireless/reg.c b/net/wireless/reg.c +index f6846eb0f4b8..69a7f55e9de4 100644 +--- a/net/wireless/reg.c ++++ b/net/wireless/reg.c +@@ -4234,6 +4234,8 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) + struct wireless_dev *wdev; + unsigned int link_id; + ++ wiphy_lock(&rdev->wiphy); ++ + /* If we finished CAC or received radar, we should end any + * CAC running on the same channels. 
+ * the check !cfg80211_chandef_dfs_usable contain 2 options: +@@ -4258,6 +4260,7 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) + rdev_end_cac(rdev, wdev->netdev, link_id); + } + } ++ wiphy_unlock(&rdev->wiphy); + } + + void regulatory_propagate_dfs_state(struct wiphy *wiphy, +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0005-f2fs-compress-fix-UAF-of-f2fs_inode_info-in-f2fs_fre.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0005-f2fs-compress-fix-UAF-of-f2fs_inode_info-in-f2fs_fre.patch new file mode 100644 index 0000000..840b6a0 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0005-f2fs-compress-fix-UAF-of-f2fs_inode_info-in-f2fs_fre.patch @@ -0,0 +1,209 @@ +From 00dc2a0ec03e6d468f1cf02abd361e43f2f103bf Mon Sep 17 00:00:00 2001 +From: Zhiguo Niu +Date: Fri, 13 Jun 2025 09:50:45 +0800 +Subject: [PATCH 05/28] f2fs: compress: fix UAF of f2fs_inode_info in + f2fs_free_dic + +The decompress_io_ctx may be released asynchronously after +I/O completion. If this file is deleted immediately after read, +and the kworker processing post_read_wq has not been executed yet +due to high workloads, it is possible that the inode (f2fs_inode_info) +is evicted and freed before it is used in f2fs_free_dic. + +The UAF case is as below: + Thread A Thread B + - f2fs_decompress_end_io + - f2fs_put_dic + - queue_work + add free_dic work to post_read_wq + - do_unlink + - iput + - evict + - call_rcu +This file is deleted after read. + + Thread C kworker to process post_read_wq + - rcu_do_batch + - f2fs_free_inode + - kmem_cache_free + inode is freed by rcu + - process_scheduled_works + - f2fs_late_free_dic + - f2fs_free_dic + - f2fs_release_decomp_mem + read (dic->inode)->i_compress_algorithm + +This patch stores compress_algorithm and sbi in dic to avoid inode UAF. + +In addition, the previous solution, deprecated in [1], may cause a system hang. 
+[1] https://lore.kernel.org/all/c36ab955-c8db-4a8b-a9d0-f07b5f426c3f@kernel.org + +Cc: Daeho Jeong +Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") +Signed-off-by: Zhiguo Niu +Signed-off-by: Baocong Liu +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +--- + fs/f2fs/compress.c | 35 ++++++++++++++++++----------------- + fs/f2fs/f2fs.h | 2 ++ + 2 files changed, 20 insertions(+), 17 deletions(-) + +diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c +index 5a9b6d5f3ae0..4e4e952ea2bb 100644 +--- a/fs/f2fs/compress.c ++++ b/fs/f2fs/compress.c +@@ -213,13 +213,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) + ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, + dic->rbuf, &dic->rlen); + if (ret != LZO_E_OK) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lzo decompress failed, ret:%d", ret); + return -EIO; + } + + if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lzo invalid rlen:%zu, expected:%lu", + dic->rlen, PAGE_SIZE << dic->log_cluster_size); + return -EIO; +@@ -293,13 +293,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) + ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, + dic->clen, dic->rlen); + if (ret < 0) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lz4 decompress failed, ret:%d", ret); + return -EIO; + } + + if (ret != PAGE_SIZE << dic->log_cluster_size) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lz4 invalid ret:%d, expected:%lu", + ret, PAGE_SIZE << dic->log_cluster_size); + return -EIO; +@@ -427,7 +427,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) + + stream = zstd_init_dstream(max_window_size, workspace, workspace_size); + if (!stream) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s 
zstd_init_dstream failed", __func__); + vfree(workspace); + return -EIO; +@@ -463,14 +463,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) + + ret = zstd_decompress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s zstd_decompress_stream failed, ret: %d", + __func__, zstd_get_error_code(ret)); + return -EIO; + } + + if (dic->rlen != outbuf.pos) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s ZSTD invalid rlen:%zu, expected:%lu", + __func__, dic->rlen, + PAGE_SIZE << dic->log_cluster_size); +@@ -729,7 +729,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + + void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) + { +- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); ++ struct f2fs_sb_info *sbi = dic->sbi; + struct f2fs_inode_info *fi = F2FS_I(dic->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; +@@ -802,7 +802,7 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, + { + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); +- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); ++ struct f2fs_sb_info *sbi = dic->sbi; + + dec_page_count(sbi, F2FS_RD_DATA); + +@@ -1627,11 +1627,10 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi, + static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc) + { +- const struct f2fs_compress_ops *cops = +- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; ++ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; + int i; + +- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) ++ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) + return 0; + + dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); +@@ -1664,10 +1663,9 @@ static int 
f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc) + { +- const struct f2fs_compress_ops *cops = +- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; ++ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; + +- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) ++ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) + return; + + if (!bypass_destroy_callback && cops->destroy_decompress_ctx) +@@ -1702,6 +1700,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) + + dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + dic->inode = cc->inode; ++ dic->sbi = sbi; ++ dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; + atomic_set(&dic->remaining_pages, cc->nr_cpages); + dic->cluster_idx = cc->cluster_idx; + dic->cluster_size = cc->cluster_size; +@@ -1745,6 +1745,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic, + bool bypass_destroy_callback) + { + int i; ++ /* use sbi in dic to avoid UAF of dic->inode */ ++ struct f2fs_sb_info *sbi __maybe_unused = dic->sbi; + + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + +@@ -1787,8 +1789,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task) + f2fs_free_dic(dic, false); + } else { + INIT_WORK(&dic->free_work, f2fs_late_free_dic); +- queue_work(F2FS_I_SB(dic->inode)->post_read_wq, +- &dic->free_work); ++ queue_work(dic->sbi->post_read_wq, &dic->free_work); + } + } + } +diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h +index b07f9a17b844..cdd33e4bcba7 100644 +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -1526,6 +1526,7 @@ struct compress_io_ctx { + struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ ++ struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned 
int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ +@@ -1566,6 +1567,7 @@ struct decompress_io_ctx { + + bool failed; /* IO error occurred before decompression? */ + bool need_verity; /* need fs-verity verification after decompression? */ ++ unsigned char compress_algorithm; /* backup algorithm type */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ + struct work_struct verity_work; /* work to verify the decompressed pages */ +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0006-md-make-rdev_addable-usable-for-rcu-mode.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0006-md-make-rdev_addable-usable-for-rcu-mode.patch new file mode 100644 index 0000000..6662000 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0006-md-make-rdev_addable-usable-for-rcu-mode.patch @@ -0,0 +1,75 @@ +From 2f711bdea0facce57b356e71647c41b99d4cf70e Mon Sep 17 00:00:00 2001 +From: Yang Erkun +Date: Thu, 31 Jul 2025 19:45:30 +0800 +Subject: [PATCH 06/28] md: make rdev_addable usable for rcu mode + +Our testcase trigger panic: + +BUG: kernel NULL pointer dereference, address: 00000000000000e0 +... +Oops: Oops: 0000 [#1] SMP NOPTI +CPU: 2 UID: 0 PID: 85 Comm: kworker/2:1 Not tainted 6.16.0+ #94 +PREEMPT(none) +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS +1.16.1-2.fc37 04/01/2014 +Workqueue: md_misc md_start_sync +RIP: 0010:rdev_addable+0x4d/0xf0 +... +Call Trace: + + md_start_sync+0x329/0x480 + process_one_work+0x226/0x6d0 + worker_thread+0x19e/0x340 + kthread+0x10f/0x250 + ret_from_fork+0x14d/0x180 + ret_from_fork_asm+0x1a/0x30 + +Modules linked in: raid10 +CR2: 00000000000000e0 +---[ end trace 0000000000000000 ]--- +RIP: 0010:rdev_addable+0x4d/0xf0 + +md_spares_need_change in md_start_sync will call rdev_addable which +protected by rcu_read_lock/rcu_read_unlock. 
This rcu context will keep +rdev from being released, but rdev->mddev will be set to NULL +before we call synchronize_rcu in md_kick_rdev_from_array. Fix this by +using READ_ONCE and checking whether rdev->mddev is still alive. + +Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") +Fixes: 570b9147deb6 ("md: use RCU lock to protect traversal in md_spares_need_change()") +Signed-off-by: Yang Erkun +Link: https://lore.kernel.org/linux-raid/20250731114530.776670-1-yangerkun@huawei.com +Signed-off-by: Yu Kuai +--- + drivers/md/md.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index d26307644292..6f13ff936a95 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -9367,6 +9367,12 @@ static bool rdev_is_spare(struct md_rdev *rdev) + + static bool rdev_addable(struct md_rdev *rdev) + { ++ struct mddev *mddev; ++ ++ mddev = READ_ONCE(rdev->mddev); ++ if (!mddev) ++ return false; ++ + /* rdev is already used, don't add it again. */ + if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 || + test_bit(Faulty, &rdev->flags)) +@@ -9377,7 +9383,7 @@ static bool rdev_addable(struct md_rdev *rdev) + return true; + + /* Allow to add if array is read-write. 
*/ +- if (md_is_rdwr(rdev->mddev)) ++ if (md_is_rdwr(mddev)) + return true; + + /* +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0007-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0007-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch new file mode 100644 index 0000000..3299565 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0007-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch @@ -0,0 +1,135 @@ +From 473698ed0bfa15becde5223ee2857608ed36ca68 Mon Sep 17 00:00:00 2001 +From: Pu Lehui +Date: Thu, 29 May 2025 15:56:47 +0000 +Subject: [PATCH 07/28] mm: fix uprobe pte be overwritten when expanding vma + +Patch series "Fix uprobe pte be overwritten when expanding vma". + +This patch (of 4): + +We encountered a BUG alert triggered by Syzkaller as follows: + BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1 + +And we can reproduce it with the following steps: +1. register uprobe on file at zero offset +2. mmap the file at zero offset: + addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0); +3. mremap part of vma1 to new vma2: + addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE); +4. mremap back to orig addr1: + mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1); + +In step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2 +with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1 +to vma2, then unmap the vma1 range [addr1, addr1 + 4096]. + +In step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the +addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096, +addr1 + 8192] still maps the file, it will take vma_merge_new_range to +expand the range, and then do uprobe_mmap in vma_complete. Since the +merged vma pgoff is also zero offset, it will install uprobe anon page to +the merged vma. 
However, the upcoming move_page_tables step, which uses +set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite +the new uprobe pte in the merged vma, and leave that pte orphaned. + +Since the uprobe pte will be remapped to the merged vma, we can remove the +unnecessary uprobe_mmap upon merged vma. + +This problem was first found in linux-6.6.y and also exists in the +community syzkaller: +https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/ + +Link: https://lkml.kernel.org/r/20250529155650.4017699-1-pulehui@huaweicloud.com +Link: https://lkml.kernel.org/r/20250529155650.4017699-2-pulehui@huaweicloud.com +Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") +Signed-off-by: Pu Lehui +Suggested-by: Lorenzo Stoakes +Reviewed-by: Lorenzo Stoakes +Acked-by: David Hildenbrand +Cc: Jann Horn +Cc: Liam Howlett +Cc: "Masami Hiramatsu (Google)" +Cc: Oleg Nesterov +Cc: Peter Zijlstra +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +--- + mm/vma.c | 20 +++++++++++++++++--- + mm/vma.h | 7 +++++++ + 2 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/mm/vma.c b/mm/vma.c +index 0ab9a2787524..b023ac8b6d9f 100644 +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -240,10 +240,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, + + if (vp->file) { + i_mmap_unlock_write(vp->mapping); +- uprobe_mmap(vp->vma); + +- if (vp->adj_next) +- uprobe_mmap(vp->adj_next); ++ if (!vp->skip_vma_uprobe) { ++ uprobe_mmap(vp->vma); ++ ++ if (vp->adj_next) ++ uprobe_mmap(vp->adj_next); ++ } + } + + if (vp->remove) { +@@ -595,6 +598,9 @@ static int commit_merge(struct vma_merge_struct *vmg, + + init_multi_vma_prep(&vp, vmg->vma, adjust, remove, remove2); + ++ if (vmg && vmg->skip_vma_uprobe) ++ vp.skip_vma_uprobe = true; ++ + VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && + vp.anon_vma != adjust->anon_vma); + +@@ -1662,6 +1668,14 @@ struct vm_area_struct *copy_vma(struct 
vm_area_struct **vmap, + faulted_in_anon_vma = false; + } + ++ /* ++ * If the VMA we are copying might contain a uprobe PTE, ensure ++ * that we do not establish one upon merge. Otherwise, when mremap() ++ * moves page tables, it will orphan the newly created PTE. ++ */ ++ if (vma->vm_file) ++ vmg.skip_vma_uprobe = true; ++ + new_vma = find_vma_prev(mm, addr, &vmg.prev); + if (new_vma && new_vma->vm_start < addr + len) + return NULL; /* should never get here */ +diff --git a/mm/vma.h b/mm/vma.h +index 7eae75a4438b..87d259f4f9e2 100644 +--- a/mm/vma.h ++++ b/mm/vma.h +@@ -19,6 +19,8 @@ struct vma_prepare { + struct vm_area_struct *insert; + struct vm_area_struct *remove; + struct vm_area_struct *remove2; ++ ++ bool skip_vma_uprobe :1; + }; + + struct unlink_vma_file_batch { +@@ -93,6 +95,11 @@ struct vma_merge_struct { + * execute the merge, returning NULL. + */ + bool give_up_on_oom :1; ++ ++ /* ++ * If set, skip uprobe_mmap upon merged vma. ++ */ ++ bool skip_vma_uprobe :1; + }; + + static inline bool vmg_nomem(struct vma_merge_struct *vmg) +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0008-um-work-around-sched_yield-not-yielding-in-time-trav.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0008-um-work-around-sched_yield-not-yielding-in-time-trav.patch new file mode 100644 index 0000000..cd1bc4a --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0008-um-work-around-sched_yield-not-yielding-in-time-trav.patch @@ -0,0 +1,61 @@ +From 4ea0e60686d4d047807334a3372fcb1bf40ca432 Mon Sep 17 00:00:00 2001 +From: Benjamin Berg +Date: Fri, 14 Mar 2025 14:08:15 +0100 +Subject: [PATCH 08/28] um: work around sched_yield not yielding in time-travel + mode + +sched_yield by a userspace may not actually cause scheduling in +time-travel mode as no time has passed. In the case seen it appears to +be a badly implemented userspace spinlock in ASAN. 
Unfortunately, with +time-travel it causes an extreme slowdown or even deadlock depending on +the kernel configuration (CONFIG_UML_MAX_USERSPACE_ITERATIONS). + +Work around it by accounting time to the process whenever it executes a +sched_yield syscall. + +Signed-off-by: Benjamin Berg +Link: https://patch.msgid.link/20250314130815.226872-1-benjamin@sipsolutions.net +Signed-off-by: Johannes Berg +--- + arch/um/include/linux/time-internal.h | 2 ++ + arch/um/kernel/skas/syscall.c | 11 +++++++++++ + 2 files changed, 13 insertions(+) + +diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h +index b22226634ff6..138908b999d7 100644 +--- a/arch/um/include/linux/time-internal.h ++++ b/arch/um/include/linux/time-internal.h +@@ -83,6 +83,8 @@ extern void time_travel_not_configured(void); + #define time_travel_del_event(...) time_travel_not_configured() + #endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ + ++extern unsigned long tt_extra_sched_jiffies; ++ + /* + * Without CONFIG_UML_TIME_TRAVEL_SUPPORT this is a linker error if used, + * which is intentional since we really shouldn't link it in that case. +diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c +index b09e85279d2b..a5beaea2967e 100644 +--- a/arch/um/kernel/skas/syscall.c ++++ b/arch/um/kernel/skas/syscall.c +@@ -31,6 +31,17 @@ void handle_syscall(struct uml_pt_regs *r) + goto out; + + syscall = UPT_SYSCALL_NR(r); ++ ++ /* ++ * If no time passes, then sched_yield may not actually yield, causing ++ * broken spinlock implementations in userspace (ASAN) to hang for long ++ * periods of time. 
++ */ ++ if ((time_travel_mode == TT_MODE_INFCPU || ++ time_travel_mode == TT_MODE_EXTERNAL) && ++ syscall == __NR_sched_yield) ++ tt_extra_sched_jiffies += 1; ++ + if (syscall >= 0 && syscall < __NR_syscalls) { + unsigned long ret = EXECUTE_SYSCALL(syscall, regs); + +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0009-ext4-introduce-ITAIL-helper.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0009-ext4-introduce-ITAIL-helper.patch new file mode 100644 index 0000000..1f8e4db --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0009-ext4-introduce-ITAIL-helper.patch @@ -0,0 +1,82 @@ +From 88c85a7f142ff1b410075f6c778677c6774acebc Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Sat, 8 Feb 2025 14:31:40 +0800 +Subject: [PATCH 09/28] ext4: introduce ITAIL helper + +Introduce ITAIL helper to get the bound of xattr in inode. + +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20250208063141.1539283-2-yebin@huaweicloud.com +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 10 +++++----- + fs/ext4/xattr.h | 3 +++ + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 6ff94cdf1515..7cdece4ea6fa 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -649,7 +649,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, + return error; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); +- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; ++ end = ITAIL(inode, raw_inode); + error = xattr_check_inode(inode, header, end); + if (error) + goto cleanup; +@@ -793,7 +793,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) + return error; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); +- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; ++ end = ITAIL(inode, raw_inode); + error = xattr_check_inode(inode, header, end); + if (error) + goto cleanup; +@@ 
-879,7 +879,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) + goto out; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); +- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; ++ end = ITAIL(inode, raw_inode); + ret = xattr_check_inode(inode, header, end); + if (ret) + goto out; +@@ -2244,7 +2244,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, + header = IHDR(inode, raw_inode); + is->s.base = is->s.first = IFIRST(header); + is->s.here = is->s.first; +- is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; ++ is->s.end = ITAIL(inode, raw_inode); + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { + error = xattr_check_inode(inode, header, is->s.end); + if (error) +@@ -2795,7 +2795,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + */ + + base = IFIRST(header); +- end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; ++ end = ITAIL(inode, raw_inode); + min_offs = end - base; + total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32); + +diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h +index b25c2d7b5f99..5197f17ffd9a 100644 +--- a/fs/ext4/xattr.h ++++ b/fs/ext4/xattr.h +@@ -67,6 +67,9 @@ struct ext4_xattr_entry { + ((void *)raw_inode + \ + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) ++#define ITAIL(inode, raw_inode) \ ++ ((void *)(raw_inode) + \ ++ EXT4_SB((inode)->i_sb)->s_inode_size) + #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) + + /* +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0010-ext4-fix-out-of-bound-read-in-ext4_xattr_inode_dec_r.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0010-ext4-fix-out-of-bound-read-in-ext4_xattr_inode_dec_r.patch new file mode 100644 index 0000000..dae5dde --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0010-ext4-fix-out-of-bound-read-in-ext4_xattr_inode_dec_r.patch @@ -0,0 +1,195 @@ +From 
4b6a284e6cf233687695d76ebe86a36ffa259c19 Mon Sep 17 00:00:00 2001 +From: Ye Bin +Date: Sat, 8 Feb 2025 14:31:41 +0800 +Subject: [PATCH 10/28] ext4: fix out-of-bound read in + ext4_xattr_inode_dec_ref_all() + +There's issue as follows: +BUG: KASAN: use-after-free in ext4_xattr_inode_dec_ref_all+0x6ff/0x790 +Read of size 4 at addr ffff88807b003000 by task syz-executor.0/15172 + +CPU: 3 PID: 15172 Comm: syz-executor.0 +Call Trace: + __dump_stack lib/dump_stack.c:82 [inline] + dump_stack+0xbe/0xfd lib/dump_stack.c:123 + print_address_description.constprop.0+0x1e/0x280 mm/kasan/report.c:400 + __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560 + kasan_report+0x3a/0x50 mm/kasan/report.c:585 + ext4_xattr_inode_dec_ref_all+0x6ff/0x790 fs/ext4/xattr.c:1137 + ext4_xattr_delete_inode+0x4c7/0xda0 fs/ext4/xattr.c:2896 + ext4_evict_inode+0xb3b/0x1670 fs/ext4/inode.c:323 + evict+0x39f/0x880 fs/inode.c:622 + iput_final fs/inode.c:1746 [inline] + iput fs/inode.c:1772 [inline] + iput+0x525/0x6c0 fs/inode.c:1758 + ext4_orphan_cleanup fs/ext4/super.c:3298 [inline] + ext4_fill_super+0x8c57/0xba40 fs/ext4/super.c:5300 + mount_bdev+0x355/0x410 fs/super.c:1446 + legacy_get_tree+0xfe/0x220 fs/fs_context.c:611 + vfs_get_tree+0x8d/0x2f0 fs/super.c:1576 + do_new_mount fs/namespace.c:2983 [inline] + path_mount+0x119a/0x1ad0 fs/namespace.c:3316 + do_mount+0xfc/0x110 fs/namespace.c:3329 + __do_sys_mount fs/namespace.c:3540 [inline] + __se_sys_mount+0x219/0x2e0 fs/namespace.c:3514 + do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x67/0xd1 + +Memory state around the buggy address: + ffff88807b002f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff88807b002f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +>ffff88807b003000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff88807b003080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88807b003100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +Above issue happens as 
ext4_xattr_delete_inode() doesn't check whether +the xattr is valid if the xattr is in the inode. +To solve the above issue, call xattr_check_inode() to check whether the +xattr in the inode is valid. In fact, we can directly verify in +ext4_iget_extra_inode(), so that there is no divergent verification. + +Fixes: e50e5129f384 ("ext4: xattr-in-inode support") +Signed-off-by: Ye Bin +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20250208063141.1539283-3-yebin@huaweicloud.com +Signed-off-by: Theodore Ts'o +--- + fs/ext4/inode.c | 5 +++++ + fs/ext4/xattr.c | 26 +------------------------- + fs/ext4/xattr.h | 7 +++++++ + 3 files changed, 13 insertions(+), 25 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 7923602271ad..c76d8a6871d6 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4682,6 +4682,11 @@ static inline int ext4_iget_extra_inode(struct inode *inode, + *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + ++ err = xattr_check_inode(inode, IHDR(inode, raw_inode), ++ ITAIL(inode, raw_inode)); ++ if (err) ++ return err; ++ + ext4_set_inode_state(inode, EXT4_STATE_XATTR); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 7cdece4ea6fa..8ced9beba2f7 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -308,7 +308,7 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, + __ext4_xattr_check_block((inode), (bh), __func__, __LINE__) + + +-static inline int ++int + __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, + void *end, const char *function, unsigned int line) + { +@@ -316,9 +316,6 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, + function, line); + } + +-#define xattr_check_inode(inode, header, end) \ +- __xattr_check_inode((inode), (header), (end), __func__, __LINE__) +- + static int + xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry, + void *end, int name_index, 
const char *name, int sorted) +@@ -650,9 +647,6 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); + end = ITAIL(inode, raw_inode); +- error = xattr_check_inode(inode, header, end); +- if (error) +- goto cleanup; + entry = IFIRST(header); + error = xattr_find_entry(inode, &entry, end, name_index, name, 0); + if (error) +@@ -783,7 +777,6 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; +- void *end; + int error; + + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) +@@ -793,14 +786,9 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) + return error; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); +- end = ITAIL(inode, raw_inode); +- error = xattr_check_inode(inode, header, end); +- if (error) +- goto cleanup; + error = ext4_xattr_list_entries(dentry, IFIRST(header), + buffer, buffer_size); + +-cleanup: + brelse(iloc.bh); + return error; + } +@@ -868,7 +856,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + qsize_t ea_inode_refs = 0; +- void *end; + int ret; + + lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem); +@@ -879,10 +866,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) + goto out; + raw_inode = ext4_raw_inode(&iloc); + header = IHDR(inode, raw_inode); +- end = ITAIL(inode, raw_inode); +- ret = xattr_check_inode(inode, header, end); +- if (ret) +- goto out; + + for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); + entry = EXT4_XATTR_NEXT(entry)) +@@ -2246,9 +2229,6 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, + is->s.here = is->s.first; + is->s.end = ITAIL(inode, raw_inode); + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { +- 
error = xattr_check_inode(inode, header, is->s.end); +- if (error) +- return error; + /* Find the named attribute. */ + error = xattr_find_entry(inode, &is->s.here, is->s.end, + i->name_index, i->name, 0); +@@ -2799,10 +2779,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + min_offs = end - base; + total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32); + +- error = xattr_check_inode(inode, header, end); +- if (error) +- goto cleanup; +- + ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino); + if (ifree >= isize_diff) + goto shift; +diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h +index 5197f17ffd9a..1fedf44d4fb6 100644 +--- a/fs/ext4/xattr.h ++++ b/fs/ext4/xattr.h +@@ -209,6 +209,13 @@ extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + extern struct mb_cache *ext4_xattr_create_cache(void); + extern void ext4_xattr_destroy_cache(struct mb_cache *); + ++extern int ++__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, ++ void *end, const char *function, unsigned int line); ++ ++#define xattr_check_inode(inode, header, end) \ ++ __xattr_check_inode((inode), (header), (end), __func__, __LINE__) ++ + #ifdef CONFIG_EXT4_FS_SECURITY + extern int ext4_init_security(handle_t *handle, struct inode *inode, + struct inode *dir, const struct qstr *qstr); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0011-idpf-check-error-for-register_netdev-on-init.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0011-idpf-check-error-for-register_netdev-on-init.patch new file mode 100644 index 0000000..f100bd2 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0011-idpf-check-error-for-register_netdev-on-init.patch @@ -0,0 +1,97 @@ +From 0aebbc9f811df8eeb13b68d542a46db2ef5a72ff Mon Sep 17 00:00:00 2001 +From: Emil Tantilov +Date: Fri, 14 Feb 2025 09:18:16 -0800 +Subject: [PATCH 11/28] idpf: check error for register_netdev() on init + +Current init logic ignores 
the error code from register_netdev(), +which will cause WARN_ON() on attempt to unregister it, if there was one, +and there is no info for the user that the creation of the netdev failed. + +WARNING: CPU: 89 PID: 6902 at net/core/dev.c:11512 unregister_netdevice_many_notify+0x211/0x1a10 +... +[ 3707.563641] unregister_netdev+0x1c/0x30 +[ 3707.563656] idpf_vport_dealloc+0x5cf/0xce0 [idpf] +[ 3707.563684] idpf_deinit_task+0xef/0x160 [idpf] +[ 3707.563712] idpf_vc_core_deinit+0x84/0x320 [idpf] +[ 3707.563739] idpf_remove+0xbf/0x780 [idpf] +[ 3707.563769] pci_device_remove+0xab/0x1e0 +[ 3707.563786] device_release_driver_internal+0x371/0x530 +[ 3707.563803] driver_detach+0xbf/0x180 +[ 3707.563816] bus_remove_driver+0x11b/0x2a0 +[ 3707.563829] pci_unregister_driver+0x2a/0x250 + +Introduce an error check and log the vport number and error code. +On removal make sure to check VPORT_REG_NETDEV flag prior to calling +unregister and free on the netdev. + +Add local variables for idx, vport_config and netdev for readability. 
+ +Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") +Suggested-by: Tony Nguyen +Signed-off-by: Emil Tantilov +Reviewed-by: Simon Horman +Tested-by: Samuel Salin +Signed-off-by: Tony Nguyen +--- + drivers/net/ethernet/intel/idpf/idpf_lib.c | 31 +++++++++++++++------- + 1 file changed, 22 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index 52d9caab2fcb..16142c4aa77b 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -910,15 +910,19 @@ static int idpf_stop(struct net_device *netdev) + static void idpf_decfg_netdev(struct idpf_vport *vport) + { + struct idpf_adapter *adapter = vport->adapter; ++ u16 idx = vport->idx; + + kfree(vport->rx_ptype_lkup); + vport->rx_ptype_lkup = NULL; + +- unregister_netdev(vport->netdev); +- free_netdev(vport->netdev); ++ if (test_and_clear_bit(IDPF_VPORT_REG_NETDEV, ++ adapter->vport_config[idx]->flags)) { ++ unregister_netdev(vport->netdev); ++ free_netdev(vport->netdev); ++ } + vport->netdev = NULL; + +- adapter->netdevs[vport->idx] = NULL; ++ adapter->netdevs[idx] = NULL; + } + + /** +@@ -1538,13 +1542,22 @@ void idpf_init_task(struct work_struct *work) + } + + for (index = 0; index < adapter->max_vports; index++) { +- if (adapter->netdevs[index] && +- !test_bit(IDPF_VPORT_REG_NETDEV, +- adapter->vport_config[index]->flags)) { +- register_netdev(adapter->netdevs[index]); +- set_bit(IDPF_VPORT_REG_NETDEV, +- adapter->vport_config[index]->flags); ++ struct net_device *netdev = adapter->netdevs[index]; ++ struct idpf_vport_config *vport_config; ++ ++ vport_config = adapter->vport_config[index]; ++ ++ if (!netdev || ++ test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) ++ continue; ++ ++ err = register_netdev(netdev); ++ if (err) { ++ dev_err(&pdev->dev, "failed to register netdev for vport %d: %pe\n", ++ index, ERR_PTR(err)); ++ continue; + } ++ 
set_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags); + } + + /* As all the required vports are created, clear the reset flag +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0012-ibmvnic-Use-kernel-helpers-for-hex-dumps.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0012-ibmvnic-Use-kernel-helpers-for-hex-dumps.patch new file mode 100644 index 0000000..e79902e --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0012-ibmvnic-Use-kernel-helpers-for-hex-dumps.patch @@ -0,0 +1,114 @@ +From bea7453986cde5a7831d7fae9085db3bb196bfe7 Mon Sep 17 00:00:00 2001 +From: Nick Child +Date: Thu, 20 Mar 2025 16:29:51 -0500 +Subject: [PATCH 12/28] ibmvnic: Use kernel helpers for hex dumps + +Previously, when the driver was printing hex dumps, the buffer was cast +to an 8 byte long and printed using string formatters. If the buffer +size was not a multiple of 8 then a read buffer overflow was possible. + +Therefore, create a new ibmvnic function that loops over a buffer and +calls hex_dump_to_buffer instead. 
+ +This patch address KASAN reports like the one below: + ibmvnic 30000003 env3: Login Buffer: + ibmvnic 30000003 env3: 01000000af000000 + <...> + ibmvnic 30000003 env3: 2e6d62692e736261 + ibmvnic 30000003 env3: 65050003006d6f63 + ================================================================== + BUG: KASAN: slab-out-of-bounds in ibmvnic_login+0xacc/0xffc [ibmvnic] + Read of size 8 at addr c0000001331a9aa8 by task ip/17681 + <...> + Allocated by task 17681: + <...> + ibmvnic_login+0x2f0/0xffc [ibmvnic] + ibmvnic_open+0x148/0x308 [ibmvnic] + __dev_open+0x1ac/0x304 + <...> + The buggy address is located 168 bytes inside of + allocated 175-byte region [c0000001331a9a00, c0000001331a9aaf) + <...> + ================================================================= + ibmvnic 30000003 env3: 000000000033766e + +Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") +Signed-off-by: Nick Child +Reviewed-by: Dave Marquardt +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250320212951.11142-1-nnac123@linux.ibm.com +Signed-off-by: Jakub Kicinski +--- + drivers/net/ethernet/ibm/ibmvnic.c | 30 ++++++++++++++++++------------ + 1 file changed, 18 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index 61db00b2b33e..a9b3e7216789 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -4841,6 +4841,18 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + strscpy(vlcd->name, adapter->netdev->name, len); + } + ++static void ibmvnic_print_hex_dump(struct net_device *dev, void *buf, ++ size_t len) ++{ ++ unsigned char hex_str[16 * 3]; ++ ++ for (size_t i = 0; i < len; i += 16) { ++ hex_dump_to_buffer((unsigned char *)buf + i, len - i, 16, 8, ++ hex_str, sizeof(hex_str), false); ++ netdev_dbg(dev, "%s\n", hex_str); ++ } ++} ++ + static int send_login(struct ibmvnic_adapter *adapter) + { + struct ibmvnic_login_rsp_buffer *login_rsp_buffer; +@@ 
-4951,10 +4963,8 @@ static int send_login(struct ibmvnic_adapter *adapter) + vnic_add_client_data(adapter, vlcd); + + netdev_dbg(adapter->netdev, "Login Buffer:\n"); +- for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { +- netdev_dbg(adapter->netdev, "%016lx\n", +- ((unsigned long *)(adapter->login_buf))[i]); +- } ++ ibmvnic_print_hex_dump(adapter->netdev, adapter->login_buf, ++ adapter->login_buf_sz); + + memset(&crq, 0, sizeof(crq)); + crq.login.first = IBMVNIC_CRQ_CMD; +@@ -5331,15 +5341,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) + { + struct device *dev = &adapter->vdev->dev; + struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; +- int i; + + dma_unmap_single(dev, adapter->ip_offload_tok, + sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE); + + netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); +- for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) +- netdev_dbg(adapter->netdev, "%016lx\n", +- ((unsigned long *)(buf))[i]); ++ ibmvnic_print_hex_dump(adapter->netdev, buf, ++ sizeof(adapter->ip_offload_buf)); + + netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); + netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); +@@ -5570,10 +5578,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, + netdev->mtu = adapter->req_mtu - ETH_HLEN; + + netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); +- for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { +- netdev_dbg(adapter->netdev, "%016lx\n", +- ((unsigned long *)(adapter->login_rsp_buf))[i]); +- } ++ ibmvnic_print_hex_dump(netdev, adapter->login_rsp_buf, ++ adapter->login_rsp_buf_sz); + + /* Sanity checks */ + if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs || +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0013-net-mlx5-HWS-change-error-flow-on-matcher-disconnect.patch 
b/bsp_diff/caas/kernel/linux-intel-lts2024/0013-net-mlx5-HWS-change-error-flow-on-matcher-disconnect.patch new file mode 100644 index 0000000..2d514af --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0013-net-mlx5-HWS-change-error-flow-on-matcher-disconnect.patch @@ -0,0 +1,91 @@ +From 429f5ae5ee3e0dd00e74c60bbde100f00415161a Mon Sep 17 00:00:00 2001 +From: Yevgeny Kliteynik +Date: Thu, 2 Jan 2025 20:14:05 +0200 +Subject: [PATCH 13/28] net/mlx5: HWS, change error flow on matcher disconnect + +Currently, when firmware failure occurs during matcher disconnect flow, +the error flow of the function reconnects the matcher back and returns +an error, which continues running the calling function and eventually +frees the matcher that is being disconnected. +This leads to a case where we have a freed matcher on the matchers list, +which in turn leads to use-after-free and eventual crash. + +This patch fixes that by not trying to reconnect the matcher back when +some FW command fails during disconnect. + +Note that we're dealing here with FW error. We can't overcome this +problem. This might lead to bad steering state (e.g. wrong connection +between matchers), and will also lead to resource leakage, as it is +the case with any other error handling during resource destruction. + +However, the goal here is to allow the driver to continue and not crash +the machine with use-after-free error. 
+ +Signed-off-by: Yevgeny Kliteynik +Signed-off-by: Itamar Gozlan +Reviewed-by: Mark Bloch +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20250102181415.1477316-7-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +--- + .../mlx5/core/steering/hws/mlx5hws_matcher.c | 24 +++++++------------ + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c +index 61a1155d4b4f..ce541c60c5b4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_matcher.c +@@ -165,14 +165,14 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); + if (ret) { +- mlx5hws_err(tbl->ctx, "Failed to disconnect matcher\n"); +- goto matcher_reconnect; ++ mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect matcher\n"); ++ return ret; + } + } else { + ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl); + if (ret) { +- mlx5hws_err(tbl->ctx, "Failed to disconnect last matcher\n"); +- goto matcher_reconnect; ++ mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect last matcher\n"); ++ return ret; + } + } + +@@ -180,27 +180,19 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) + if (prev_ft_id == tbl->ft_id) { + ret = mlx5hws_table_update_connected_miss_tables(tbl); + if (ret) { +- mlx5hws_err(tbl->ctx, "Fatal error, failed to update connected miss table\n"); +- goto matcher_reconnect; ++ mlx5hws_err(tbl->ctx, ++ "Fatal error, failed to update connected miss table\n"); ++ return ret; + } + } + + ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Fatal error, failed to restore matcher ft default miss\n"); +- goto matcher_reconnect; ++ return ret; + } + + return 0; +- 
+-matcher_reconnect: +- if (list_empty(&tbl->matchers_list) || !prev) +- list_add(&matcher->list_node, &tbl->matchers_list); +- else +- /* insert after prev matcher */ +- list_add(&matcher->list_node, &prev->list_node); +- +- return ret; + } + + static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0014-kernel-be-more-careful-about-dup_mmap-failures-and-u.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0014-kernel-be-more-careful-about-dup_mmap-failures-and-u.patch new file mode 100644 index 0000000..654b6f2 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0014-kernel-be-more-careful-about-dup_mmap-failures-and-u.patch @@ -0,0 +1,109 @@ +From 9c06c5ebab048265bf070834637bb17639d75ec3 Mon Sep 17 00:00:00 2001 +From: "Liam R. Howlett" +Date: Mon, 27 Jan 2025 12:02:21 -0500 +Subject: [PATCH 14/28] kernel: be more careful about dup_mmap() failures and + uprobe registering + +If a memory allocation fails during dup_mmap(), the maple tree can be left +in an unsafe state for other iterators besides the exit path. All the +locks are dropped before the exit_mmap() call (in mm/mmap.c), but the +incomplete mm_struct can be reached through (at least) the rmap finding +the vmas which have a pointer back to the mm_struct. + +Up to this point, there have been no issues with being able to find an +mm_struct that was only partially initialised. Syzbot was able to make +the incomplete mm_struct fail with recent forking changes, so it has been +proven unsafe to use the mm_struct that hasn't been initialised, as +referenced in the link below. + +Although 8ac662f5da19f ("fork: avoid inappropriate uprobe access to +invalid mm") fixed the uprobe access, it does not completely remove the +race. 
+ +This patch sets the MMF_OOM_SKIP to avoid the iteration of the vmas on the +oom side (even though this is extremely unlikely to be selected as an oom +victim in the race window), and sets MMF_UNSTABLE to avoid other potential +users from using a partially initialised mm_struct. + +When registering vmas for uprobe, skip the vmas in an mm that is marked +unstable. Modifying a vma in an unstable mm may cause issues if the mm +isn't fully initialised. + +Link: https://lore.kernel.org/all/6756d273.050a0220.2477f.003d.GAE@google.com/ +Link: https://lkml.kernel.org/r/20250127170221.1761366-1-Liam.Howlett@oracle.com +Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") +Signed-off-by: Liam R. Howlett +Reviewed-by: Lorenzo Stoakes +Cc: Oleg Nesterov +Cc: Masami Hiramatsu +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Michal Hocko +Cc: Peng Zhang +Cc: Matthew Wilcox +Signed-off-by: Andrew Morton +--- + kernel/events/uprobes.c | 4 ++++ + kernel/fork.c | 17 ++++++++++++++--- + 2 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c +index 0013743deecb..e756fe636719 100644 +--- a/kernel/events/uprobes.c ++++ b/kernel/events/uprobes.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include /* check_stable_address_space */ + + #include + +@@ -1112,6 +1113,9 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) + * returns NULL in find_active_uprobe_rcu(). 
+ */ + mmap_write_lock(mm); ++ if (check_stable_address_space(mm)) ++ goto unlock; ++ + vma = find_vma(mm, info->vaddr); + if (!vma || !valid_vma(vma, is_register) || + file_inode(vma->vm_file) != uprobe->inode) +diff --git a/kernel/fork.c b/kernel/fork.c +index 3798764cb326..24bea193df60 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -751,7 +751,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + mt_set_in_rcu(vmi.mas.tree); + ksm_fork(mm, oldmm); + khugepaged_fork(mm, oldmm); +- } else if (mpnt) { ++ } else { ++ + /* + * The entire maple tree has already been duplicated. If the + * mmap duplication fails, mark the failure point with +@@ -759,8 +760,18 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + * stop releasing VMAs that have not been duplicated after this + * point. + */ +- mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); +- mas_store(&vmi.mas, XA_ZERO_ENTRY); ++ if (mpnt) { ++ mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); ++ mas_store(&vmi.mas, XA_ZERO_ENTRY); ++ /* Avoid OOM iterating a broken tree */ ++ set_bit(MMF_OOM_SKIP, &mm->flags); ++ } ++ /* ++ * The mm_struct is going to exit, but the locks will be dropped ++ * first. Set the mm_struct as unstable is advisable as it is ++ * not fully initialised. 
++ */ ++ set_bit(MMF_UNSTABLE, &mm->flags); + } + out: + mmap_write_unlock(mm); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0015-kernfs-Fix-UAF-in-polling-when-open-file-is-released.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0015-kernfs-Fix-UAF-in-polling-when-open-file-is-released.patch new file mode 100644 index 0000000..1c138bf --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0015-kernfs-Fix-UAF-in-polling-when-open-file-is-released.patch @@ -0,0 +1,297 @@ +From 58795336d6a5b05b86afe716f798f726e5ef123c Mon Sep 17 00:00:00 2001 +From: Chen Ridong +Date: Fri, 22 Aug 2025 07:07:14 +0000 +Subject: [PATCH 15/28] kernfs: Fix UAF in polling when open file is released + +A use-after-free (UAF) vulnerability was identified in the PSI (Pressure +Stall Information) monitoring mechanism: + +BUG: KASAN: slab-use-after-free in psi_trigger_poll+0x3c/0x140 +Read of size 8 at addr ffff3de3d50bd308 by task systemd/1 + +psi_trigger_poll+0x3c/0x140 +cgroup_pressure_poll+0x70/0xa0 +cgroup_file_poll+0x8c/0x100 +kernfs_fop_poll+0x11c/0x1c0 +ep_item_poll.isra.0+0x188/0x2c0 + +Allocated by task 1: +cgroup_file_open+0x88/0x388 +kernfs_fop_open+0x73c/0xaf0 +do_dentry_open+0x5fc/0x1200 +vfs_open+0xa0/0x3f0 +do_open+0x7e8/0xd08 +path_openat+0x2fc/0x6b0 +do_filp_open+0x174/0x368 + +Freed by task 8462: +cgroup_file_release+0x130/0x1f8 +kernfs_drain_open_files+0x17c/0x440 +kernfs_drain+0x2dc/0x360 +kernfs_show+0x1b8/0x288 +cgroup_file_show+0x150/0x268 +cgroup_pressure_write+0x1dc/0x340 +cgroup_file_write+0x274/0x548 + +Reproduction Steps: +1. Open test/cpu.pressure and establish epoll monitoring +2. Disable monitoring: echo 0 > test/cgroup.pressure +3. Re-enable monitoring: echo 1 > test/cgroup.pressure + +The race condition occurs because: +1. When cgroup.pressure is disabled (echo 0 > cgroup.pressure), it: + - Releases PSI triggers via cgroup_file_release() + - Frees of->priv through kernfs_drain_open_files() +2. 
While epoll still holds reference to the file and continues polling +3. Re-enabling (echo 1 > cgroup.pressure) accesses freed of->priv + +epolling disable/enable cgroup.pressure +fd=open(cpu.pressure) +while(1) +... +epoll_wait +kernfs_fop_poll +kernfs_get_active = true echo 0 > cgroup.pressure +... cgroup_file_show + kernfs_show + // inactive kn + kernfs_drain_open_files + cft->release(of); + kfree(ctx); + ... +kernfs_get_active = false + echo 1 > cgroup.pressure + kernfs_show + kernfs_activate_one(kn); +kernfs_fop_poll +kernfs_get_active = true +cgroup_file_poll +psi_trigger_poll +// UAF +... +end: close(fd) + +To address this issue, introduce kernfs_get_active_of() for kernfs open +files to obtain active references. This function will fail if the open file +has been released. Replace kernfs_get_active() with kernfs_get_active_of() +to prevent further operations on released file descriptors. + +Fixes: 34f26a15611a ("sched/psi: Per-cgroup PSI accounting disable/re-enable interface") +Cc: stable +Reported-by: Zhang Zhaotian +Signed-off-by: Chen Ridong +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20250822070715.1565236-2-chenridong@huaweicloud.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/kernfs/file.c | 58 +++++++++++++++++++++++++++++++----------------- + 1 file changed, 38 insertions(+), 20 deletions(-) + +diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c +index 2d9d5dfa19b8..c81cffa72b1c 100644 +--- a/fs/kernfs/file.c ++++ b/fs/kernfs/file.c +@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) + !list_empty(&of->list)); + } + ++/* Get active reference to kernfs node for an open file */ ++static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of) ++{ ++ /* Skip if file was already released */ ++ if (unlikely(of->released)) ++ return NULL; ++ ++ if (!kernfs_get_active(of->kn)) ++ return NULL; ++ ++ return of; ++} ++ ++static void kernfs_put_active_of(struct kernfs_open_file *of) ++{ ++ return 
kernfs_put_active(of->kn); ++} ++ + /** + * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn + * +@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) + + if (ops->seq_stop) + ops->seq_stop(sf, v); +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + } + + static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) +@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) + * the ops aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + return ERR_PTR(-ENODEV); + + ops = kernfs_ops(of->kn); +@@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) + * the ops aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); +- if (!kernfs_get_active(of->kn)) { ++ if (!kernfs_get_active_of(of)) { + len = -ENODEV; + mutex_unlock(&of->mutex); + goto out_free; +@@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) + else + len = -EINVAL; + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + mutex_unlock(&of->mutex); + + if (len < 0) +@@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) + * the ops aren't called concurrently for the same open file. 
+ */ + mutex_lock(&of->mutex); +- if (!kernfs_get_active(of->kn)) { ++ if (!kernfs_get_active_of(of)) { + mutex_unlock(&of->mutex); + len = -ENODEV; + goto out_free; +@@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) + else + len = -EINVAL; + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + mutex_unlock(&of->mutex); + + if (len > 0) +@@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma) + if (!of->vm_ops) + return; + +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + return; + + if (of->vm_ops->open) + of->vm_ops->open(vma); + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + } + + static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) +@@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + return VM_FAULT_SIGBUS; + + ret = VM_FAULT_SIGBUS; + if (of->vm_ops->fault) + ret = of->vm_ops->fault(vmf); + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + return ret; + } + +@@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + return VM_FAULT_SIGBUS; + + ret = 0; +@@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) + else + file_update_time(file); + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + return ret; + } + +@@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, + if (!of->vm_ops) + return -EINVAL; + +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + return -EINVAL; + + ret = -EINVAL; + if (of->vm_ops->access) + ret = of->vm_ops->access(vma, addr, buf, len, write); + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + return ret; + } + 
+@@ -455,7 +473,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) + mutex_lock(&of->mutex); + + rc = -ENODEV; +- if (!kernfs_get_active(of->kn)) ++ if (!kernfs_get_active_of(of)) + goto out_unlock; + + ops = kernfs_ops(of->kn); +@@ -490,7 +508,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) + } + vma->vm_ops = &kernfs_vm_ops; + out_put: +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + out_unlock: + mutex_unlock(&of->mutex); + +@@ -852,7 +870,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) + struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); + __poll_t ret; + +- if (!kernfs_get_active(kn)) ++ if (!kernfs_get_active_of(of)) + return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; + + if (kn->attr.ops->poll) +@@ -860,7 +878,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) + else + ret = kernfs_generic_poll(of, wait); + +- kernfs_put_active(kn); ++ kernfs_put_active_of(of); + return ret; + } + +@@ -875,7 +893,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) + * the ops aren't called concurrently for the same open file. 
+ */ + mutex_lock(&of->mutex); +- if (!kernfs_get_active(of->kn)) { ++ if (!kernfs_get_active_of(of)) { + mutex_unlock(&of->mutex); + return -ENODEV; + } +@@ -886,7 +904,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) + else + ret = generic_file_llseek(file, offset, whence); + +- kernfs_put_active(of->kn); ++ kernfs_put_active_of(of); + mutex_unlock(&of->mutex); + return ret; + } +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0016-libceph-fix-invalid-accesses-to-ceph_connection_v1_i.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0016-libceph-fix-invalid-accesses-to-ceph_connection_v1_i.patch new file mode 100644 index 0000000..65a9588 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0016-libceph-fix-invalid-accesses-to-ceph_connection_v1_i.patch @@ -0,0 +1,57 @@ +From b64b2ae6a7597c83d6cf856eb5dc98e64b424715 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Thu, 3 Jul 2025 12:10:50 +0200 +Subject: [PATCH 16/28] libceph: fix invalid accesses to + ceph_connection_v1_info + +There is a place where generic code in messenger.c is reading and +another place where it is writing to con->v1 union member without +checking that the union member is active (i.e. msgr1 is in use). + +On 64-bit systems, con->v1.auth_retry overlaps with con->v2.out_iter, +so such a read is almost guaranteed to return a bogus value instead of +0 when msgr2 is in use. This ends up being fairly benign because the +side effect is just the invalidation of the authorizer and successive +fetching of new tickets. + +con->v1.connect_seq overlaps with con->v2.conn_bufs and the fact that +it's being written to can cause more serious consequences, but luckily +it's not something that happens often. 
+ +Cc: stable@vger.kernel.org +Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") +Signed-off-by: Ilya Dryomov +Reviewed-by: Viacheslav Dubeyko +--- + net/ceph/messenger.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c +index d1b5705dc0c6..9f6d860411cb 100644 +--- a/net/ceph/messenger.c ++++ b/net/ceph/messenger.c +@@ -1524,7 +1524,7 @@ static void con_fault_finish(struct ceph_connection *con) + * in case we faulted due to authentication, invalidate our + * current tickets so that we can get new ones. + */ +- if (con->v1.auth_retry) { ++ if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) { + dout("auth_retry %d, invalidating\n", con->v1.auth_retry); + if (con->ops->invalidate_authorizer) + con->ops->invalidate_authorizer(con); +@@ -1714,9 +1714,10 @@ static void clear_standby(struct ceph_connection *con) + { + /* come back from STANDBY? */ + if (con->state == CEPH_CON_S_STANDBY) { +- dout("clear_standby %p and ++connect_seq\n", con); ++ dout("clear_standby %p\n", con); + con->state = CEPH_CON_S_PREOPEN; +- con->v1.connect_seq++; ++ if (!ceph_msgr2(from_msgr(con->msgr))) ++ con->v1.connect_seq++; + WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING)); + WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING)); + } +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0017-io_uring-kbuf-always-use-READ_ONCE-to-read-ring-prov.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0017-io_uring-kbuf-always-use-READ_ONCE-to-read-ring-prov.patch new file mode 100644 index 0000000..8dcd8bb --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0017-io_uring-kbuf-always-use-READ_ONCE-to-read-ring-prov.patch @@ -0,0 +1,62 @@ +From ed5a4fdfb269ee8d25ad484791cd168b2fdeebf5 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 27 Aug 2025 15:27:30 -0600 +Subject: [PATCH 17/28] io_uring/kbuf: always use READ_ONCE() to read ring + 
provided buffer lengths + +Since the buffers are mapped from userspace, it is prudent to use +READ_ONCE() to read the value into a local variable, and use that for +any other actions taken. Having a stable read of the buffer length +avoids worrying about it changing after checking, or being read multiple +times. + +Similarly, the buffer may well change in between it being picked and +being committed. Ensure the looping for incremental ring buffer commit +stops if it hits a zero sized buffer, as no further progress can be made +at that point. + +Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") +Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/ +Reported-by: Qingyue Zhang +Reported-by: Suoxing Zhang +Signed-off-by: Jens Axboe +--- + io_uring/kbuf.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c +index 9bd27deeee6f..1dbfbccad9ca 100644 +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -140,6 +140,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, + __u16 tail, head = bl->head; + struct io_uring_buf *buf; + void __user *ret; ++ u32 buf_len; + + tail = smp_load_acquire(&br->tail); + if (unlikely(tail == head)) +@@ -149,8 +150,9 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, + req->flags |= REQ_F_BL_EMPTY; + + buf = io_ring_head_to_buf(br, head, bl->mask); +- if (*len == 0 || *len > buf->len) +- *len = buf->len; ++ buf_len = READ_ONCE(buf->len); ++ if (*len == 0 || *len > buf_len) ++ *len = buf_len; + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; + req->buf_list = bl; + req->buf_index = buf->bid; +@@ -257,7 +259,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, + + req->buf_index = buf->bid; + do { +- u32 len = buf->len; ++ u32 len = READ_ONCE(buf->len); + + /* truncate end piece, if needed, for non partial buffers */ + if 
(len > arg->max_len) { +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0018-rv-Use-strings-in-da-monitors-tracepoints.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0018-rv-Use-strings-in-da-monitors-tracepoints.patch new file mode 100644 index 0000000..9bbdb40 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0018-rv-Use-strings-in-da-monitors-tracepoints.patch @@ -0,0 +1,184 @@ +From 486715046c50c98f36fe6df2bee0b04c56552427 Mon Sep 17 00:00:00 2001 +From: Gabriele Monaco +Date: Mon, 28 Jul 2025 15:50:15 +0200 +Subject: [PATCH 18/28] rv: Use strings in da monitors tracepoints + +Using DA monitors tracepoints with KASAN enabled triggers the following +warning: + + BUG: KASAN: global-out-of-bounds in do_trace_event_raw_event_event_da_monitor+0xd6/0x1a0 + Read of size 32 at addr ffffffffaada8980 by task ... + Call Trace: + + [...] + do_trace_event_raw_event_event_da_monitor+0xd6/0x1a0 + ? __pfx_do_trace_event_raw_event_event_da_monitor+0x10/0x10 + ? trace_event_sncid+0x83/0x200 + trace_event_sncid+0x163/0x200 + [...] + The buggy address belongs to the variable: + automaton_snep+0x4e0/0x5e0 + +This is caused by the tracepoints reading 32 bytes __array instead of +__string from the automata definition. Such strings are literals and +reading 32 bytes ends up in out of bound memory accesses (e.g. the next +automaton's data in this case). +The error is harmless as, while printing the string, we stop at the null +terminator, but it should still be fixed. + +Use the __string facilities while defining the tracepoints to avoid +reading out of bound memory. 
+ +Cc: Masami Hiramatsu +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Tomas Glozar +Cc: Juri Lelli +Cc: Clark Williams +Cc: John Kacur +Link: https://lore.kernel.org/20250728135022.255578-4-gmonaco@redhat.com +Fixes: 792575348ff7 ("rv/include: Add deterministic automata monitor definition via C macros") +Reviewed-by: Nam Cao +Signed-off-by: Gabriele Monaco +Signed-off-by: Steven Rostedt (Google) +--- + include/trace/events/rv.h | 76 +++++++++++++++++++-------------------- + 1 file changed, 38 insertions(+), 38 deletions(-) + +diff --git a/include/trace/events/rv.h b/include/trace/events/rv.h +index 56592da9301c..89b59b154afd 100644 +--- a/include/trace/events/rv.h ++++ b/include/trace/events/rv.h +@@ -16,23 +16,23 @@ DECLARE_EVENT_CLASS(event_da_monitor, + TP_ARGS(state, event, next_state, final_state), + + TP_STRUCT__entry( +- __array( char, state, MAX_DA_NAME_LEN ) +- __array( char, event, MAX_DA_NAME_LEN ) +- __array( char, next_state, MAX_DA_NAME_LEN ) +- __field( bool, final_state ) ++ __string( state, state ) ++ __string( event, event ) ++ __string( next_state, next_state ) ++ __field( bool, final_state ) + ), + + TP_fast_assign( +- memcpy(__entry->state, state, MAX_DA_NAME_LEN); +- memcpy(__entry->event, event, MAX_DA_NAME_LEN); +- memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); +- __entry->final_state = final_state; ++ __assign_str(state); ++ __assign_str(event); ++ __assign_str(next_state); ++ __entry->final_state = final_state; + ), + + TP_printk("%s x %s -> %s %s", +- __entry->state, +- __entry->event, +- __entry->next_state, ++ __get_str(state), ++ __get_str(event), ++ __get_str(next_state), + __entry->final_state ? 
"(final)" : "") + ); + +@@ -43,18 +43,18 @@ DECLARE_EVENT_CLASS(error_da_monitor, + TP_ARGS(state, event), + + TP_STRUCT__entry( +- __array( char, state, MAX_DA_NAME_LEN ) +- __array( char, event, MAX_DA_NAME_LEN ) ++ __string( state, state ) ++ __string( event, event ) + ), + + TP_fast_assign( +- memcpy(__entry->state, state, MAX_DA_NAME_LEN); +- memcpy(__entry->event, event, MAX_DA_NAME_LEN); ++ __assign_str(state); ++ __assign_str(event); + ), + + TP_printk("event %s not expected in the state %s", +- __entry->event, +- __entry->state) ++ __get_str(event), ++ __get_str(state)) + ); + + #ifdef CONFIG_RV_MON_WIP +@@ -76,26 +76,26 @@ DECLARE_EVENT_CLASS(event_da_monitor_id, + TP_ARGS(id, state, event, next_state, final_state), + + TP_STRUCT__entry( +- __field( int, id ) +- __array( char, state, MAX_DA_NAME_LEN ) +- __array( char, event, MAX_DA_NAME_LEN ) +- __array( char, next_state, MAX_DA_NAME_LEN ) +- __field( bool, final_state ) ++ __field( int, id ) ++ __string( state, state ) ++ __string( event, event ) ++ __string( next_state, next_state ) ++ __field( bool, final_state ) + ), + + TP_fast_assign( +- memcpy(__entry->state, state, MAX_DA_NAME_LEN); +- memcpy(__entry->event, event, MAX_DA_NAME_LEN); +- memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); +- __entry->id = id; +- __entry->final_state = final_state; ++ __assign_str(state); ++ __assign_str(event); ++ __assign_str(next_state); ++ __entry->id = id; ++ __entry->final_state = final_state; + ), + + TP_printk("%d: %s x %s -> %s %s", + __entry->id, +- __entry->state, +- __entry->event, +- __entry->next_state, ++ __get_str(state), ++ __get_str(event), ++ __get_str(next_state), + __entry->final_state ? 
"(final)" : "") + ); + +@@ -106,21 +106,21 @@ DECLARE_EVENT_CLASS(error_da_monitor_id, + TP_ARGS(id, state, event), + + TP_STRUCT__entry( +- __field( int, id ) +- __array( char, state, MAX_DA_NAME_LEN ) +- __array( char, event, MAX_DA_NAME_LEN ) ++ __field( int, id ) ++ __string( state, state ) ++ __string( event, event ) + ), + + TP_fast_assign( +- memcpy(__entry->state, state, MAX_DA_NAME_LEN); +- memcpy(__entry->event, event, MAX_DA_NAME_LEN); +- __entry->id = id; ++ __assign_str(state); ++ __assign_str(event); ++ __entry->id = id; + ), + + TP_printk("%d: event %s not expected in the state %s", + __entry->id, +- __entry->event, +- __entry->state) ++ __get_str(event), ++ __get_str(state)) + ); + + #ifdef CONFIG_RV_MON_WWNR +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0019-coresight-holding-cscfg_csdev_lock-while-removing-cs.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0019-coresight-holding-cscfg_csdev_lock-while-removing-cs.patch new file mode 100644 index 0000000..ef6d2a7 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0019-coresight-holding-cscfg_csdev_lock-while-removing-cs.patch @@ -0,0 +1,54 @@ +From 3ffe8f12948cd816ec1d4c3430b5f234c4d5fe3f Mon Sep 17 00:00:00 2001 +From: Yeoreum Yun +Date: Wed, 14 May 2025 17:19:50 +0100 +Subject: [PATCH 19/28] coresight: holding cscfg_csdev_lock while removing + cscfg from csdev + +There'll be possible race scenario for coresight config: + +CPU0 CPU1 +(perf enable) load module + cscfg_load_config_sets() + activate config. // sysfs + (sys_active_cnt == 1) +... +cscfg_csdev_enable_active_config() + lock(csdev->cscfg_csdev_lock) + deactivate config // sysfs + (sys_activec_cnt == 0) + cscfg_unload_config_sets() + cscfg_remove_owned_csdev_configs() + // here load config activate by CPU1 + unlock(csdev->cscfg_csdev_lock) + +iterating config_csdev_list could be raced with config_csdev_list's +entry delete. 
+ +To resolve this race, hold csdev->cscfg_csdev_lock while running +cscfg_remove_owned_csdev_configs(). + +Fixes: 02bd588e12df ("coresight: configuration: Update API to permit dynamic load/unload") +Signed-off-by: Yeoreum Yun +Reviewed-by: Leo Yan +Signed-off-by: Suzuki K Poulose +Link: https://lore.kernel.org/r/20250514161951.3427590-3-yeoreum.yun@arm.com +--- + drivers/hwtracing/coresight/coresight-syscfg.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c +index 30a561d87481..6de6cde2d9f0 100644 +--- a/drivers/hwtracing/coresight/coresight-syscfg.c ++++ b/drivers/hwtracing/coresight/coresight-syscfg.c +@@ -395,6 +395,8 @@ static void cscfg_remove_owned_csdev_configs(struct coresight_device *csdev, voi + if (list_empty(&csdev->config_csdev_list)) + return; + ++ guard(raw_spinlock_irqsave)(&csdev->cscfg_csdev_lock); ++ + list_for_each_entry_safe(config_csdev, tmp, &csdev->config_csdev_list, node) { + if (config_csdev->config_desc->load_owner == load_owner) + list_del(&config_csdev->node); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0020-ALSA-usb-audio-Kill-timer-properly-at-removal.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0020-ALSA-usb-audio-Kill-timer-properly-at-removal.patch new file mode 100644 index 0000000..5e48a3c --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0020-ALSA-usb-audio-Kill-timer-properly-at-removal.patch @@ -0,0 +1,49 @@ +From f2c39dd7fb73a1f3befe9d8af0e4c1bbb223e8f6 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Mon, 19 May 2025 23:20:30 +0200 +Subject: [PATCH 20/28] ALSA: usb-audio: Kill timer properly at removal + +The USB-audio MIDI code initializes the timer, but in a rare case, the +driver might be freed without the disconnect call.
This leaves the +timer in an active state while the assigned object is released via +snd_usbmidi_free(), which ends up with a kernel warning when the debug +configuration is enabled, as spotted by fuzzer. + +For avoiding the problem, put timer_shutdown_sync() at +snd_usbmidi_free(), so that the timer can be killed properly. +While we're at it, replace the existing timer_delete_sync() at the +disconnect callback with timer_shutdown_sync(), too. + +Reported-by: syzbot+d8f72178ab6783a7daea@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/681c70d7.050a0220.a19a9.00c6.GAE@google.com +Cc: +Link: https://patch.msgid.link/20250519212031.14436-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +--- + sound/usb/midi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/sound/usb/midi.c b/sound/usb/midi.c +index a792ada18863..c3de2b137435 100644 +--- a/sound/usb/midi.c ++++ b/sound/usb/midi.c +@@ -1530,6 +1530,7 @@ static void snd_usbmidi_free(struct snd_usb_midi *umidi) + snd_usbmidi_in_endpoint_delete(ep->in); + } + mutex_destroy(&umidi->mutex); ++ timer_shutdown_sync(&umidi->error_timer); + kfree(umidi); + } + +@@ -1553,7 +1554,7 @@ void snd_usbmidi_disconnect(struct list_head *p) + spin_unlock_irq(&umidi->disc_lock); + up_write(&umidi->disc_rwsem); + +- del_timer_sync(&umidi->error_timer); ++ timer_shutdown_sync(&umidi->error_timer); + + for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { + struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0021-clk-sunxi-ng-h616-Add-sigma-delta-modulation-setting.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0021-clk-sunxi-ng-h616-Add-sigma-delta-modulation-setting.patch new file mode 100644 index 0000000..b71708c --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0021-clk-sunxi-ng-h616-Add-sigma-delta-modulation-setting.patch @@ -0,0 +1,119 @@ +From 0444f99cd6eba23364d9dc2687729ce0a0f56e65 Mon Sep 17 00:00:00 2001 +From: Ryan Walklin 
+Date: Wed, 23 Oct 2024 20:56:57 +1300 +Subject: [PATCH 21/28] clk: sunxi-ng: h616: Add sigma-delta modulation + settings for audio PLL + +Allwinner has previously released a H616 audio driver which also +provides sigma-delta modulation for the audio PLL clocks. This approach +is used in other Allwinner SoCs, including the H3 and A64. + +The manual-provided clock values are: +PLL_AUDIO(hs) = 24 MHz*N/M1 +PLL_AUDIO(4X) = 24 MHz*N/M0/M1/P +PLL_AUDIO(2X) = 24 MHz*N/M0/M1/P/2 +PLL_AUDIO(1X) = 24 MHz*N/M0/M1/P/4 + +A fixed post-divider of 2 is used to account for a M0 divider of +2, which cannot be modelled by the existing macros and ccu_nm struct. + +Add SDM to the H616 clock control unit driver. + +Signed-off-by: Ryan Walklin +Tested-by: Marcus Cooper +Reviewed-by: Andre Przywara +Link: https://patch.msgid.link/20241023075917.186835-2-ryan@testtoast.com +[wens@csie.org: Fixed whitespace errors] +Signed-off-by: Chen-Yu Tsai +--- + drivers/clk/sunxi-ng/ccu-sun50i-h616.c | 44 +++++++++++++++++--------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +index 84e406ddf9d1..b383ae2b2d44 100644 +--- a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c ++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +@@ -216,19 +216,29 @@ static struct ccu_nkmp pll_de_clk = { + }; + + /* +- * TODO: Determine SDM settings for the audio PLL. The manual suggests +- * PLL_FACTOR_N=16, PLL_POST_DIV_P=2, OUTPUT_DIV=2, pattern=0xe000c49b +- * for 24.576 MHz, and PLL_FACTOR_N=22, PLL_POST_DIV_P=3, OUTPUT_DIV=2, +- * pattern=0xe001288c for 22.5792 MHz. +- * This clashes with our fixed PLL_POST_DIV_P. ++ * Sigma-delta modulation settings table obtained from the vendor SDK driver. ++ * There are additional M0 and M1 divider bits not modelled here, so forced to ++ * fixed values in the probe routine. 
Sigma-delta modulation allows providing a ++ * fractional-N divider in the PLL, to help reaching those specific ++ * frequencies with less error. + */ ++static struct ccu_sdm_setting pll_audio_sdm_table[] = { ++ { .rate = 90316800, .pattern = 0xc001288d, .m = 3, .n = 22 }, ++ { .rate = 98304000, .pattern = 0xc001eb85, .m = 5, .n = 40 }, ++}; ++ + #define SUN50I_H616_PLL_AUDIO_REG 0x078 + static struct ccu_nm pll_audio_hs_clk = { + .enable = BIT(31), + .lock = BIT(28), + .n = _SUNXI_CCU_MULT_MIN(8, 8, 12), +- .m = _SUNXI_CCU_DIV(1, 1), /* input divider */ ++ .m = _SUNXI_CCU_DIV(16, 6), ++ .sdm = _SUNXI_CCU_SDM(pll_audio_sdm_table, ++ BIT(24), 0x178, BIT(31)), ++ .fixed_post_div = 2, + .common = { ++ .features = CCU_FEATURE_FIXED_POSTDIV | ++ CCU_FEATURE_SIGMA_DELTA_MOD, + .reg = 0x078, + .hw.init = CLK_HW_INIT("pll-audio-hs", "osc24M", + &ccu_nm_ops, +@@ -685,18 +695,20 @@ static const struct clk_hw *clk_parent_pll_audio[] = { + }; + + /* +- * The divider of pll-audio is fixed to 24 for now, so 24576000 and 22579200 +- * rates can be set exactly in conjunction with sigma-delta modulation. ++ * The PLL_AUDIO_4X clock defaults to 24.5714 MHz according to the manual, with ++ * a final divider of 1. The 2X and 1X clocks use 2 and 4 respectively. The 1x ++ * clock is set to either 24576000 or 22579200 for 48Khz and 44.1Khz (and ++ * multiples). 
+ */ + static CLK_FIXED_FACTOR_HWS(pll_audio_1x_clk, "pll-audio-1x", + clk_parent_pll_audio, +- 96, 1, CLK_SET_RATE_PARENT); ++ 4, 1, CLK_SET_RATE_PARENT); + static CLK_FIXED_FACTOR_HWS(pll_audio_2x_clk, "pll-audio-2x", + clk_parent_pll_audio, +- 48, 1, CLK_SET_RATE_PARENT); ++ 2, 1, CLK_SET_RATE_PARENT); + static CLK_FIXED_FACTOR_HWS(pll_audio_4x_clk, "pll-audio-4x", + clk_parent_pll_audio, +- 24, 1, CLK_SET_RATE_PARENT); ++ 1, 1, CLK_SET_RATE_PARENT); + + static const struct clk_hw *pll_periph0_parents[] = { + &pll_periph0_clk.common.hw +@@ -1136,12 +1148,14 @@ static int sun50i_h616_ccu_probe(struct platform_device *pdev) + } + + /* +- * Force the post-divider of pll-audio to 12 and the output divider +- * of it to 2, so 24576000 and 22579200 rates can be set exactly. ++ * Set the output-divider for the pll-audio clocks (M0) to 2 and the ++ * input divider (M1) to 1 as recommended by the manual when using ++ * SDM. + */ + val = readl(reg + SUN50I_H616_PLL_AUDIO_REG); +- val &= ~(GENMASK(21, 16) | BIT(0)); +- writel(val | (11 << 16) | BIT(0), reg + SUN50I_H616_PLL_AUDIO_REG); ++ val &= ~BIT(1); ++ val |= BIT(0); ++ writel(val, reg + SUN50I_H616_PLL_AUDIO_REG); + + /* + * First clock parent (osc32K) is unusable for CEC. 
But since there +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0022-clk-sunxi-ng-h616-Reparent-CPU-clock-during-frequenc.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0022-clk-sunxi-ng-h616-Reparent-CPU-clock-during-frequenc.patch new file mode 100644 index 0000000..e2e46c6 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0022-clk-sunxi-ng-h616-Reparent-CPU-clock-during-frequenc.patch @@ -0,0 +1,78 @@ +From 649aacd6abe85bfea85ba10be194f080db518e98 Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Fri, 25 Oct 2024 11:56:20 +0100 +Subject: [PATCH 22/28] clk: sunxi-ng: h616: Reparent CPU clock during + frequency changes + +The H616 user manual recommends to re-parent the CPU clock during +frequency changes of the PLL, and recommends PLL_PERI0(1X), which runs +at 600 MHz. Also it asks to disable and then re-enable the PLL lock bit, +after the factor changes have been applied. + +Add clock notifiers for the PLL and the CPU mux clock, using the existing +notifier callbacks, and tell them to use mux 4 (the PLL_PERI0(1X) source), +and bit 29 (the LOCK_ENABLE) bit. The existing code already follows the +correct algorithms. 
+ +Signed-off-by: Andre Przywara +Link: https://lore.kernel.org/r/20241025105620.1891596-1-andre.przywara@arm.com +Tested-by: Evgeny Boger +Reviewed-by: Chen-Yu Tsai +Signed-off-by: Stephen Boyd +--- + drivers/clk/sunxi-ng/ccu-sun50i-h616.c | 28 ++++++++++++++++++++++++-- + 1 file changed, 26 insertions(+), 2 deletions(-) + +diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +index b383ae2b2d44..5c06c4f27bf2 100644 +--- a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c ++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +@@ -1107,11 +1107,24 @@ static const u32 usb2_clk_regs[] = { + SUN50I_H616_USB3_CLK_REG, + }; + ++static struct ccu_mux_nb sun50i_h616_cpu_nb = { ++ .common = &cpux_clk.common, ++ .cm = &cpux_clk.mux, ++ .delay_us = 1, /* manual doesn't really say */ ++ .bypass_index = 4, /* PLL_PERI0@600MHz, as recommended by manual */ ++}; ++ ++static struct ccu_pll_nb sun50i_h616_pll_cpu_nb = { ++ .common = &pll_cpux_clk.common, ++ .enable = BIT(29), /* LOCK_ENABLE */ ++ .lock = BIT(28), ++}; ++ + static int sun50i_h616_ccu_probe(struct platform_device *pdev) + { + void __iomem *reg; + u32 val; +- int i; ++ int ret, i; + + reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(reg)) +@@ -1166,7 +1179,18 @@ static int sun50i_h616_ccu_probe(struct platform_device *pdev) + val |= BIT(24); + writel(val, reg + SUN50I_H616_HDMI_CEC_CLK_REG); + +- return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h616_ccu_desc); ++ ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h616_ccu_desc); ++ if (ret) ++ return ret; ++ ++ /* Reparent CPU during CPU PLL rate changes */ ++ ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, ++ &sun50i_h616_cpu_nb); ++ ++ /* Re-lock the CPU PLL after any rate changes */ ++ ccu_pll_notifier_register(&sun50i_h616_pll_cpu_nb); ++ ++ return 0; + } + + static const struct of_device_id sun50i_h616_ccu_ids[] = { +-- +2.43.0 + diff --git 
a/bsp_diff/caas/kernel/linux-intel-lts2024/0023-clk-sunxi-ng-h616-Reparent-GPU-clock-during-frequenc.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0023-clk-sunxi-ng-h616-Reparent-GPU-clock-during-frequenc.patch new file mode 100644 index 0000000..84d8e03 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0023-clk-sunxi-ng-h616-Reparent-GPU-clock-during-frequenc.patch @@ -0,0 +1,99 @@ +From 31d45bb5150037be527518838e10d709337e1e78 Mon Sep 17 00:00:00 2001 +From: Philippe Simons +Date: Thu, 20 Feb 2025 12:38:08 +0100 +Subject: [PATCH 23/28] clk: sunxi-ng: h616: Reparent GPU clock during + frequency changes + +The H616 manual does not state that the GPU PLL supports +dynamic frequency configuration, so we must take extra care when changing +the frequency. Currently any attempt to do device DVFS on the GPU leads +to various panfrost oopses and GPU hangs. + +The manual describes the algorithm for changing the PLL +frequency, which the CPU PLL notifier code already supports, so we reuse +that to reparent the GPU clock to the GPU1 clock during frequency +changes. + +Signed-off-by: Philippe Simons +Reviewed-by: Andre Przywara +Reviewed-by: Jernej Skrabec +Link: https://patch.msgid.link/20250220113808.1122414-2-simons.philippe@gmail.com +Signed-off-by: Chen-Yu Tsai +--- + drivers/clk/sunxi-ng/ccu-sun50i-h616.c | 36 +++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +index 5c06c4f27bf2..d0d54a2398f1 100644 +--- a/drivers/clk/sunxi-ng/ccu-sun50i-h616.c ++++ b/drivers/clk/sunxi-ng/ccu-sun50i-h616.c +@@ -328,10 +328,16 @@ static SUNXI_CCU_M_WITH_MUX_GATE(gpu0_clk, "gpu0", gpu0_parents, 0x670, + 24, 1, /* mux */ + BIT(31), /* gate */ + CLK_SET_RATE_PARENT); ++ ++/* ++ * This clk is needed as a temporary fall back during GPU PLL freq changes. ++ * Set CLK_IS_CRITICAL flag to prevent from being disabled. 
++ */ ++#define SUN50I_H616_GPU_CLK1_REG 0x674 + static SUNXI_CCU_M_WITH_GATE(gpu1_clk, "gpu1", "pll-periph0-2x", 0x674, + 0, 2, /* M */ + BIT(31),/* gate */ +- 0); ++ CLK_IS_CRITICAL); + + static SUNXI_CCU_GATE(bus_gpu_clk, "bus-gpu", "psi-ahb1-ahb2", + 0x67c, BIT(0), 0); +@@ -1120,6 +1126,19 @@ static struct ccu_pll_nb sun50i_h616_pll_cpu_nb = { + .lock = BIT(28), + }; + ++static struct ccu_mux_nb sun50i_h616_gpu_nb = { ++ .common = &gpu0_clk.common, ++ .cm = &gpu0_clk.mux, ++ .delay_us = 1, /* manual doesn't really say */ ++ .bypass_index = 1, /* GPU_CLK1@400MHz */ ++}; ++ ++static struct ccu_pll_nb sun50i_h616_pll_gpu_nb = { ++ .common = &pll_gpu_clk.common, ++ .enable = BIT(29), /* LOCK_ENABLE */ ++ .lock = BIT(28), ++}; ++ + static int sun50i_h616_ccu_probe(struct platform_device *pdev) + { + void __iomem *reg; +@@ -1170,6 +1189,14 @@ static int sun50i_h616_ccu_probe(struct platform_device *pdev) + val |= BIT(0); + writel(val, reg + SUN50I_H616_PLL_AUDIO_REG); + ++ /* ++ * Set the input-divider for the gpu1 clock to 3, to reach a safe 400 MHz. ++ */ ++ val = readl(reg + SUN50I_H616_GPU_CLK1_REG); ++ val &= ~GENMASK(1, 0); ++ val |= 2; ++ writel(val, reg + SUN50I_H616_GPU_CLK1_REG); ++ + /* + * First clock parent (osc32K) is unusable for CEC. 
But since there + * is no good way to force parent switch (both run with same frequency), +@@ -1190,6 +1217,13 @@ static int sun50i_h616_ccu_probe(struct platform_device *pdev) + /* Re-lock the CPU PLL after any rate changes */ + ccu_pll_notifier_register(&sun50i_h616_pll_cpu_nb); + ++ /* Reparent GPU during GPU PLL rate changes */ ++ ccu_mux_notifier_register(pll_gpu_clk.common.hw.clk, ++ &sun50i_h616_gpu_nb); ++ ++ /* Re-lock the GPU PLL after any rate changes */ ++ ccu_pll_notifier_register(&sun50i_h616_pll_gpu_nb); ++ + return 0; + } + +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0024-kasan-avoid-sleepable-page-allocation-from-atomic-co.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0024-kasan-avoid-sleepable-page-allocation-from-atomic-co.patch new file mode 100644 index 0000000..df6f4f8 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0024-kasan-avoid-sleepable-page-allocation-from-atomic-co.patch @@ -0,0 +1,195 @@ +From 06c2f1b2182a5f1e832a419ede76b6a8c7f8882e Mon Sep 17 00:00:00 2001 +From: Alexander Gordeev +Date: Thu, 15 May 2025 15:55:38 +0200 +Subject: [PATCH 24/28] kasan: avoid sleepable page allocation from atomic + context + +apply_to_pte_range() enters the lazy MMU mode and then invokes +kasan_populate_vmalloc_pte() callback on each page table walk iteration. +However, the callback can go into sleep when trying to allocate a single +page, e.g. if an architecture disables preemption on lazy MMU mode enter. + +On s390 if make arch_enter_lazy_mmu_mode() -> preempt_enable() and +arch_leave_lazy_mmu_mode() -> preempt_disable(), such crash occurs: + +[ 0.663336] BUG: sleeping function called from invalid context at ./include/linux/sched/mm.h:321 +[ 0.663348] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd +[ 0.663358] preempt_count: 1, expected: 0 +[ 0.663366] RCU nest depth: 0, expected: 0 +[ 0.663375] no locks held by kthreadd/2. 
+[ 0.663383] Preemption disabled at: +[ 0.663386] [<0002f3284cbb4eda>] apply_to_pte_range+0xfa/0x4a0 +[ 0.663405] CPU: 0 UID: 0 PID: 2 Comm: kthreadd Not tainted 6.15.0-rc5-gcc-kasan-00043-gd76bb1ebb558-dirty #162 PREEMPT +[ 0.663408] Hardware name: IBM 3931 A01 701 (KVM/Linux) +[ 0.663409] Call Trace: +[ 0.663410] [<0002f3284c385f58>] dump_stack_lvl+0xe8/0x140 +[ 0.663413] [<0002f3284c507b9e>] __might_resched+0x66e/0x700 +[ 0.663415] [<0002f3284cc4f6c0>] __alloc_frozen_pages_noprof+0x370/0x4b0 +[ 0.663419] [<0002f3284ccc73c0>] alloc_pages_mpol+0x1a0/0x4a0 +[ 0.663421] [<0002f3284ccc8518>] alloc_frozen_pages_noprof+0x88/0xc0 +[ 0.663424] [<0002f3284ccc8572>] alloc_pages_noprof+0x22/0x120 +[ 0.663427] [<0002f3284cc341ac>] get_free_pages_noprof+0x2c/0xc0 +[ 0.663429] [<0002f3284cceba70>] kasan_populate_vmalloc_pte+0x50/0x120 +[ 0.663433] [<0002f3284cbb4ef8>] apply_to_pte_range+0x118/0x4a0 +[ 0.663435] [<0002f3284cbc7c14>] apply_to_pmd_range+0x194/0x3e0 +[ 0.663437] [<0002f3284cbc99be>] __apply_to_page_range+0x2fe/0x7a0 +[ 0.663440] [<0002f3284cbc9e88>] apply_to_page_range+0x28/0x40 +[ 0.663442] [<0002f3284ccebf12>] kasan_populate_vmalloc+0x82/0xa0 +[ 0.663445] [<0002f3284cc1578c>] alloc_vmap_area+0x34c/0xc10 +[ 0.663448] [<0002f3284cc1c2a6>] __get_vm_area_node+0x186/0x2a0 +[ 0.663451] [<0002f3284cc1e696>] __vmalloc_node_range_noprof+0x116/0x310 +[ 0.663454] [<0002f3284cc1d950>] __vmalloc_node_noprof+0xd0/0x110 +[ 0.663457] [<0002f3284c454b88>] alloc_thread_stack_node+0xf8/0x330 +[ 0.663460] [<0002f3284c458d56>] dup_task_struct+0x66/0x4d0 +[ 0.663463] [<0002f3284c45be90>] copy_process+0x280/0x4b90 +[ 0.663465] [<0002f3284c460940>] kernel_clone+0xd0/0x4b0 +[ 0.663467] [<0002f3284c46115e>] kernel_thread+0xbe/0xe0 +[ 0.663469] [<0002f3284c4e440e>] kthreadd+0x50e/0x7f0 +[ 0.663472] [<0002f3284c38c04a>] __ret_from_fork+0x8a/0xf0 +[ 0.663475] [<0002f3284ed57ff2>] ret_from_fork+0xa/0x38 + +Instead of allocating single pages per-PTE, bulk-allocate the shadow +memory prior to 
applying kasan_populate_vmalloc_pte() callback on a page +range. + +Link: https://lkml.kernel.org/r/c61d3560297c93ed044f0b1af085610353a06a58.1747316918.git.agordeev@linux.ibm.com +Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") +Signed-off-by: Alexander Gordeev +Suggested-by: Andrey Ryabinin +Reviewed-by: Harry Yoo +Cc: Daniel Axtens +Cc: +Signed-off-by: Andrew Morton +--- + mm/kasan/shadow.c | 92 +++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 78 insertions(+), 14 deletions(-) + +diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c +index 88d1c9dcb507..d2c70cd2afb1 100644 +--- a/mm/kasan/shadow.c ++++ b/mm/kasan/shadow.c +@@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_vm_area_shadow(void *start, + { + } + ++struct vmalloc_populate_data { ++ unsigned long start; ++ struct page **pages; ++}; ++ + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, +- void *unused) ++ void *_data) + { +- unsigned long page; ++ struct vmalloc_populate_data *data = _data; ++ struct page *page; + pte_t pte; ++ int index; + + if (likely(!pte_none(ptep_get(ptep)))) + return 0; + +- page = __get_free_page(GFP_KERNEL); +- if (!page) +- return -ENOMEM; +- +- __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); +- pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); ++ index = PFN_DOWN(addr - data->start); ++ page = data->pages[index]; ++ __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); ++ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(ptep_get(ptep)))) { + set_pte_at(&init_mm, addr, ptep, pte); +- page = 0; ++ data->pages[index] = NULL; + } + spin_unlock(&init_mm.page_table_lock); +- if (page) +- free_page(page); ++ ++ return 0; ++} ++ ++static void ___free_pages_bulk(struct page **pages, int nr_pages) ++{ ++ int i; ++ ++ for (i = 0; i < nr_pages; i++) { ++ if (pages[i]) { ++ __free_pages(pages[i], 0); ++ pages[i] = NULL; ++ } 
++ } ++} ++ ++static int ___alloc_pages_bulk(struct page **pages, int nr_pages) ++{ ++ unsigned long nr_populated, nr_total = nr_pages; ++ struct page **page_array = pages; ++ ++ while (nr_pages) { ++ nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); ++ if (!nr_populated) { ++ ___free_pages_bulk(page_array, nr_total - nr_pages); ++ return -ENOMEM; ++ } ++ pages += nr_populated; ++ nr_pages -= nr_populated; ++ } ++ + return 0; + } + ++static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) ++{ ++ unsigned long nr_pages, nr_total = PFN_UP(end - start); ++ struct vmalloc_populate_data data; ++ int ret = 0; ++ ++ data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ if (!data.pages) ++ return -ENOMEM; ++ ++ while (nr_total) { ++ nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); ++ ret = ___alloc_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ data.start = start; ++ ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, ++ kasan_populate_vmalloc_pte, &data); ++ ___free_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ start += nr_pages * PAGE_SIZE; ++ nr_total -= nr_pages; ++ } ++ ++ free_page((unsigned long)data.pages); ++ ++ return ret; ++} ++ + int kasan_populate_vmalloc(unsigned long addr, unsigned long size) + { + unsigned long shadow_start, shadow_end; +@@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) + shadow_start = PAGE_ALIGN_DOWN(shadow_start); + shadow_end = PAGE_ALIGN(shadow_end); + +- ret = apply_to_page_range(&init_mm, shadow_start, +- shadow_end - shadow_start, +- kasan_populate_vmalloc_pte, NULL); ++ ret = __kasan_populate_vmalloc(shadow_start, shadow_end); + if (ret) + return ret; + +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0025-perf-dwc_pcie-Qualify-RAS-DES-VSEC-Capability-by-Ven.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0025-perf-dwc_pcie-Qualify-RAS-DES-VSEC-Capability-by-Ven.patch 
new file mode 100644 index 0000000..5f6b596 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0025-perf-dwc_pcie-Qualify-RAS-DES-VSEC-Capability-by-Ven.patch @@ -0,0 +1,156 @@ +From 9c87440ba40d8e185f0122bbcdad88071c03823f Mon Sep 17 00:00:00 2001 +From: Bjorn Helgaas +Date: Mon, 9 Dec 2024 16:29:38 -0600 +Subject: [PATCH 25/28] perf/dwc_pcie: Qualify RAS DES VSEC Capability by + Vendor, Revision + +PCI Vendor-Specific (VSEC) Capabilities are defined by each vendor. +Devices from different vendors may advertise a VSEC Capability with the DWC +RAS DES functionality, but the vendors may assign different VSEC IDs. + +Search for the DWC RAS DES Capability using the VSEC ID and VSEC Rev +chosen by the vendor. + +This does not fix a current problem because Alibaba, Ampere, and Qualcomm +all assigned the same VSEC ID and VSEC Rev for the DWC RAS DES Capability. + +The potential issue is that we may add support for a device from another +vendor, where the vendor has already assigned DWC_PCIE_VSEC_RAS_DES_ID +(0x02) for an unrelated VSEC. In that event, dwc_pcie_des_cap() would find +the unrelated VSEC and mistakenly assume it was a DWC RAS DES Capability. 
+ +Signed-off-by: Bjorn Helgaas +Reviewed-and-tested-by: Ilkka Koskinen +Reviewed-and-tested-by: Shuai Xue +Link: https://lore.kernel.org/r/20241209222938.3219364-1-helgaas@kernel.org +Signed-off-by: Will Deacon +--- + drivers/perf/dwc_pcie_pmu.c | 65 ++++++++++++++++++++----------------- + 1 file changed, 35 insertions(+), 30 deletions(-) + +diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c +index 7dbda36884c8..9687083c42fb 100644 +--- a/drivers/perf/dwc_pcie_pmu.c ++++ b/drivers/perf/dwc_pcie_pmu.c +@@ -20,7 +20,6 @@ + #include + #include + +-#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 + #define DWC_PCIE_EVENT_CNT_CTL 0x8 + + /* +@@ -101,13 +100,21 @@ struct dwc_pcie_dev_info { + struct list_head dev_node; + }; + +-struct dwc_pcie_vendor_id { +- int vendor_id; ++struct dwc_pcie_pmu_vsec_id { ++ u16 vendor_id; ++ u16 vsec_id; ++ u8 vsec_rev; + }; + +-static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { +- {.vendor_id = PCI_VENDOR_ID_ALIBABA }, +- {.vendor_id = PCI_VENDOR_ID_QCOM }, ++/* ++ * VSEC IDs are allocated by the vendor, so a given ID may mean different ++ * things to different vendors. See PCIe r6.0, sec 7.9.5.2. 
++ */ ++static const struct dwc_pcie_pmu_vsec_id dwc_pcie_pmu_vsec_ids[] = { ++ { .vendor_id = PCI_VENDOR_ID_ALIBABA, ++ .vsec_id = 0x02, .vsec_rev = 0x4 }, ++ { .vendor_id = PCI_VENDOR_ID_QCOM, ++ .vsec_id = 0x02, .vsec_rev = 0x4 }, + {} /* terminator */ + }; + +@@ -519,31 +526,28 @@ static void dwc_pcie_unregister_pmu(void *data) + perf_pmu_unregister(&pcie_pmu->pmu); + } + +-static bool dwc_pcie_match_des_cap(struct pci_dev *pdev) ++static u16 dwc_pcie_des_cap(struct pci_dev *pdev) + { +- const struct dwc_pcie_vendor_id *vid; +- u16 vsec = 0; ++ const struct dwc_pcie_pmu_vsec_id *vid; ++ u16 vsec; + u32 val; + + if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) +- return false; ++ return 0; + +- for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) { ++ for (vid = dwc_pcie_pmu_vsec_ids; vid->vendor_id; vid++) { + vsec = pci_find_vsec_capability(pdev, vid->vendor_id, +- DWC_PCIE_VSEC_RAS_DES_ID); +- if (vsec) +- break; ++ vid->vsec_id); ++ if (vsec) { ++ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, ++ &val); ++ if (PCI_VNDR_HEADER_REV(val) == vid->vsec_rev) { ++ pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); ++ return vsec; ++ } ++ } + } +- if (!vsec) +- return false; +- +- pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); +- if (PCI_VNDR_HEADER_REV(val) != 0x04) +- return false; +- +- pci_dbg(pdev, +- "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); +- return true; ++ return 0; + } + + static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info) +@@ -589,7 +593,7 @@ static int dwc_pcie_pmu_notifier(struct notifier_block *nb, + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: +- if (!dwc_pcie_match_des_cap(pdev)) ++ if (!dwc_pcie_des_cap(pdev)) + return NOTIFY_DONE; + if (dwc_pcie_register_dev(pdev)) + return NOTIFY_BAD; +@@ -614,13 +618,14 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) + struct pci_dev *pdev = plat_dev->dev.platform_data; + 
struct dwc_pcie_pmu *pcie_pmu; + char *name; +- u32 sbdf, val; ++ u32 sbdf; + u16 vsec; + int ret; + +- vsec = pci_find_vsec_capability(pdev, pdev->vendor, +- DWC_PCIE_VSEC_RAS_DES_ID); +- pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); ++ vsec = dwc_pcie_des_cap(pdev); ++ if (!vsec) ++ return -ENODEV; ++ + sbdf = plat_dev->id; + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); + if (!name) +@@ -741,7 +746,7 @@ static int __init dwc_pcie_pmu_init(void) + int ret; + + for_each_pci_dev(pdev) { +- if (!dwc_pcie_match_des_cap(pdev)) ++ if (!dwc_pcie_des_cap(pdev)) + continue; + + ret = dwc_pcie_register_dev(pdev); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0026-perf-dwc_pcie-fix-duplicate-pci_dev-devices.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0026-perf-dwc_pcie-fix-duplicate-pci_dev-devices.patch new file mode 100644 index 0000000..1babdfb --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0026-perf-dwc_pcie-fix-duplicate-pci_dev-devices.patch @@ -0,0 +1,74 @@ +From bc76e0dd4f2ea1dc229e684b7e5a431987c265a8 Mon Sep 17 00:00:00 2001 +From: Yunhui Cui +Date: Thu, 20 Feb 2025 20:17:16 +0800 +Subject: [PATCH 26/28] perf/dwc_pcie: fix duplicate pci_dev devices + +During platform_device_register, wrongly using struct device +pci_dev as platform_data caused a kmemdup copy of pci_dev. Worse +still, accessing the duplicated device leads to list corruption as its +mutex content (e.g., list, magic) remains the same as the original. 
+ +Signed-off-by: Yunhui Cui +Reviewed-by: Shuai Xue +Link: https://lore.kernel.org/r/20250220121716.50324-3-cuiyunhui@bytedance.com +Signed-off-by: Will Deacon +--- + drivers/perf/dwc_pcie_pmu.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c +index 9687083c42fb..bde0a0a06cff 100644 +--- a/drivers/perf/dwc_pcie_pmu.c ++++ b/drivers/perf/dwc_pcie_pmu.c +@@ -564,9 +564,7 @@ static int dwc_pcie_register_dev(struct pci_dev *pdev) + u32 sbdf; + + sbdf = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn); +- plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", sbdf, +- pdev, sizeof(*pdev)); +- ++ plat_dev = platform_device_register_simple("dwc_pcie_pmu", sbdf, NULL, 0); + if (IS_ERR(plat_dev)) + return PTR_ERR(plat_dev); + +@@ -615,18 +613,26 @@ static struct notifier_block dwc_pcie_pmu_nb = { + + static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) + { +- struct pci_dev *pdev = plat_dev->dev.platform_data; ++ struct pci_dev *pdev; + struct dwc_pcie_pmu *pcie_pmu; + char *name; + u32 sbdf; + u16 vsec; + int ret; + ++ sbdf = plat_dev->id; ++ pdev = pci_get_domain_bus_and_slot(sbdf >> 16, PCI_BUS_NUM(sbdf & 0xffff), ++ sbdf & 0xff); ++ if (!pdev) { ++ pr_err("No pdev found for the sbdf 0x%x\n", sbdf); ++ return -ENODEV; ++ } ++ + vsec = dwc_pcie_des_cap(pdev); + if (!vsec) + return -ENODEV; + +- sbdf = plat_dev->id; ++ pci_dev_put(pdev); + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", sbdf); + if (!name) + return -ENOMEM; +@@ -641,7 +647,7 @@ static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) + pcie_pmu->on_cpu = -1; + pcie_pmu->pmu = (struct pmu){ + .name = name, +- .parent = &pdev->dev, ++ .parent = &plat_dev->dev, + .module = THIS_MODULE, + .attr_groups = dwc_pcie_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, +-- +2.43.0 + diff --git 
a/bsp_diff/caas/kernel/linux-intel-lts2024/0027-mm-memory-failure-fix-VM_BUG_ON_PAGE-PagePoisoned-pa.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0027-mm-memory-failure-fix-VM_BUG_ON_PAGE-PagePoisoned-pa.patch new file mode 100644 index 0000000..72cfb14 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0027-mm-memory-failure-fix-VM_BUG_ON_PAGE-PagePoisoned-pa.patch @@ -0,0 +1,109 @@ +From efebdeb1b9c19728eede6ba86b40876e5725c10d Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Thu, 28 Aug 2025 10:46:18 +0800 +Subject: [PATCH 27/28] mm/memory-failure: fix + VM_BUG_ON_PAGE(PagePoisoned(page)) when unpoison memory + +When I did memory failure tests, below panic occurs: + +page dumped because: VM_BUG_ON_PAGE(PagePoisoned(page)) +kernel BUG at include/linux/page-flags.h:616! +Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI +CPU: 3 PID: 720 Comm: bash Not tainted 6.10.0-rc1-00195-g148743902568 #40 +RIP: 0010:unpoison_memory+0x2f3/0x590 +RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246 +RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8 +RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0 +RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb +R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000 +R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe +FS: 00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0 +Call Trace: + + unpoison_memory+0x2f3/0x590 + simple_attr_write_xsigned.constprop.0.isra.0+0xb3/0x110 + debugfs_attr_write+0x42/0x60 + full_proxy_write+0x5b/0x80 + vfs_write+0xd5/0x540 + ksys_write+0x64/0xe0 + do_syscall_64+0xb9/0x1d0 + entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f08f0314887 +RSP: 002b:00007ffece710078 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f08f0314887 +RDX: 
0000000000000009 RSI: 0000564787a30410 RDI: 0000000000000001 +RBP: 0000564787a30410 R08: 000000000000fefe R09: 000000007fffffff +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000009 +R13: 00007f08f041b780 R14: 00007f08f0417600 R15: 00007f08f0416a00 + +Modules linked in: hwpoison_inject +---[ end trace 0000000000000000 ]--- +RIP: 0010:unpoison_memory+0x2f3/0x590 +RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246 +RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8 +RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0 +RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb +R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000 +R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe +FS: 00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0 +Kernel panic - not syncing: Fatal exception +Kernel Offset: 0x31c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) +---[ end Kernel panic - not syncing: Fatal exception ]--- + +The root cause is that unpoison_memory() tries to check the PG_HWPoison +flags of an uninitialized page. So VM_BUG_ON_PAGE(PagePoisoned(page)) is +triggered. This can be reproduced by below steps: + +1.Offline memory block: + + echo offline > /sys/devices/system/memory/memory12/state + +2.Get offlined memory pfn: + + page-types -b n -rlN + +3.Write pfn to unpoison-pfn + + echo > /sys/kernel/debug/hwpoison/unpoison-pfn + +This scenario can be identified by pfn_to_online_page() returning NULL. +And ZONE_DEVICE pages are never expected, so we can simply fail if +pfn_to_online_page() == NULL to fix the bug. 
+ +Link: https://lkml.kernel.org/r/20250828024618.1744895-1-linmiaohe@huawei.com +Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") +Signed-off-by: Miaohe Lin +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Cc: Naoya Horiguchi +Cc: +Signed-off-by: Andrew Morton +--- + mm/memory-failure.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index 8c8d78d6d306..32d8e19e7671 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -2570,10 +2570,9 @@ int unpoison_memory(unsigned long pfn) + static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + +- if (!pfn_valid(pfn)) +- return -ENXIO; +- +- p = pfn_to_page(pfn); ++ p = pfn_to_online_page(pfn); ++ if (!p) ++ return -EIO; + folio = page_folio(p); + + mutex_lock(&mf_mutex); +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/0028-ublk-fix-race-between-io_uring_cmd_complete_in_task-.patch b/bsp_diff/caas/kernel/linux-intel-lts2024/0028-ublk-fix-race-between-io_uring_cmd_complete_in_task-.patch new file mode 100644 index 0000000..710aa01 --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/0028-ublk-fix-race-between-io_uring_cmd_complete_in_task-.patch @@ -0,0 +1,93 @@ +From bafafa14fa4f0ea8cdce49d36e5c2e3de66c488e Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Fri, 25 Apr 2025 09:37:40 +0800 +Subject: [PATCH 28/28] ublk: fix race between io_uring_cmd_complete_in_task + and ublk_cancel_cmd + +ublk_cancel_cmd() calls io_uring_cmd_done() to complete uring_cmd, but +we may have scheduled task work via io_uring_cmd_complete_in_task() for +dispatching request, then kernel crash can be triggered. + +Fix it by not trying to cancel the command if the ublk block request has +started. 
+ +Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") +Reported-by: Jared Holzman +Tested-by: Jared Holzman +Closes: https://lore.kernel.org/linux-block/d2179120-171b-47ba-b664-23242981ef19@nvidia.com/ +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250425013742.1079549-3-ming.lei@redhat.com +Signed-off-by: Jens Axboe +--- + drivers/block/ublk_drv.c | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index defcc964ecab..577beca59e3b 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -1513,14 +1513,31 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) + return !was_canceled; + } + +-static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, ++static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, + unsigned int issue_flags) + { ++ struct ublk_io *io = &ubq->ios[tag]; ++ struct ublk_device *ub = ubq->dev; ++ struct request *req; + bool done; + + if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) + return; + ++ /* ++ * Don't try to cancel this command if the request is started for ++ * avoiding race between io_uring_cmd_done() and ++ * io_uring_cmd_complete_in_task(). 
++ * ++ * Either the started request will be aborted via __ublk_abort_rq(), ++ * then this uring_cmd is canceled next time, or it will be done in ++ * task work function ublk_dispatch_req() because io_uring guarantees ++ * that ublk_dispatch_req() is always called ++ */ ++ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); ++ if (req && blk_mq_request_started(req)) ++ return; ++ + spin_lock(&ubq->cancel_lock); + done = !!(io->flags & UBLK_IO_FLAG_CANCELED); + if (!done) +@@ -1543,7 +1560,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, + struct task_struct *task; + struct ublk_device *ub; + bool need_schedule; +- struct ublk_io *io; + + if (WARN_ON_ONCE(!ubq)) + return; +@@ -1558,9 +1574,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, + ub = ubq->dev; + need_schedule = ublk_abort_requests(ub, ubq); + +- io = &ubq->ios[pdu->tag]; +- WARN_ON_ONCE(io->cmd != cmd); +- ublk_cancel_cmd(ubq, io, issue_flags); ++ WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); ++ ublk_cancel_cmd(ubq, pdu->tag, issue_flags); + + if (need_schedule) { + if (ublk_nosrv_should_stop_dev(ub)) +@@ -1580,7 +1595,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) + int i; + + for (i = 0; i < ubq->q_depth; i++) +- ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); ++ ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); + } + + /* Cancel all pending commands, must be called after del_gendisk() returns */ +-- +2.43.0 + diff --git a/bsp_diff/caas/kernel/linux-intel-lts2024/series b/bsp_diff/caas/kernel/linux-intel-lts2024/series new file mode 100644 index 0000000..5a17ecb --- /dev/null +++ b/bsp_diff/caas/kernel/linux-intel-lts2024/series @@ -0,0 +1,75 @@ +#CVE-2025-39877 +0001-mm-damon-sysfs-fix-use-after-free-in-state_show.patch + +#CVE-2025-39764 +0002-netfilter-ctnetlink-remove-refcounting-in-expectatio.patch + +#CVE-2025-39677 +0003-net-sched-Fix-backlog-accounting-in-qdisc_dequeue_in.patch + +#CVE-2025-38643 
+0004-wifi-cfg80211-Add-missing-lock-in-cfg80211_check_and.patch + +#CVE-2025-38627 +0005-f2fs-compress-fix-UAF-of-f2fs_inode_info-in-f2fs_fre.patch + +#CVE-2025-38621 +0006-md-make-rdev_addable-usable-for-rcu-mode.patch + +#CVE-2025-38207 +0007-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch + +#CVE-2025-37880 +0008-um-work-around-sched_yield-not-yielding-in-time-trav.patch + +#CVE-2025-22121 +0009-ext4-introduce-ITAIL-helper.patch +0010-ext4-fix-out-of-bound-read-in-ext4_xattr_inode_dec_r.patch + +#CVE-2025-22116 +0011-idpf-check-error-for-register_netdev-on-init.patch + +#CVE-2025-22104 +0012-ibmvnic-Use-kernel-helpers-for-hex-dumps.patch + +#CVE-2025-21751 +0013-net-mlx5-HWS-change-error-flow-on-matcher-disconnect.patch + +#CVE-2025-21709 +0014-kernel-be-more-careful-about-dup_mmap-failures-and-u.patch + +#CVE-2025-39881 +0015-kernfs-Fix-UAF-in-polling-when-open-file-is-released.patch + +#CVE-2025-39880 +0016-libceph-fix-invalid-accesses-to-ceph_connection_v1_i.patch + +#CVE-2025-39816 +0017-io_uring-kbuf-always-use-READ_ONCE-to-read-ring-prov.patch + +#CVE-2025-38636 +0018-rv-Use-strings-in-da-monitors-tracepoints.patch + +#CVE-2025-38132 +0019-coresight-holding-cscfg_csdev_lock-while-removing-cs.patch + +#CVE-2025-38105 +0020-ALSA-usb-audio-Kill-timer-properly-at-removal.patch + +#CVE-2025-38041 +0021-clk-sunxi-ng-h616-Add-sigma-delta-modulation-setting.patch +0022-clk-sunxi-ng-h616-Reparent-CPU-clock-during-frequenc.patch +0023-clk-sunxi-ng-h616-Reparent-GPU-clock-during-frequenc.patch + +#CVE-2025-38029 +0024-kasan-avoid-sleepable-page-allocation-from-atomic-co.patch + +#CVE-2025-37746 +0025-perf-dwc_pcie-Qualify-RAS-DES-VSEC-Capability-by-Ven.patch +0026-perf-dwc_pcie-fix-duplicate-pci_dev-devices.patch + +#CVE-2025-39883 +0027-mm-memory-failure-fix-VM_BUG_ON_PAGE-PagePoisoned-pa.patch + +#CVE-2025-37906 +0028-ublk-fix-race-between-io_uring_cmd_complete_in_task-.patch