diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index ab7244d8108f6..5888e012dfe37 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -15,6 +15,7 @@ #include #include #include +#include #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 @@ -23,7 +24,7 @@ rcu_read_lock_bh_held()) struct bpf_local_storage_map_bucket { struct hlist_head list; - raw_spinlock_t lock; + rqspinlock_t lock; }; /* Thp map is not the primary owner of a bpf_local_storage_elem. @@ -99,7 +100,7 @@ struct bpf_local_storage { * bpf_local_storage_elem. */ struct rcu_head rcu; - raw_spinlock_t lock; /* Protect adding/removing from the "list" */ + rqspinlock_t lock; /* Protect adding/removing from the "list" */ }; /* U16_MAX is much more than enough for sk local storage @@ -169,8 +170,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, - struct bpf_local_storage_cache *cache, - int __percpu *busy_counter); + struct bpf_local_storage_cache *cache); int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, @@ -180,10 +180,10 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); +int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); -void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem); +int bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 0687a760974a4..a57abb2956d55 
100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -11,29 +11,6 @@ DEFINE_BPF_STORAGE_CACHE(cgroup_cache); -static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy); - -static void bpf_cgrp_storage_lock(void) -{ - cant_migrate(); - this_cpu_inc(bpf_cgrp_storage_busy); -} - -static void bpf_cgrp_storage_unlock(void) -{ - this_cpu_dec(bpf_cgrp_storage_busy); -} - -static bool bpf_cgrp_storage_trylock(void) -{ - cant_migrate(); - if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) { - this_cpu_dec(bpf_cgrp_storage_busy); - return false; - } - return true; -} - static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) { struct cgroup *cg = owner; @@ -45,16 +22,14 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; - rcu_read_lock_dont_migrate(); + rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); if (!local_storage) goto out; - bpf_cgrp_storage_lock(); bpf_local_storage_destroy(local_storage); - bpf_cgrp_storage_unlock(); out: - rcu_read_unlock_migrate(); + rcu_read_unlock(); } static struct bpf_local_storage_data * @@ -83,9 +58,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) if (IS_ERR(cgroup)) return ERR_CAST(cgroup); - bpf_cgrp_storage_lock(); sdata = cgroup_storage_lookup(cgroup, map, true); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return sdata ? 
sdata->data : NULL; } @@ -102,10 +75,8 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, if (IS_ERR(cgroup)) return PTR_ERR(cgroup); - bpf_cgrp_storage_lock(); sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, value, map_flags, false, GFP_ATOMIC); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return PTR_ERR_OR_ZERO(sdata); } @@ -118,8 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - return 0; + return bpf_selem_unlink(SELEM(sdata), false); } static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) @@ -132,9 +102,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) if (IS_ERR(cgroup)) return PTR_ERR(cgroup); - bpf_cgrp_storage_lock(); err = cgroup_storage_delete(cgroup, map); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return err; } @@ -151,7 +119,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) static void cgroup_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy); + bpf_local_storage_map_free(map, &cgroup_cache); } /* *gfp_flags* is a hidden argument provided by the verifier */ @@ -159,7 +127,6 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; - bool nobusy; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) @@ -168,38 +135,27 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, if (!cgroup) return (unsigned long)NULL; - nobusy = bpf_cgrp_storage_trylock(); - - sdata = cgroup_storage_lookup(cgroup, map, nobusy); + sdata = cgroup_storage_lookup(cgroup, map, true); if (sdata) - goto unlock; + goto out; /* only allocate new storage, when the cgroup is refcounted */ if
(!percpu_ref_is_dying(&cgroup->self.refcnt) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); -unlock: - if (nobusy) - bpf_cgrp_storage_unlock(); +out: return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; } BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup) { - int ret; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!cgroup) return -EINVAL; - if (!bpf_cgrp_storage_trylock()) - return -EBUSY; - - ret = cgroup_storage_delete(cgroup, map); - bpf_cgrp_storage_unlock(); - return ret; + return cgroup_storage_delete(cgroup, map); } const struct bpf_map_ops cgrp_storage_map_ops = { diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index e54cce2b91754..470f4b02c79ea 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -110,9 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata), false); } static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) @@ -186,7 +184,7 @@ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) static void inode_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &inode_cache, NULL); + bpf_local_storage_map_free(map, &inode_cache); } const struct bpf_map_ops inode_storage_map_ops = { diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b931fbceb54da..3ce4dd7e7fc6f 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -19,9 +19,9 @@ static struct bpf_local_storage_map_bucket * select_bucket(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem) + struct bpf_local_storage *local_storage) { - 
return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; + return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)]; } static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) @@ -371,37 +371,6 @@ static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, return selem_smap->bpf_ma; } -static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool reuse_now) -{ - struct bpf_local_storage_map *storage_smap; - struct bpf_local_storage *local_storage; - bool bpf_ma, free_local_storage = false; - HLIST_HEAD(selem_free_list); - unsigned long flags; - - if (unlikely(!selem_linked_to_storage_lockless(selem))) - /* selem has already been unlinked from sk */ - return; - - local_storage = rcu_dereference_check(selem->local_storage, - bpf_rcu_lock_held()); - storage_smap = rcu_dereference_check(local_storage->smap, - bpf_rcu_lock_held()); - bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); - - raw_spin_lock_irqsave(&local_storage->lock, flags); - if (likely(selem_linked_to_storage(selem))) - free_local_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, true, &selem_free_list); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); - - bpf_selem_free_list(&selem_free_list, reuse_now); - - if (free_local_storage) - bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); -} - void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem) { @@ -409,44 +378,113 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, hlist_add_head_rcu(&selem->snode, &local_storage->list); } -static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) { + struct bpf_local_storage *local_storage; struct bpf_local_storage_map *smap; struct bpf_local_storage_map_bucket *b; unsigned long flags; + int err; if 
(unlikely(!selem_linked_to_map_lockless(selem))) /* selem has already be unlinked from smap */ - return; + return 0; + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - b = select_bucket(smap, selem); - raw_spin_lock_irqsave(&b->lock, flags); + b = select_bucket(smap, local_storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + return err; + + if (likely(selem_linked_to_map(selem))) + hlist_del_init_rcu(&selem->map_node); + raw_res_spin_unlock_irqrestore(&b->lock, flags); + + return 0; +} + +static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem) +{ if (likely(selem_linked_to_map(selem))) hlist_del_init_rcu(&selem->map_node); - raw_spin_unlock_irqrestore(&b->lock, flags); } -void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem) +int bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem) { - struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem); + struct bpf_local_storage *local_storage; + struct bpf_local_storage_map_bucket *b; unsigned long flags; + int err; + + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); + b = select_bucket(smap, local_storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + return err; - raw_spin_lock_irqsave(&b->lock, flags); RCU_INIT_POINTER(SDATA(selem)->smap, smap); hlist_add_head_rcu(&selem->map_node, &b->list); - raw_spin_unlock_irqrestore(&b->lock, flags); + raw_res_spin_unlock_irqrestore(&b->lock, flags); + + return 0; } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) +static void bpf_selem_link_map_nolock(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map_bucket *b) { - /* Always unlink from map before unlinking from local_storage - * because selem 
will be freed after successfully unlinked from - * the local_storage. - */ - bpf_selem_unlink_map(selem); - bpf_selem_unlink_storage(selem, reuse_now); + RCU_INIT_POINTER(SDATA(selem)->smap, smap); + hlist_add_head_rcu(&selem->map_node, &b->list); +} + +int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) +{ + struct bpf_local_storage_map *storage_smap; + struct bpf_local_storage *local_storage; + bool bpf_ma, free_local_storage = false; + HLIST_HEAD(selem_free_list); + unsigned long flags; + int err; + + if (unlikely(!selem_linked_to_storage_lockless(selem))) + /* selem has already been unlinked from sk */ + return 0; + + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); + storage_smap = rcu_dereference_check(local_storage->smap, + bpf_rcu_lock_held()); + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); + + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + return err; + + if (likely(selem_linked_to_storage(selem))) { + /* Always unlink from map before unlinking from local_storage + * because selem will be freed after successfully unlinked from + * the local_storage. + */ + err = bpf_selem_unlink_map(selem); + if (err) + goto out; + + free_local_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, true, &selem_free_list); + } +out: + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); + + bpf_selem_free_list(&selem_free_list, reuse_now); + + if (free_local_storage) + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); + + return err; } void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, @@ -454,16 +492,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem) { unsigned long flags; + int err; /* spinlock is needed to avoid racing with the * parallel delete. 
Otherwise, publishing an already * deleted sdata to the cache will become a use-after-free * problem in the next bpf_local_storage_lookup(). */ - raw_spin_lock_irqsave(&local_storage->lock, flags); + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + return; + if (selem_linked_to_storage(selem)) rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem)); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); } static int check_flags(const struct bpf_local_storage_data *old_sdata, @@ -487,6 +529,8 @@ int bpf_local_storage_alloc(void *owner, { struct bpf_local_storage *prev_storage, *storage; struct bpf_local_storage **owner_storage_ptr; + struct bpf_local_storage_map_bucket *b; + unsigned long flags; int err; err = mem_charge(smap, owner, sizeof(*storage)); @@ -505,11 +549,17 @@ int bpf_local_storage_alloc(void *owner, RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); - raw_spin_lock_init(&storage->lock); + raw_res_spin_lock_init(&storage->lock); storage->owner = owner; bpf_selem_link_storage_nolock(storage, first_selem); - bpf_selem_link_map(smap, first_selem); + + b = select_bucket(smap, storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + goto uncharge; + + bpf_selem_link_map_nolock(smap, first_selem, b); owner_storage_ptr = (struct bpf_local_storage **)owner_storage(smap, owner); @@ -525,7 +575,8 @@ int bpf_local_storage_alloc(void *owner, */ prev_storage = cmpxchg(owner_storage_ptr, NULL, storage); if (unlikely(prev_storage)) { - bpf_selem_unlink_map(first_selem); + bpf_selem_unlink_map_nolock(first_selem); + raw_res_spin_unlock_irqrestore(&b->lock, flags); err = -EAGAIN; goto uncharge; @@ -539,6 +590,7 @@ int bpf_local_storage_alloc(void *owner, * bucket->list under rcu_read_lock(). 
*/ } + raw_res_spin_unlock_irqrestore(&b->lock, flags); return 0; @@ -560,8 +612,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *alloc_selem, *selem = NULL; struct bpf_local_storage *local_storage; + struct bpf_local_storage_map_bucket *b; HLIST_HEAD(old_selem_free_list); - unsigned long flags; + unsigned long flags, b_flags; int err; /* BPF_EXIST and BPF_NOEXIST cannot be both set */ @@ -620,7 +673,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (!alloc_selem) return ERR_PTR(-ENOMEM); - raw_spin_lock_irqsave(&local_storage->lock, flags); + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + goto free_selem; /* Recheck local_storage->list under local_storage->lock */ if (unlikely(hlist_empty(&local_storage->list))) { @@ -645,22 +700,30 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, goto unlock; } + b = select_bucket(smap, local_storage); + + err = raw_res_spin_lock_irqsave(&b->lock, b_flags); + if (err) + goto unlock; + alloc_selem = NULL; /* First, link the new selem to the map */ - bpf_selem_link_map(smap, selem); + bpf_selem_link_map_nolock(smap, selem, b); /* Second, link (and publish) the new selem to local_storage */ bpf_selem_link_storage_nolock(local_storage, selem); /* Third, remove old selem, SELEM(old_sdata) */ if (old_sdata) { - bpf_selem_unlink_map(SELEM(old_sdata)); + bpf_selem_unlink_map_nolock(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), true, &old_selem_free_list); } + raw_res_spin_unlock_irqrestore(&b->lock, b_flags); unlock: - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); bpf_selem_free_list(&old_selem_free_list, false); +free_selem: if (alloc_selem) { mem_uncharge(smap, owner, smap->elem_size);
@@ -749,12 +812,12 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ - raw_spin_lock_irqsave(&local_storage->lock, flags); + while (raw_res_spin_lock_irqsave(&local_storage->lock, flags)); hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { /* Always unlink from map before unlinking from * local_storage. */ - bpf_selem_unlink_map(selem); + while (bpf_selem_unlink_map(selem)); /* If local_storage list has only one element, the * bpf_selem_unlink_storage_nolock() will return true. * Otherwise, it will return false. The current loop iteration @@ -764,7 +827,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) free_storage = bpf_selem_unlink_storage_nolock( local_storage, selem, true, &free_selem_list); } - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); bpf_selem_free_list(&free_selem_list, true); @@ -821,7 +884,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, for (i = 0; i < nbuckets; i++) { INIT_HLIST_HEAD(&smap->buckets[i].list); - raw_spin_lock_init(&smap->buckets[i].lock); + raw_res_spin_lock_init(&smap->buckets[i].lock); } smap->elem_size = offsetof(struct bpf_local_storage_elem, @@ -854,8 +917,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, } void bpf_local_storage_map_free(struct bpf_map *map, - struct bpf_local_storage_cache *cache, - int __percpu *busy_counter) + struct bpf_local_storage_cache *cache) { struct bpf_local_storage_map_bucket *b; struct bpf_local_storage_elem *selem; @@ -888,11 +950,7 @@ void bpf_local_storage_map_free(struct
bpf_map *map, while ((selem = hlist_entry_safe( rcu_dereference_raw(hlist_first_rcu(&b->list)), struct bpf_local_storage_elem, map_node))) { - if (busy_counter) - this_cpu_inc(*busy_counter); - bpf_selem_unlink(selem, true); - if (busy_counter) - this_cpu_dec(*busy_counter); + while (bpf_selem_unlink(selem, true)); cond_resched_rcu(); } rcu_read_unlock(); diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index a1dc1bf0848a5..4d53aebe67848 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -20,29 +20,6 @@ DEFINE_BPF_STORAGE_CACHE(task_cache); -static DEFINE_PER_CPU(int, bpf_task_storage_busy); - -static void bpf_task_storage_lock(void) -{ - cant_migrate(); - this_cpu_inc(bpf_task_storage_busy); -} - -static void bpf_task_storage_unlock(void) -{ - this_cpu_dec(bpf_task_storage_busy); -} - -static bool bpf_task_storage_trylock(void) -{ - cant_migrate(); - if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { - this_cpu_dec(bpf_task_storage_busy); - return false; - } - return true; -} - static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) { struct task_struct *task = owner; @@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; - rcu_read_lock_dont_migrate(); + rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); if (!local_storage) goto out; - bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); - bpf_task_storage_unlock(); out: - rcu_read_unlock_migrate(); + rcu_read_unlock(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) @@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) goto out; } - bpf_task_storage_lock(); sdata = task_storage_lookup(task, map, true); - bpf_task_storage_unlock(); put_pid(pid); return sdata ? 
sdata->data : NULL; out: @@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, goto out; } - bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, true, GFP_ATOMIC); - bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); out: @@ -155,8 +126,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, return err; } -static int task_storage_delete(struct task_struct *task, struct bpf_map *map, - bool nobusy) +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) { struct bpf_local_storage_data *sdata; @@ -164,12 +134,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, if (!sdata) return -ENOENT; - if (!nobusy) - return -EBUSY; - - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata), false); } static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) @@ -194,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) goto out; } - bpf_task_storage_lock(); - err = task_storage_delete(task, map, true); - bpf_task_storage_unlock(); + err = task_storage_delete(task, map); out: put_pid(pid); return err; } -/* Called by bpf_task_storage_get*() helpers */ -static void *__bpf_task_storage_get(struct bpf_map *map, - struct task_struct *task, void *value, - u64 flags, gfp_t gfp_flags, bool nobusy) +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; - sdata = task_storage_lookup(task, map, nobusy); + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); if (sdata) - return sdata->data; + return (unsigned 
long)sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); - return IS_ERR(sdata) ? NULL : sdata->data; + return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; } - return NULL; -} - -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) -{ - bool nobusy; - void *data; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) - return (unsigned long)NULL; - - nobusy = bpf_task_storage_trylock(); - data = __bpf_task_storage_get(map, task, value, flags, - gfp_flags, nobusy); - if (nobusy) - bpf_task_storage_unlock(); - return (unsigned long)data; -} - -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) -{ - void *data; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) - return (unsigned long)NULL; - - bpf_task_storage_lock(); - data = __bpf_task_storage_get(map, task, value, flags, - gfp_flags, true); - bpf_task_storage_unlock(); - return (unsigned long)data; -} - -BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, - task) -{ - bool nobusy; - int ret; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (!task) - return -EINVAL; - - nobusy = bpf_task_storage_trylock(); - /* This helper must only be called from places where the lifetime of the task - * is guaranteed. Either by being refcounted or by being protected - * by an RCU read-side critical section. 
- */ - ret = task_storage_delete(task, map, nobusy); - if (nobusy) - bpf_task_storage_unlock(); - return ret; + return (unsigned long)NULL; } BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { - int ret; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; - bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ - ret = task_storage_delete(task, map, true); - bpf_task_storage_unlock(); - return ret; + return task_storage_delete(task, map); } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) @@ -313,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) static void task_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); + bpf_local_storage_map_free(map, &task_cache); } BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) @@ -332,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = { .map_owner_storage_ptr = task_storage_ptr, }; -const struct bpf_func_proto bpf_task_storage_get_recur_proto = { - .func = bpf_task_storage_get_recur, - .gpl_only = false, - .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], - .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, - .arg4_type = ARG_ANYTHING, -}; - const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, @@ -354,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; -const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { - .func = bpf_task_storage_delete_recur, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - 
.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], -}; - const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356dfc..1d83f29a8986d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2040,12 +2040,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif case BPF_FUNC_task_storage_get: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_get_recur_proto; return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; default: break; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 2e538399757fe..7037b841cf119 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,9 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata), false); } /* Called by __sk_destruct() & bpf_sk_storage_clone() */ @@ -64,7 +62,7 @@ void bpf_sk_storage_free(struct sock *sk) static void bpf_sk_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &sk_cache, NULL); + bpf_local_storage_map_free(map, &sk_cache); } static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) @@ -194,7 +192,9 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } if (new_sk_storage) { - bpf_selem_link_map(smap, copy_selem); + ret = bpf_selem_link_map(smap, copy_selem); + if (ret) + goto out; bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); diff --git 
a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c deleted file mode 100644 index a4121d2248ac8..0000000000000 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "bpf_util.h" -#include "test_maps.h" -#include "task_local_storage_helpers.h" -#include "read_bpf_task_storage_busy.skel.h" - -struct lookup_ctx { - bool start; - bool stop; - int pid_fd; - int map_fd; - int loop; -}; - -static void *lookup_fn(void *arg) -{ - struct lookup_ctx *ctx = arg; - long value; - int i = 0; - - while (!ctx->start) - usleep(1); - - while (!ctx->stop && i++ < ctx->loop) - bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); - return NULL; -} - -static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) -{ - unsigned int i; - - ctx->stop = true; - ctx->start = true; - for (i = 0; i < nr; i++) - pthread_join(tids[i], NULL); -} - -void test_task_storage_map_stress_lookup(void) -{ -#define MAX_NR_THREAD 4096 - unsigned int i, nr = 256, loop = 8192, cpu = 0; - struct read_bpf_task_storage_busy *skel; - pthread_t tids[MAX_NR_THREAD]; - struct lookup_ctx ctx; - cpu_set_t old, new; - const char *cfg; - int err; - - cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); - if (cfg) { - nr = atoi(cfg); - if (nr > MAX_NR_THREAD) - nr = MAX_NR_THREAD; - } - cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); - if (cfg) - loop = atoi(cfg); - cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); - if (cfg) - cpu = atoi(cfg); - - skel = read_bpf_task_storage_busy__open_and_load(); - err = libbpf_get_error(skel); - CHECK(err, "open_and_load", "error %d\n", err); - - /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPTION) { - printf("%s SKIP (no 
CONFIG_PREEMPTION)\n", __func__); - read_bpf_task_storage_busy__destroy(skel); - skips++; - return; - } - - /* Save the old affinity setting */ - sched_getaffinity(getpid(), sizeof(old), &old); - - /* Pinned on a specific CPU */ - CPU_ZERO(&new); - CPU_SET(cpu, &new); - sched_setaffinity(getpid(), sizeof(new), &new); - - ctx.start = false; - ctx.stop = false; - ctx.pid_fd = sys_pidfd_open(getpid(), 0); - ctx.map_fd = bpf_map__fd(skel->maps.task); - ctx.loop = loop; - for (i = 0; i < nr; i++) { - err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); - if (err) { - abort_lookup(&ctx, tids, i); - CHECK(err, "pthread_create", "error %d\n", err); - goto out; - } - } - - ctx.start = true; - for (i = 0; i < nr; i++) - pthread_join(tids[i], NULL); - - skel->bss->pid = getpid(); - err = read_bpf_task_storage_busy__attach(skel); - CHECK(err, "attach", "error %d\n", err); - - /* Trigger program */ - sys_gettid(); - skel->bss->pid = 0; - - CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); -out: - read_bpf_task_storage_busy__destroy(skel); - /* Restore affinity setting */ - sched_setaffinity(getpid(), sizeof(old), &old); - printf("%s:PASS\n", __func__); -} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 10cba526d3e63..f1642794f70ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -875,8 +875,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); #endif - TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT, - "static int bpf_cgrp_storage_busy = (int)2", 2); + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT, + "static int bpf_bprintf_nest_level = (int)2", 2); } struct btf_dump_string_ctx { diff --git 
a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 42e822ea352f1..559727b05e086 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -117,19 +117,19 @@ static void test_recursion(void) map_fd = bpf_map__fd(skel->maps.map_a); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_a"); - ASSERT_EQ(value, 201, "map_a value"); - ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + ASSERT_EQ(value, 200, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); map_fd = bpf_map__fd(skel->maps.map_b); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_b"); - ASSERT_EQ(value, 100, "map_b value"); + ASSERT_EQ(value, 1, "map_b value"); prog_fd = bpf_program__fd(skel->progs.on_update); memset(&info, 0, sizeof(info)); err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); ASSERT_OK(err, "get prog info"); - ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion"); prog_fd = bpf_program__fd(skel->progs.on_enter); memset(&info, 0, sizeof(info)); diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c deleted file mode 100644 index 69da05bb6c63e..0000000000000 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2022. 
Huawei Technologies Co., Ltd */ -#include "vmlinux.h" -#include -#include - -extern bool CONFIG_PREEMPTION __kconfig __weak; -extern const int bpf_task_storage_busy __ksym; - -char _license[] SEC("license") = "GPL"; - -int pid = 0; -int busy = 0; - -struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, long); -} task SEC(".maps"); - -SEC("raw_tp/sys_enter") -int BPF_PROG(read_bpf_task_storage_busy) -{ - int *value; - - if (!CONFIG_PREEMPTION) - return 0; - - if (bpf_get_current_pid_tgid() >> 32 != pid) - return 0; - - value = bpf_this_cpu_ptr(&bpf_task_storage_busy); - if (value) - busy = *value; - - return 0; -}