@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
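
With this change, extra_elems is no longer a per-CPU blob of element storage guarded by a state enum; each CPU simply holds a pointer to one preallocated struct htab_elem, and ownership is implied by where a pointer lives (a bucket, the freelist, or a per-CPU slot). A minimal user-space sketch of the idea follows; "per-CPU" is modelled as a plain array and all names (NCPUS, old_extra, pool) are illustrative, not kernel identifiers.

/* Illustrative user-space model only; not kernel code. */
#include <stdio.h>

#define NCPUS 4				/* stand-in for num_possible_cpus() */

struct elem { int key; int value; };

/* old scheme (modelled): one embedded element per CPU plus a state flag */
enum extra_state { EXTRA_FREE, EXTRA_USED };
struct old_extra { struct elem e; enum extra_state state; };
static struct old_extra old_extras[NCPUS];

/* new scheme (modelled): one pointer per CPU, no per-element state needed */
static struct elem *extra_elems[NCPUS];

int main(void)
{
	static struct elem pool[NCPUS];		/* stand-in for the prealloc area */

	for (int cpu = 0; cpu < NCPUS; cpu++)
		extra_elems[cpu] = &pool[cpu];	/* park one spare per CPU */

	old_extras[0].state = EXTRA_FREE;	/* old scheme tracked state explicitly */
	printf("cpu0 spare element: %p\n", (void *)extra_elems[0]);
	return 0;
}
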
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
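
For plain (non-per-CPU, non-LRU) hash maps, prealloc_init() now over-allocates by num_possible_cpus() elements so that the per-CPU spares handed out later by alloc_extra_elems() come out of the same preallocated area. A rough sizing sketch, using made-up example numbers (the elem_size, max_entries and CPU count below are assumptions, not values from the patch):

/* Illustrative sizing only; the numbers are assumptions. */
#include <stdio.h>

int main(void)
{
	unsigned int max_entries   = 10000;	/* map's max_entries (example) */
	unsigned int possible_cpus = 8;		/* num_possible_cpus() (example) */
	unsigned int elem_size     = 64;	/* struct htab_elem + key + value (example) */
	unsigned int num_entries   = max_entries;
	int percpu = 0, lru = 0;		/* plain BPF_MAP_TYPE_HASH */

	if (!percpu && !lru)
		num_entries += possible_cpus;	/* room for one spare per CPU */

	printf("prealloc area: %u elems * %u bytes = %u bytes\n",
	       num_entries, elem_size, num_entries * elem_size);
	return 0;
}
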
@@ -166,11 +167,11 @@ static int prealloc_init(struct bpf_htab *htab)
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
-				       htab->elem_size, htab->map.max_entries);
+				       htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
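
Rather than allocating fresh per-CPU element storage, alloc_extra_elems() now just pops one of the elements preallocated above off the freelist for each possible CPU and parks the pointer in that CPU's slot, which is why the pop cannot fail. A user-space sketch of the same pattern with a toy singly-linked freelist; freelist_push, freelist_pop and to_elem are stand-ins for the kernel's pcpu_freelist helpers and container_of(), not real APIs.

/* Toy freelist + "per-cpu" pointer array; models alloc_extra_elems(). */
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 4

struct node { struct node *next; };
struct elem { struct node fnode; int data; };

static struct node *freelist;

static void freelist_push(struct node *n) { n->next = freelist; freelist = n; }

static struct node *freelist_pop(void)
{
	struct node *n = freelist;

	if (n)
		freelist = n->next;
	return n;
}

/* fnode is the first member of struct elem, so a cast stands in for container_of() */
#define to_elem(n) ((struct elem *)(n))

static struct elem *extra_elems[NCPUS];		/* one spare pointer per CPU */

int main(void)
{
	/* "prealloc_init": preallocate max_entries (16 here) + NCPUS elements */
	for (int i = 0; i < 16 + NCPUS; i++) {
		struct elem *e = calloc(1, sizeof(*e));

		freelist_push(&e->fnode);
	}

	/* "alloc_extra_elems": the pops cannot fail, the extras were preallocated */
	for (int cpu = 0; cpu < NCPUS; cpu++)
		extra_elems[cpu] = to_elem(freelist_pop());

	printf("cpu0 spare: %p\n", (void *)extra_elems[0]);
	return 0;
}
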
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
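
Because alloc_extra_elems() now consumes elements that prealloc_init() put on the freelist, it has to run after prealloc_init(), and the error unwinding changes to match: a failure after prealloc_init() jumps to free_prealloc (which tears down the preallocated area) before falling through to free_buckets. A compressed sketch of that ladder-style goto cleanup follows; setup_buckets, setup_prealloc and setup_extras are placeholder functions, not the kernel ones.

/* Placeholder setup/teardown pairs; only the unwind order matters here. */
#include <stdio.h>

static int setup_buckets(void)      { return 0; }
static void teardown_buckets(void)  { puts("free buckets"); }
static int setup_prealloc(void)     { return 0; }
static void teardown_prealloc(void) { puts("free prealloc"); }
static int setup_extras(void)       { return -1; /* force the error path */ }

static int map_alloc(void)
{
	int err;

	err = setup_buckets();
	if (err)
		return err;

	err = setup_prealloc();
	if (err)
		goto free_buckets;

	err = setup_extras();		/* must come after setup_prealloc() */
	if (err)
		goto free_prealloc;

	return 0;

free_prealloc:				/* undo in reverse order of setup */
	teardown_prealloc();
free_buckets:
	teardown_buckets();
	return err;
}

int main(void)
{
	printf("map_alloc() = %d\n", map_alloc());
	return 0;
}
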
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
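
With the state enum gone, free_htab_elem() only needs to know whether the map is preallocated: preallocated elements go back on the freelist, dynamically allocated ones decrement the element count and are freed (in the kernel the kfree is deferred through call_rcu; the sketch below frees directly). A user-space model, with struct htab and free_elem() as illustrative stand-ins:

/* User-space model of the two free paths; the call_rcu deferral is omitted. */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct node { struct node *next; };
struct elem { struct node fnode; int key; };

struct htab {
	bool prealloc;		/* roughly: !(map_flags & BPF_F_NO_PREALLOC) */
	struct node *freelist;
	int count;
};

static void free_elem(struct htab *h, struct elem *l)
{
	if (h->prealloc) {
		l->fnode.next = h->freelist;	/* push back onto the freelist */
		h->freelist = &l->fnode;
	} else {
		h->count--;			/* kernel: atomic_dec(&htab->count) */
		free(l);			/* kernel: deferred via call_rcu */
	}
}

int main(void)
{
	struct htab h = { .prealloc = false, .count = 1 };
	struct elem *l = calloc(1, sizeof(*l));

	free_elem(&h, l);
	printf("count after free: %d\n", h.count);
	return 0;
}
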
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
 			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
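
This hunk is the core of the fix: on a preallocated map, an update that replaces an existing key never touches the freelist. It takes the current CPU's spare element as the new element and stashes the element being replaced as that CPU's new spare, which is also why the later hunk in htab_map_update_elem() skips free_htab_elem() for preallocated maps: the old element already lives on as the spare and must not be pushed onto the freelist a second time. A user-space sketch of just that swap; per-CPU storage is modelled as an array and take_spare() is an illustrative helper, not a kernel function.

/* User-space model of the prealloc + old_elem branch of alloc_htab_elem(). */
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 2

struct elem { int key; int value; };

static struct elem *extra_elems[NCPUS];		/* one spare element per CPU */

/* swap the spare with the element being replaced; no freelist traffic */
static struct elem *take_spare(int cpu, struct elem *old_elem)
{
	struct elem **pl_new = &extra_elems[cpu];
	struct elem *l_new = *pl_new;

	*pl_new = old_elem;
	return l_new;
}

int main(void)
{
	struct elem *spare = calloc(1, sizeof(*spare));
	struct elem *old   = calloc(1, sizeof(*old));

	extra_elems[0] = spare;
	old->key = 1;
	old->value = 10;

	/* update of key 1 on "cpu 0": write the new value into the spare */
	struct elem *l_new = take_spare(0, old);

	l_new->key = 1;
	l_new->value = 20;

	printf("new element: k=%d v=%d; cpu0 spare is the old element: %s\n",
	       l_new->key, l_new->value,
	       extra_elems[0] == old ? "yes" : "no");
	return 0;
}
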
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);