Skip to content

Commit 76313d1

Browse files
Florian Westphalummakynes
authored andcommitted
netfilter: nft_set_pipapo: store index in scratch maps
Pipapo needs a scratchpad area to keep state during matching. This state can be large and thus cannot reside on stack. Each set preallocates percpu areas for this. On each match stage, one scratchpad half starts with all-zero and the other is inited to all-ones. At the end of each stage, the half that starts with all-ones is always zero. Before next field is tested, pointers to the two halves are swapped, i.e. resmap pointer turns into fill pointer and vice versa. After the last field has been processed, pipapo stashes the index toggle in a percpu variable, with assumption that next packet will start with the all-zero half and sets all bits in the other to 1. This isn't reliable. There can be multiple sets and we can't be sure that the upper and lower half of all set scratch map is always in sync (lookups can be conditional), so one set might have swapped, but other might not have been queried. Thus we need to keep the index per-set-and-cpu, just like the scratchpad. Note that this bug fix is incomplete, there is a related issue. avx2 and normal implementation might use slightly different areas of the map array space due to the avx2 alignment requirements, so m->scratch (generic/fallback implementation) and ->scratch_aligned (avx) may partially overlap. scratch and scratch_aligned are not distinct objects, the latter is just the aligned address of the former. After this change, write to scratch_align->map_index may write to scratch->map, so this issue becomes more prominent, we can set to 1 a bit in the supposedly-all-zero area of scratch->map[]. A followup patch will remove the scratch_aligned and makes generic and avx code use the same (aligned) area. Its done in a separate change to ease review. Fixes: 3c4287f ("nf_tables: Add set type for arbitrary concatenation of ranges") Reviewed-by: Stefano Brivio <[email protected]> Signed-off-by: Florian Westphal <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 60c0c23 commit 76313d1

File tree

3 files changed

+44
-26
lines changed

3 files changed

+44
-26
lines changed

net/netfilter/nft_set_pipapo.c

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,6 @@
342342
#include "nft_set_pipapo_avx2.h"
343343
#include "nft_set_pipapo.h"
344344

345-
/* Current working bitmap index, toggled between field matches */
346-
static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
347-
348345
/**
349346
* pipapo_refill() - For each set bit, set bits from selected mapping table item
350347
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
412409
const u32 *key, const struct nft_set_ext **ext)
413410
{
414411
struct nft_pipapo *priv = nft_set_priv(set);
412+
struct nft_pipapo_scratch *scratch;
415413
unsigned long *res_map, *fill_map;
416414
u8 genmask = nft_genmask_cur(net);
417415
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
422420

423421
local_bh_disable();
424422

425-
map_index = raw_cpu_read(nft_pipapo_scratch_index);
426-
427423
m = rcu_dereference(priv->match);
428424

429425
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
430426
goto out;
431427

432-
res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
433-
fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
428+
scratch = *raw_cpu_ptr(m->scratch);
429+
430+
map_index = scratch->map_index;
431+
432+
res_map = scratch->map + (map_index ? m->bsize_max : 0);
433+
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
434434

435435
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
436436

@@ -460,7 +460,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
460460
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
461461
last);
462462
if (b < 0) {
463-
raw_cpu_write(nft_pipapo_scratch_index, map_index);
463+
scratch->map_index = map_index;
464464
local_bh_enable();
465465

466466
return false;
@@ -477,7 +477,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
477477
* current inactive bitmap is clean and can be reused as
478478
* *next* bitmap (not initial) for the next packet.
479479
*/
480-
raw_cpu_write(nft_pipapo_scratch_index, map_index);
480+
scratch->map_index = map_index;
481481
local_bh_enable();
482482

483483
return true;
@@ -1123,12 +1123,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
11231123
int i;
11241124

11251125
for_each_possible_cpu(i) {
1126-
unsigned long *scratch;
1126+
struct nft_pipapo_scratch *scratch;
11271127
#ifdef NFT_PIPAPO_ALIGN
1128-
unsigned long *scratch_aligned;
1128+
void *scratch_aligned;
11291129
#endif
1130-
1131-
scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
1130+
scratch = kzalloc_node(struct_size(scratch, map,
1131+
bsize_max * 2) +
11321132
NFT_PIPAPO_ALIGN_HEADROOM,
11331133
GFP_KERNEL, cpu_to_node(i));
11341134
if (!scratch) {
@@ -1147,7 +1147,16 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
11471147
*per_cpu_ptr(clone->scratch, i) = scratch;
11481148

11491149
#ifdef NFT_PIPAPO_ALIGN
1150-
scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
1150+
/* Align &scratch->map (not the struct itself): the extra
1151+
* %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
1152+
* above guarantee we can waste up to those bytes in order
1153+
* to align the map field regardless of its offset within
1154+
* the struct.
1155+
*/
1156+
BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
1157+
1158+
scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
1159+
scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
11511160
*per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
11521161
#endif
11531162
}
@@ -2136,7 +2145,7 @@ static int nft_pipapo_init(const struct nft_set *set,
21362145
m->field_count = field_count;
21372146
m->bsize_max = 0;
21382147

2139-
m->scratch = alloc_percpu(unsigned long *);
2148+
m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
21402149
if (!m->scratch) {
21412150
err = -ENOMEM;
21422151
goto out_scratch;
@@ -2145,7 +2154,7 @@ static int nft_pipapo_init(const struct nft_set *set,
21452154
*per_cpu_ptr(m->scratch, i) = NULL;
21462155

21472156
#ifdef NFT_PIPAPO_ALIGN
2148-
m->scratch_aligned = alloc_percpu(unsigned long *);
2157+
m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *);
21492158
if (!m->scratch_aligned) {
21502159
err = -ENOMEM;
21512160
goto out_free;

net/netfilter/nft_set_pipapo.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,16 @@ struct nft_pipapo_field {
130130
union nft_pipapo_map_bucket *mt;
131131
};
132132

133+
/**
134+
* struct nft_pipapo_scratch - percpu data used for lookup and matching
135+
* @map_index: Current working bitmap index, toggled between field matches
136+
* @map: store partial matching results during lookup
137+
*/
138+
struct nft_pipapo_scratch {
139+
u8 map_index;
140+
unsigned long map[];
141+
};
142+
133143
/**
134144
* struct nft_pipapo_match - Data used for lookup and matching
135145
* @field_count Amount of fields in set
@@ -142,9 +152,9 @@ struct nft_pipapo_field {
142152
struct nft_pipapo_match {
143153
int field_count;
144154
#ifdef NFT_PIPAPO_ALIGN
145-
unsigned long * __percpu *scratch_aligned;
155+
struct nft_pipapo_scratch * __percpu *scratch_aligned;
146156
#endif
147-
unsigned long * __percpu *scratch;
157+
struct nft_pipapo_scratch * __percpu *scratch;
148158
size_t bsize_max;
149159
struct rcu_head rcu;
150160
struct nft_pipapo_field f[] __counted_by(field_count);

net/netfilter/nft_set_pipapo_avx2.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,6 @@
7171
#define NFT_PIPAPO_AVX2_ZERO(reg) \
7272
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
7373

74-
/* Current working bitmap index, toggled between field matches */
75-
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
76-
7774
/**
7875
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
7976
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
11201117
const u32 *key, const struct nft_set_ext **ext)
11211118
{
11221119
struct nft_pipapo *priv = nft_set_priv(set);
1123-
unsigned long *res, *fill, *scratch;
1120+
struct nft_pipapo_scratch *scratch;
11241121
u8 genmask = nft_genmask_cur(net);
11251122
const u8 *rp = (const u8 *)key;
11261123
struct nft_pipapo_match *m;
11271124
struct nft_pipapo_field *f;
1125+
unsigned long *res, *fill;
11281126
bool map_index;
11291127
int i, ret = 0;
11301128

@@ -1146,10 +1144,11 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
11461144
kernel_fpu_end();
11471145
return false;
11481146
}
1149-
map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
11501147

1151-
res = scratch + (map_index ? m->bsize_max : 0);
1152-
fill = scratch + (map_index ? 0 : m->bsize_max);
1148+
map_index = scratch->map_index;
1149+
1150+
res = scratch->map + (map_index ? m->bsize_max : 0);
1151+
fill = scratch->map + (map_index ? 0 : m->bsize_max);
11531152

11541153
/* Starting map doesn't need to be set for this implementation */
11551154

@@ -1221,7 +1220,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
12211220

12221221
out:
12231222
if (i % 2)
1224-
raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
1223+
scratch->map_index = !map_index;
12251224
kernel_fpu_end();
12261225

12271226
return ret >= 0;

0 commit comments

Comments
 (0)