Skip to content

Commit 7bb50f3

Browse files
committed
Merge branch 'net-fib_rules-add-dscp-selector-support'
Ido Schimmel says:

====================
net: fib_rules: Add DSCP selector support

Currently, the kernel rejects IPv4 FIB rules that try to match on the upper three DSCP bits:

 # ip -4 rule add tos 0x1c table 100
 # ip -4 rule add tos 0x3c table 100
 Error: Invalid tos.

The reason for that is that historically users of the FIB lookup API only populated the lower three DSCP bits in the TOS field of the IPv4 flow key ('flowi4_tos'), which fits the TOS definition from the initial IPv4 specification (RFC 791).

This is not very useful nowadays and instead some users want to be able to match on the six-bit DSCP field, which replaced the TOS and IP precedence fields over 25 years ago (RFC 2474). In addition, the current behavior differs between IPv4 and IPv6, which does allow users to match on the entire DSCP field using the TOS selector.

Recent patchsets made sure that callers of the FIB lookup API now populate the entire DSCP field in the IPv4 flow key. Therefore, it is now possible to extend FIB rules to match on DSCP.

This is done by adding a new DSCP attribute which is implemented for both IPv4 and IPv6 to provide user space programs with consistent behavior between both address families.

The behavior of the old TOS selector is unchanged and IPv4 FIB rules using it will only match on the lower three DSCP bits. The kernel will reject rules that try to use both selectors.

Patch #1 adds the new DSCP attribute but rejects its usage.

Patches #2-#3 implement IPv4 and IPv6 support.

Patch #4 allows user space to use the new attribute.

Patches #5-#6 add selftests.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 716425d + 2bf1259 commit 7bb50f3

File tree

5 files changed

+184
-8
lines changed

5 files changed

+184
-8
lines changed

include/uapi/linux/fib_rules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ enum {
6767
FRA_IP_PROTO, /* ip proto */
6868
FRA_SPORT_RANGE, /* sport */
6969
FRA_DPORT_RANGE, /* dport */
70+
FRA_DSCP, /* dscp */
7071
__FRA_MAX
7172
};
7273

net/core/fib_rules.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/list.h>
1212
#include <linux/module.h>
1313
#include <net/net_namespace.h>
14+
#include <net/inet_dscp.h>
1415
#include <net/sock.h>
1516
#include <net/fib_rules.h>
1617
#include <net/ip_tunnels.h>
@@ -766,7 +767,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
766767
[FRA_PROTOCOL] = { .type = NLA_U8 },
767768
[FRA_IP_PROTO] = { .type = NLA_U8 },
768769
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
769-
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
770+
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
771+
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
770772
};
771773

772774
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,

net/ipv4/fib_rules.c

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ struct fib4_rule {
3737
u8 dst_len;
3838
u8 src_len;
3939
dscp_t dscp;
40+
u8 dscp_full:1; /* DSCP or TOS selector */
4041
__be32 src;
4142
__be32 srcmask;
4243
__be32 dst;
@@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
186187
((daddr ^ r->dst) & r->dstmask))
187188
return 0;
188189

189-
if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4))
190+
/* When DSCP selector is used we need to match on the entire DSCP field
191+
* in the flow information structure. When TOS selector is used we need
192+
* to mask the upper three DSCP bits prior to matching to maintain
193+
* legacy behavior.
194+
*/
195+
if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
196+
return 0;
197+
else if (!r->dscp_full && r->dscp &&
198+
!fib_dscp_masked_match(r->dscp, fl4))
190199
return 0;
191200

192201
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
217226
return NULL;
218227
}
219228

229+
/*
 * Apply the FRA_DSCP netlink attribute @nla to the IPv4 rule @rule4.
 *
 * Returns 0 on success. Returns -EINVAL and sets an extack message when
 * rule4->dscp is already non-zero; the caller assigns that field from
 * frh->tos before calling, so a non-zero value means a TOS selector was
 * supplied in the same request.
 */
static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
230+
struct netlink_ext_ack *extack)
231+
{
232+
/* TOS and DSCP selectors are mutually exclusive. */
if (rule4->dscp) {
233+
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
234+
return -EINVAL;
235+
}
236+
237+
/* The attribute value is right-aligned (the netlink policy caps it at
 * INET_DSCP_MASK >> 2), so shift it left by two bits before converting
 * it to dscp_t.
 */
rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
238+
/* Mark the rule as using the DSCP selector rather than the TOS selector. */
rule4->dscp_full = true;
239+
240+
return 0;
241+
}
242+
220243
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
221244
struct fib_rule_hdr *frh,
222245
struct nlattr **tb,
@@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
238261
}
239262
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
240263

264+
if (tb[FRA_DSCP] &&
265+
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
266+
goto errout;
267+
241268
/* split local/main if they are not already split */
242269
err = fib_unmerge(net);
243270
if (err)
@@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
320347
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
321348
return 0;
322349

323-
if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
350+
if (frh->tos &&
351+
(rule4->dscp_full ||
352+
inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
324353
return 0;
325354

355+
if (tb[FRA_DSCP]) {
356+
dscp_t dscp;
357+
358+
dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
359+
if (!rule4->dscp_full || rule4->dscp != dscp)
360+
return 0;
361+
}
362+
326363
#ifdef CONFIG_IP_ROUTE_CLASSID
327364
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
328365
return 0;
@@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
344381

345382
frh->dst_len = rule4->dst_len;
346383
frh->src_len = rule4->src_len;
347-
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
384+
385+
if (rule4->dscp_full) {
386+
frh->tos = 0;
387+
if (nla_put_u8(skb, FRA_DSCP,
388+
inet_dscp_to_dsfield(rule4->dscp) >> 2))
389+
goto nla_put_failure;
390+
} else {
391+
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
392+
}
348393

349394
if ((rule4->dst_len &&
350395
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
@@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
366411
{
367412
return nla_total_size(4) /* dst */
368413
+ nla_total_size(4) /* src */
369-
+ nla_total_size(4); /* flow */
414+
+ nla_total_size(4) /* flow */
415+
+ nla_total_size(1); /* dscp */
370416
}
371417

372418
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)

net/ipv6/fib6_rules.c

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct fib6_rule {
2727
struct rt6key src;
2828
struct rt6key dst;
2929
dscp_t dscp;
30+
u8 dscp_full:1; /* DSCP or TOS selector */
3031
};
3132

3233
static bool fib6_rule_matchall(const struct fib_rule *rule)
@@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
345346
return 1;
346347
}
347348

349+
/*
 * Apply the FRA_DSCP netlink attribute @nla to the IPv6 rule @rule6.
 *
 * Returns 0 on success. Returns -EINVAL and sets an extack message when
 * rule6->dscp is already non-zero; the caller assigns that field from
 * frh->tos before calling, so a non-zero value means a TOS selector was
 * supplied in the same request.
 */
static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
350+
struct netlink_ext_ack *extack)
351+
{
352+
/* TOS and DSCP selectors are mutually exclusive. */
if (rule6->dscp) {
353+
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
354+
return -EINVAL;
355+
}
356+
357+
/* The attribute value is right-aligned (the netlink policy caps it at
 * INET_DSCP_MASK >> 2), so shift it left by two bits before converting
 * it to dscp_t.
 */
rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
358+
/* Mark the rule as using the DSCP selector rather than the TOS selector. */
rule6->dscp_full = true;
359+
360+
return 0;
361+
}
362+
348363
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
349364
struct fib_rule_hdr *frh,
350365
struct nlattr **tb,
@@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
361376
}
362377
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
363378

379+
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
380+
goto errout;
381+
364382
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
365383
if (rule->table == RT6_TABLE_UNSPEC) {
366384
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
413431
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
414432
return 0;
415433

416-
if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
434+
if (frh->tos &&
435+
(rule6->dscp_full ||
436+
inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
417437
return 0;
418438

439+
if (tb[FRA_DSCP]) {
440+
dscp_t dscp;
441+
442+
dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
443+
if (!rule6->dscp_full || rule6->dscp != dscp)
444+
return 0;
445+
}
446+
419447
if (frh->src_len &&
420448
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
421449
return 0;
@@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
434462

435463
frh->dst_len = rule6->dst.plen;
436464
frh->src_len = rule6->src.plen;
437-
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
465+
466+
if (rule6->dscp_full) {
467+
frh->tos = 0;
468+
if (nla_put_u8(skb, FRA_DSCP,
469+
inet_dscp_to_dsfield(rule6->dscp) >> 2))
470+
goto nla_put_failure;
471+
} else {
472+
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
473+
}
438474

439475
if ((rule6->dst.plen &&
440476
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -450,7 +486,8 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
450486
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
451487
{
452488
return nla_total_size(16) /* dst */
453-
+ nla_total_size(16); /* src */
489+
+ nla_total_size(16) /* src */
490+
+ nla_total_size(1); /* dscp */
454491
}
455492

456493
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)

tools/testing/selftests/net/fib_rule_tests.sh

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,23 @@ fib_rule6_test()
274274
"$getnomatch" "ipproto ipv6-icmp match" \
275275
"ipproto ipv6-tcp no match"
276276
fi
277+
278+
fib_check_iproute_support "dscp" "tos"
279+
if [ $? -eq 0 ]; then
280+
match="dscp 0x3f"
281+
getmatch="tos 0xfc"
282+
getnomatch="tos 0xf4"
283+
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
284+
"$getnomatch" "dscp redirect to table" \
285+
"dscp no redirect to table"
286+
287+
match="dscp 0x3f"
288+
getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
289+
getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
290+
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
291+
"$getnomatch" "iif dscp redirect to table" \
292+
"iif dscp no redirect to table"
293+
fi
277294
}
278295

279296
fib_rule6_vrf_test()
@@ -319,6 +336,34 @@ fib_rule6_connect_test()
319336
log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
320337

321338
$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
339+
340+
ip rule help 2>&1 | grep -q dscp
341+
if [ $? -ne 0 ]; then
342+
echo "SKIP: iproute2 iprule too old, missing dscp match"
343+
cleanup_peer
344+
return
345+
fi
346+
347+
$IP -6 rule add dscp 0x3f table $RTABLE_PEER
348+
349+
nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
350+
-l 2001:db8::1:11 -r 2001:db8::1:11
351+
log_test $? 0 "rule6 dscp udp connect"
352+
353+
nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
354+
-l 2001:db8::1:11 -r 2001:db8::1:11
355+
log_test $? 0 "rule6 dscp tcp connect"
356+
357+
nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
358+
-l 2001:db8::1:11 -r 2001:db8::1:11
359+
log_test $? 1 "rule6 dscp udp no connect"
360+
361+
nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
362+
-l 2001:db8::1:11 -r 2001:db8::1:11
363+
log_test $? 1 "rule6 dscp tcp no connect"
364+
365+
$IP -6 rule del dscp 0x3f table $RTABLE_PEER
366+
322367
cleanup_peer
323368
}
324369

@@ -468,6 +513,23 @@ fib_rule4_test()
468513
"$getnomatch" "ipproto icmp match" \
469514
"ipproto tcp no match"
470515
fi
516+
517+
fib_check_iproute_support "dscp" "tos"
518+
if [ $? -eq 0 ]; then
519+
match="dscp 0x3f"
520+
getmatch="tos 0xfc"
521+
getnomatch="tos 0xf4"
522+
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
523+
"$getnomatch" "dscp redirect to table" \
524+
"dscp no redirect to table"
525+
526+
match="dscp 0x3f"
527+
getmatch="from $SRC_IP iif $DEV tos 0xfc"
528+
getnomatch="from $SRC_IP iif $DEV tos 0xf4"
529+
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
530+
"$getnomatch" "iif dscp redirect to table" \
531+
"iif dscp no redirect to table"
532+
fi
471533
}
472534

473535
fib_rule4_vrf_test()
@@ -513,6 +575,34 @@ fib_rule4_connect_test()
513575
log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
514576

515577
$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
578+
579+
ip rule help 2>&1 | grep -q dscp
580+
if [ $? -ne 0 ]; then
581+
echo "SKIP: iproute2 iprule too old, missing dscp match"
582+
cleanup_peer
583+
return
584+
fi
585+
586+
$IP -4 rule add dscp 0x3f table $RTABLE_PEER
587+
588+
nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
589+
-l 198.51.100.11 -r 198.51.100.11
590+
log_test $? 0 "rule4 dscp udp connect"
591+
592+
nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
593+
-l 198.51.100.11 -r 198.51.100.11
594+
log_test $? 0 "rule4 dscp tcp connect"
595+
596+
nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
597+
-l 198.51.100.11 -r 198.51.100.11
598+
log_test $? 1 "rule4 dscp udp no connect"
599+
600+
nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
601+
-l 198.51.100.11 -r 198.51.100.11
602+
log_test $? 1 "rule4 dscp tcp no connect"
603+
604+
$IP -4 rule del dscp 0x3f table $RTABLE_PEER
605+
516606
cleanup_peer
517607
}
518608
################################################################################

0 commit comments

Comments
 (0)