Skip to content

Commit 54eb149

Browse files
kerumetogvisor-bot
authored andcommitted
Refactoring nftables package
nftables/nftables.go was split up to make it easier to manage its types (nftables_types.go) and the byte operations within it (see the nft_{op}.go files). NOTE: Files under the nftables package are still not thread-safe. Future changes will add synchronization primitives to resolve this issue. PiperOrigin-RevId: 769794678
1 parent e3c4c4c commit 54eb149

15 files changed

+2328
-2027
lines changed

pkg/tcpip/nftables/BUILD

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,20 @@ package(
88
go_library(
99
name = "nftables",
1010
srcs = [
11+
"nft_bitwise.go",
12+
"nft_byteorder.go",
13+
"nft_comparison.go",
14+
"nft_counter.go",
15+
"nft_immediate.go",
16+
"nft_last.go",
17+
"nft_metaload.go",
18+
"nft_metaset.go",
19+
"nft_payload_load.go",
20+
"nft_payload_set.go",
21+
"nft_ranged.go",
22+
"nft_route.go",
1123
"nftables.go",
24+
"nftables_types.go",
1225
"nftinterp.go",
1326
],
1427
deps = [

pkg/tcpip/nftables/nft_bitwise.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// Copyright 2024 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package nftables
16+
17+
import (
18+
"encoding/binary"
19+
"fmt"
20+
21+
"gvisor.dev/gvisor/pkg/abi/linux"
22+
"gvisor.dev/gvisor/pkg/tcpip/stack"
23+
)
24+
25+
// bitwiseOp is the bitwise operator for a bitwise operation.
// Note: corresponds to enum nft_bitwise_ops from
// include/uapi/linux/netfilter/nf_tables.h and uses the same constants.
type bitwiseOp int
29+
30+
// bitwiseOpStrings is a map of bitwiseOp to its string representation.
31+
var bitwiseOpStrings = map[bitwiseOp]string{
32+
linux.NFT_BITWISE_BOOL: "bitwise boolean",
33+
linux.NFT_BITWISE_LSHIFT: "bitwise <<",
34+
linux.NFT_BITWISE_RSHIFT: "bitwise >>",
35+
}
36+
37+
// String for bitwiseOp returns the string representation of the bitwise
38+
// operator.
39+
func (bop bitwiseOp) String() string {
40+
if str, ok := bitwiseOpStrings[bop]; ok {
41+
return str
42+
}
43+
panic(fmt.Sprintf("invalid bitwise operator: %d", int(bop)))
44+
}
45+
46+
// bitwise is an operation that performs bitwise math operations over data in
47+
// a given register, storing the result in a destination register.
48+
// Note: bitwise operations are not supported for the verdict register.
49+
type bitwise struct {
50+
sreg uint8 // Number of the source register.
51+
dreg uint8 // Number of the destination register.
52+
bop bitwiseOp // Bitwise operator to use.
53+
blen uint8 // Number of bytes to apply bitwise operation to.
54+
mask bytesData // Mask to apply bitwise & for boolean operations (before ^).
55+
xor bytesData // Xor to apply bitwise ^ for boolean operations (after &).
56+
shift uint32 // Shift to apply bitwise <</>> for non-boolean operations.
57+
58+
// Note: Technically, the linux kernel has defined bool, lshift, and rshift
59+
// as the 3 types of bitwise operations. However, we have not been able to
60+
// observe the lshift or rshift operations used by the nft binary. Thus, we
61+
// have no way to test the interpretation of these operations. Maintaining
62+
// consistency with the linux kernel, we have fully implemented lshift and
63+
// rshift, and We will leave the code here in case we are able to observe
64+
// their use in the future (perhaps outside the nft binary debug output).
65+
}
66+
67+
// newBitwiseBool creates a new bitwise boolean operation.
68+
func newBitwiseBool(sreg, dreg uint8, mask, xor []byte) (*bitwise, error) {
69+
if isVerdictRegister(sreg) || isVerdictRegister(dreg) {
70+
return nil, fmt.Errorf("bitwise operation cannot use verdict register as source or destination")
71+
}
72+
blen := len(mask)
73+
if blen != len(xor) {
74+
return nil, fmt.Errorf("bitwise boolean operation mask and xor must be the same length")
75+
}
76+
if blen > linux.NFT_REG_SIZE || (blen > linux.NFT_REG32_SIZE && (is4ByteRegister(sreg) || is4ByteRegister(dreg))) {
77+
return nil, fmt.Errorf("bitwise operation length %d is too long for source register %d, destination register %d", blen, sreg, dreg)
78+
}
79+
return &bitwise{sreg: sreg, dreg: dreg, bop: linux.NFT_BITWISE_BOOL, blen: uint8(blen), mask: newBytesData(mask), xor: newBytesData(xor)}, nil
80+
}
81+
82+
// newBitwiseShift creates a new bitwise shift operation.
83+
func newBitwiseShift(sreg, dreg, blen uint8, shift uint32, right bool) (*bitwise, error) {
84+
if isVerdictRegister(sreg) || isVerdictRegister(dreg) {
85+
return nil, fmt.Errorf("bitwise operation cannot use verdict register as source or destination")
86+
}
87+
if blen > linux.NFT_REG_SIZE || (blen > linux.NFT_REG32_SIZE && (is4ByteRegister(sreg) || is4ByteRegister(dreg))) {
88+
return nil, fmt.Errorf("bitwise operation length %d is too long for source register %d, destination register %d", blen, sreg, dreg)
89+
}
90+
if shift >= bitshiftLimit {
91+
return nil, fmt.Errorf("bitwise operation shift %d must be less than %d", shift, bitshiftLimit)
92+
}
93+
bop := bitwiseOp(linux.NFT_BITWISE_LSHIFT)
94+
if right {
95+
bop = linux.NFT_BITWISE_RSHIFT
96+
}
97+
return &bitwise{sreg: sreg, dreg: dreg, blen: blen, bop: bop, shift: shift}, nil
98+
}
99+
100+
// evaluateBitwiseBool performs the bitwise boolean operation on the source
// register data and stores the result in the destination register.
//
// For every index i of mask it computes dregBuf[i] = (sregBuf[i] & mask[i]) ^
// xor[i]. sregBuf, dregBuf and xor must each hold at least len(mask) bytes.
// sregBuf and dregBuf may be the same slice, since each output byte depends
// only on the input byte at the same index.
func evaluateBitwiseBool(sregBuf, dregBuf, mask, xor []byte) {
	for i, m := range mask {
		dregBuf[i] = (sregBuf[i] & m) ^ xor[i]
	}
}
107+
108+
// evaluateBitwiseLshift performs the bitwise left shift operation on source
109+
// register in 4 byte chunks and stores the result in the destination register.
110+
func evaluateBitwiseLshift(sregBuf, dregBuf []byte, shift uint32) {
111+
carry := uint32(0)
112+
113+
// Rounds down to nearest 4-byte multiple.
114+
for start := (len(sregBuf) - 1) & ^3; start >= 0; start -= 4 {
115+
// Extracts the 4-byte chunk from the source register, padding if necessary.
116+
var chunk uint32
117+
if start+4 <= len(sregBuf) {
118+
chunk = binary.BigEndian.Uint32(sregBuf[start:])
119+
} else {
120+
var padded [4]byte
121+
copy(padded[:], sregBuf[start:])
122+
chunk = binary.BigEndian.Uint32(padded[:])
123+
}
124+
125+
// Does left shift, adds the carry, and calculates the new carry.
126+
res := (chunk << shift) | carry
127+
carry = chunk >> (bitshiftLimit - shift)
128+
129+
// Stores the result in the destination register, using temporary buffer
130+
// if necessary.
131+
if start+4 <= len(dregBuf) {
132+
binary.BigEndian.PutUint32(dregBuf[start:], res)
133+
} else {
134+
var padded [4]byte
135+
binary.BigEndian.PutUint32(padded[:], res)
136+
copy(dregBuf[start:], padded[:])
137+
}
138+
}
139+
}
140+
141+
// evaluateBitwiseRshift performs the bitwise right shift operation on source
142+
// register in 4 byte chunks and stores the result in the destination register.
143+
func evaluateBitwiseRshift(sregBuf, dregBuf []byte, shift uint32) {
144+
carry := uint32(0)
145+
146+
for start := 0; start < len(sregBuf); start += 4 {
147+
// Extracts the 4-byte chunk from the source register, padding if necessary.
148+
var chunk uint32
149+
if start+4 <= len(sregBuf) {
150+
chunk = binary.BigEndian.Uint32(sregBuf[start:])
151+
} else {
152+
var padded [4]byte
153+
copy(padded[:], sregBuf[start:])
154+
chunk = binary.BigEndian.Uint32(padded[:])
155+
}
156+
157+
// Does right shift, adds the carry, and calculates the new carry.
158+
res := carry | (chunk >> shift)
159+
carry = chunk << (bitshiftLimit - shift)
160+
161+
// Stores the result in the destination register, using temporary buffer
162+
// if necessary.
163+
if start+4 <= len(dregBuf) {
164+
binary.BigEndian.PutUint32(dregBuf[start:], res)
165+
} else {
166+
var padded [4]byte
167+
binary.BigEndian.PutUint32(padded[:], res)
168+
copy(dregBuf[start:], padded[:])
169+
}
170+
}
171+
}
172+
173+
// evaluate for bitwise performs the bitwise operation on the source register
174+
// data and stores the result in the destination register.
175+
func (op bitwise) evaluate(regs *registerSet, pkt *stack.PacketBuffer, rule *Rule) {
176+
// Gets the specified buffers of the source and destination registers.
177+
sregBuf := getRegisterBuffer(regs, op.sreg)[:op.blen]
178+
dregBuf := getRegisterBuffer(regs, op.dreg)[:op.blen]
179+
180+
if op.bop == linux.NFT_BITWISE_BOOL {
181+
evaluateBitwiseBool(sregBuf, dregBuf, op.mask.data, op.xor.data)
182+
return
183+
} else {
184+
if op.bop == linux.NFT_BITWISE_LSHIFT {
185+
evaluateBitwiseLshift(sregBuf, dregBuf, op.shift)
186+
} else {
187+
evaluateBitwiseRshift(sregBuf, dregBuf, op.shift)
188+
}
189+
}
190+
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2025 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package nftables
16+
17+
import (
18+
"encoding/binary"
19+
"fmt"
20+
21+
"gvisor.dev/gvisor/pkg/abi/linux"
22+
"gvisor.dev/gvisor/pkg/tcpip/stack"
23+
)
24+
25+
// byteorder is an operation that performs byte order operations on a register.
26+
// Note: byteorder operations are not supported for the verdict register.
27+
type byteorder struct {
28+
sreg uint8 // Number of the source register.
29+
dreg uint8 // Number of the destination register.
30+
bop byteorderOp // Byte order operation to perform.
31+
blen uint8 // Number of total bytes to operate on.
32+
size uint8 // Granular size in bytes to operate on.
33+
}
34+
35+
// byteorderOp is the byte order operator for a byteorder operation.
// Note: corresponds to enum nft_byteorder_ops from
// include/uapi/linux/netfilter/nf_tables.h and uses the same constants.
type byteorderOp int
39+
40+
// byteorderOpStrings is a map of byteorder operator to its string
41+
// representation.
42+
var byteorderOpStrings = map[byteorderOp]string{
43+
linux.NFT_BYTEORDER_NTOH: "network to host",
44+
linux.NFT_BYTEORDER_HTON: "host to network",
45+
}
46+
47+
// String for byteorderOp returns the string representation of the byteorder
48+
// operator.
49+
func (bop byteorderOp) String() string {
50+
if bopStr, ok := byteorderOpStrings[bop]; ok {
51+
return bopStr
52+
}
53+
panic(fmt.Sprintf("invalid byteorder operator: %d", int(bop)))
54+
}
55+
56+
// validateByteorderOp ensures the byteorder operator is valid.
57+
func validateByteorderOp(bop byteorderOp) error {
58+
switch bop {
59+
// Supported operators.
60+
case linux.NFT_BYTEORDER_NTOH, linux.NFT_BYTEORDER_HTON:
61+
return nil
62+
default:
63+
return fmt.Errorf("invalid byteorder operator: %d", int(bop))
64+
}
65+
}
66+
67+
// newByteorder creates a new byteorder operation.
68+
func newByteorder(sreg, dreg uint8, bop byteorderOp, blen, size uint8) (*byteorder, error) {
69+
if isVerdictRegister(sreg) || isVerdictRegister(dreg) {
70+
return nil, fmt.Errorf("byteorder operation cannot use verdict register")
71+
}
72+
if err := validateByteorderOp(bop); err != nil {
73+
return nil, err
74+
}
75+
if blen > linux.NFT_REG_SIZE {
76+
return nil, fmt.Errorf("byteorder operation cannot have length greater than the max register size of %d bytes", linux.NFT_REG_SIZE)
77+
}
78+
if (is4ByteRegister(sreg) || is4ByteRegister(dreg)) && blen > linux.NFT_REG32_SIZE {
79+
return nil, fmt.Errorf("byteorder operation cannot have length greater than the max register size of %d bytes", linux.NFT_REG32_SIZE)
80+
}
81+
if size > blen {
82+
return nil, fmt.Errorf("byteorder operation cannot have size greater than length")
83+
}
84+
if size != 2 && size != 4 && size != 8 {
85+
return nil, fmt.Errorf("byteorder operation size must be 2, 4, or 8 bytes")
86+
}
87+
return &byteorder{sreg: sreg, dreg: dreg, bop: bop, blen: blen, size: size}, nil
88+
}
89+
90+
// evaluate for byteorder performs the byte order operation on the source
91+
// register and stores the result in the destination register.
92+
func (op byteorder) evaluate(regs *registerSet, pkt *stack.PacketBuffer, rule *Rule) {
93+
// Gets the source and destination registers.
94+
src := getRegisterBuffer(regs, op.sreg)
95+
dst := getRegisterBuffer(regs, op.dreg)
96+
97+
// Performs the byte order operations on the source register and stores the
98+
// result in as many bytes as are available in the destination register.
99+
switch op.size {
100+
case 8:
101+
switch op.bop {
102+
case linux.NFT_BYTEORDER_NTOH:
103+
for i := uint8(0); i < op.blen; i += 8 {
104+
networkNum := binary.BigEndian.Uint64(src[i : i+8])
105+
binary.NativeEndian.PutUint64(dst[i:], networkNum)
106+
}
107+
case linux.NFT_BYTEORDER_HTON:
108+
for i := uint8(0); i < op.blen; i += 8 {
109+
hostNum := binary.NativeEndian.Uint64(src[i : i+8])
110+
binary.BigEndian.PutUint64(dst[i:], hostNum)
111+
}
112+
}
113+
114+
case 4:
115+
switch op.bop {
116+
case linux.NFT_BYTEORDER_NTOH:
117+
for i := uint8(0); i < op.blen; i += 4 {
118+
networkNum := binary.BigEndian.Uint32(src[i : i+4])
119+
binary.NativeEndian.PutUint32(dst[i:], networkNum)
120+
}
121+
case linux.NFT_BYTEORDER_HTON:
122+
for i := uint8(0); i < op.blen; i += 4 {
123+
hostNum := binary.NativeEndian.Uint32(src[i : i+4])
124+
binary.BigEndian.PutUint32(dst[i:], hostNum)
125+
}
126+
}
127+
128+
case 2:
129+
switch op.bop {
130+
case linux.NFT_BYTEORDER_NTOH:
131+
for i := uint8(0); i < op.blen; i += 2 {
132+
networkNum := binary.BigEndian.Uint16(src[i : i+2])
133+
binary.NativeEndian.PutUint16(dst[i:], networkNum)
134+
}
135+
case linux.NFT_BYTEORDER_HTON:
136+
for i := uint8(0); i < op.blen; i += 2 {
137+
hostNum := binary.NativeEndian.Uint16(src[i : i+2])
138+
binary.BigEndian.PutUint16(dst[i:], hostNum)
139+
}
140+
}
141+
}
142+
143+
// Zeroes out excess bytes of the destination register.
144+
// This is done since comparison can be done in multiples of 4 bytes.
145+
if rem := op.blen % 4; rem != 0 {
146+
clear(dst[op.blen : op.blen+4-rem])
147+
}
148+
}

0 commit comments

Comments
 (0)