Skip to content

Commit cd0eadf

Browse files
committed
improve performance
1 parent e9ebab7 commit cd0eadf

14 files changed

+330
-114
lines changed

bpf/flows.c

Lines changed: 141 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -58,48 +58,113 @@
5858
#include "ipsec.h"
5959

6060
// return 0 on success, 1 if capacity reached
61+
// Optimized: loop unrolled, with early exits for the common cases
6162
static __always_inline int add_observed_intf(flow_metrics *value, pkt_info *pkt, u32 if_index,
6263
u8 direction) {
6364
if (value->nb_observed_intf >= MAX_OBSERVED_INTERFACES) {
6465
return 1;
6566
}
66-
for (u8 i = 0; i < value->nb_observed_intf; i++) {
67-
if (value->observed_intf[i] == if_index) {
68-
if (value->observed_direction[i] != direction &&
69-
value->observed_direction[i] != OBSERVED_DIRECTION_BOTH) {
70-
// Same interface seen on a different direction => mark as both directions
71-
value->observed_direction[i] = OBSERVED_DIRECTION_BOTH;
72-
}
73-
// Interface already seen -> skip
74-
return 0;
67+
68+
// Fast path: unroll loop for small array sizes (most common cases)
69+
// Check each position explicitly to eliminate loop overhead
70+
u8 nb = value->nb_observed_intf;
71+
72+
// Unroll for common cases (0-3 interfaces) - most flows see 1-2 interfaces
73+
if (nb == 0) {
74+
// First interface - no check needed
75+
goto add_new;
76+
}
77+
78+
// Check existing interfaces with unrolled comparisons
79+
if (value->observed_intf[0] == if_index) {
80+
if (value->observed_direction[0] != direction &&
81+
value->observed_direction[0] != OBSERVED_DIRECTION_BOTH) {
82+
value->observed_direction[0] = OBSERVED_DIRECTION_BOTH;
83+
}
84+
return 0;
85+
}
86+
87+
if (nb >= 2 && value->observed_intf[1] == if_index) {
88+
if (value->observed_direction[1] != direction &&
89+
value->observed_direction[1] != OBSERVED_DIRECTION_BOTH) {
90+
value->observed_direction[1] = OBSERVED_DIRECTION_BOTH;
91+
}
92+
return 0;
93+
}
94+
95+
if (nb >= 3 && value->observed_intf[2] == if_index) {
96+
if (value->observed_direction[2] != direction &&
97+
value->observed_direction[2] != OBSERVED_DIRECTION_BOTH) {
98+
value->observed_direction[2] = OBSERVED_DIRECTION_BOTH;
99+
}
100+
return 0;
101+
}
102+
103+
// Fully unroll remaining cases (positions 3-5) for MAX_OBSERVED_INTERFACES=6
104+
if (nb >= 4 && value->observed_intf[3] == if_index) {
105+
if (value->observed_direction[3] != direction &&
106+
value->observed_direction[3] != OBSERVED_DIRECTION_BOTH) {
107+
value->observed_direction[3] = OBSERVED_DIRECTION_BOTH;
108+
}
109+
return 0;
110+
}
111+
112+
if (nb >= 5 && value->observed_intf[4] == if_index) {
113+
if (value->observed_direction[4] != direction &&
114+
value->observed_direction[4] != OBSERVED_DIRECTION_BOTH) {
115+
value->observed_direction[4] = OBSERVED_DIRECTION_BOTH;
116+
}
117+
return 0;
118+
}
119+
120+
if (nb >= 6 && value->observed_intf[5] == if_index) {
121+
if (value->observed_direction[5] != direction &&
122+
value->observed_direction[5] != OBSERVED_DIRECTION_BOTH) {
123+
value->observed_direction[5] = OBSERVED_DIRECTION_BOTH;
75124
}
125+
return 0;
76126
}
77-
value->observed_intf[value->nb_observed_intf] = if_index;
78-
value->observed_direction[value->nb_observed_intf] = direction;
79-
value->nb_observed_intf++;
127+
128+
add_new:
129+
// Not found - add new interface
130+
value->observed_intf[nb] = if_index;
131+
value->observed_direction[nb] = direction;
132+
value->nb_observed_intf = nb + 1;
80133
return 0;
81134
}
82135

83136
static __always_inline void update_existing_flow(flow_metrics *aggregate_flow, pkt_info *pkt,
84137
u64 len, u32 sampling, u32 if_index,
85138
u8 direction) {
86139
// Count only packets seen from the same interface as previously to avoid duplicate counts
140+
// Using lock-free atomic operations for better performance
87141
int maxReached = 0;
88-
bpf_spin_lock(&aggregate_flow->lock);
89-
if (aggregate_flow->if_index_first_seen == if_index) {
90-
aggregate_flow->packets += 1;
91-
aggregate_flow->bytes += len;
142+
143+
// Read if_index_first_seen once (it's never modified after flow creation)
144+
u32 first_seen = aggregate_flow->if_index_first_seen;
145+
146+
if (first_seen == if_index) {
147+
// Common path: same interface - use atomic operations
148+
__sync_fetch_and_add(&aggregate_flow->packets, 1);
149+
__sync_fetch_and_add(&aggregate_flow->bytes, len);
150+
// Timestamp: use simple write (acceptable if slightly out of order, we want latest anyway)
151+
// On architectures that support it, this will be naturally atomic for aligned 64-bit writes
92152
aggregate_flow->end_mono_time_ts = pkt->current_ts;
153+
// Flags is u16 - eBPF doesn't support atomic ops on 16-bit types
154+
// Use simple write: OR is idempotent, so worst case is missing a flag bit in rare races (acceptable)
93155
aggregate_flow->flags |= pkt->flags;
156+
// DSCP and sampling: simple writes (these are infrequently updated, races are acceptable)
94157
aggregate_flow->dscp = pkt->dscp;
95158
aggregate_flow->sampling = sampling;
96159
} else if (if_index != 0) {
97-
// Only add info that we've seen this interface (we can also update end time & flags)
160+
// Different interface path: update timestamps/flags atomically, then add interface
98161
aggregate_flow->end_mono_time_ts = pkt->current_ts;
162+
// Flags update - use simple write (OR is idempotent, occasional missed flag is acceptable)
99163
aggregate_flow->flags |= pkt->flags;
164+
// Note: add_observed_intf may have races, but worst case is missing one interface entry
165+
// This is acceptable since interface tracking is best-effort metadata
100166
maxReached = add_observed_intf(aggregate_flow, pkt, if_index, direction);
101167
}
102-
bpf_spin_unlock(&aggregate_flow->lock);
103168
if (maxReached > 0) {
104169
BPF_PRINTK("observed interface missed (array capacity reached); ifindex=%d, eth_type=%d, "
105170
"proto=%d, sport=%d, dport=%d\n",
@@ -138,25 +203,50 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
138203
}
139204

140205
u16 eth_protocol = 0;
206+
// Initialize pkt_info field-by-field. NOTE(review): unlike a designated initializer,
// this does NOT zero unassigned fields — any pkt_info member not set below stays
// uninitialized; verify every field read later is assigned here or by the parsers.
141207
pkt_info pkt;
142-
__builtin_memset(&pkt, 0, sizeof(pkt));
208+
pkt.current_ts = bpf_ktime_get_ns(); // Record the current time first.
209+
pkt.id = NULL; // Will be set below
210+
pkt.flags = 0;
211+
pkt.l4_hdr = NULL;
212+
pkt.dscp = 0;
213+
pkt.dns_id = 0;
214+
pkt.dns_flags = 0;
215+
pkt.dns_latency = 0;
216+
// DNS name only initialized if DNS tracking enabled (set by track_dns_packet if needed)
143217

144-
flow_id id;
145-
__builtin_memset(&id, 0, sizeof(id));
218+
flow_id id = {0}; // All fields zeroed - needed for flow identification
146219

147-
pkt.current_ts = bpf_ktime_get_ns(); // Record the current time first.
148220
pkt.id = &id;
149221

150222
void *data_end = (void *)(long)skb->data_end;
151223
void *data = (void *)(long)skb->data;
152224
struct ethhdr *eth = (struct ethhdr *)data;
153225
u64 len = skb->len;
226+
u8 protocol = 0; // Will be set by L3 parsing
154227

155-
if (fill_ethhdr(eth, data_end, &pkt, &eth_protocol) == DISCARD) {
228+
// Optimized: Parse L2+L3 first for early IP filtering
229+
// This allows us to skip L4 parsing if IP-based filtering rejects the packet
230+
if (fill_ethhdr_l3only(eth, data_end, &pkt, &eth_protocol, &protocol) == DISCARD) {
156231
return TC_ACT_OK;
157232
}
158233

159-
// check if this packet need to be filtered if filtering feature is enabled
234+
// Early IP filtering: check if we can reject before parsing L4
235+
// This saves L4 parsing for packets that will be rejected anyway
236+
bool filter_enabled = is_filter_enabled();
237+
if (filter_enabled) {
238+
filter_action early_action = MAX_FILTER_ACTIONS;
239+
if (early_ip_filter_check(&id, &early_action, eth_protocol, direction)) {
240+
// Early rejection - skip L4 parsing entirely
241+
if (early_action == REJECT) {
242+
return TC_ACT_OK;
243+
}
244+
}
245+
}
246+
// Parse L4 (needed for full filtering or flow tracking)
247+
parse_l4_after_l3(eth, data_end, &pkt, eth_protocol, protocol);
248+
249+
// Full filter check (now that L4 is parsed if needed)
160250
bool skip =
161251
check_and_do_flow_filtering(&id, pkt.flags, 0, eth_protocol, &flow_sampling, direction);
162252
if (has_filter_sampling) {
@@ -183,18 +273,20 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
183273
update_existing_flow(aggregate_flow, &pkt, len, flow_sampling, skb->ifindex, direction);
184274
} else {
185275
// Key does not exist in the map, and will need to create a new entry.
186-
flow_metrics new_flow;
187-
__builtin_memset(&new_flow, 0, sizeof(new_flow));
188-
new_flow.if_index_first_seen = skb->ifindex;
189-
new_flow.direction_first_seen = direction;
190-
new_flow.packets = 1;
191-
new_flow.bytes = len;
192-
new_flow.eth_protocol = eth_protocol;
193-
new_flow.start_mono_time_ts = pkt.current_ts;
194-
new_flow.end_mono_time_ts = pkt.current_ts;
195-
new_flow.flags = pkt.flags;
196-
new_flow.dscp = pkt.dscp;
197-
new_flow.sampling = flow_sampling;
276+
// Initialize only the fields we need - compiler will zero the rest
277+
flow_metrics new_flow = {
278+
.if_index_first_seen = skb->ifindex,
279+
.direction_first_seen = direction,
280+
.packets = 1,
281+
.bytes = len,
282+
.eth_protocol = eth_protocol,
283+
.start_mono_time_ts = pkt.current_ts,
284+
.end_mono_time_ts = pkt.current_ts,
285+
.flags = pkt.flags,
286+
.dscp = pkt.dscp,
287+
.sampling = flow_sampling,
288+
.nb_observed_intf = 0 // Explicitly zero for clarity
289+
};
198290
__builtin_memcpy(new_flow.dst_mac, eth->h_dest, ETH_ALEN);
199291
__builtin_memcpy(new_flow.src_mac, eth->h_source, ETH_ALEN);
200292

@@ -245,15 +337,19 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
245337
if (extra_metrics != NULL) {
246338
update_dns(extra_metrics, &pkt, dns_errno);
247339
} else {
248-
additional_metrics new_metrics;
249-
__builtin_memset(&new_metrics, 0, sizeof(new_metrics));
250-
new_metrics.start_mono_time_ts = pkt.current_ts;
251-
new_metrics.end_mono_time_ts = pkt.current_ts;
252-
new_metrics.eth_protocol = eth_protocol;
253-
new_metrics.dns_record.id = pkt.dns_id;
254-
new_metrics.dns_record.flags = pkt.dns_flags;
255-
new_metrics.dns_record.latency = pkt.dns_latency;
256-
new_metrics.dns_record.errno = dns_errno;
340+
// Initialize only needed fields - compiler will zero the rest
341+
additional_metrics new_metrics = {
342+
.start_mono_time_ts = pkt.current_ts,
343+
.end_mono_time_ts = pkt.current_ts,
344+
.eth_protocol = eth_protocol,
345+
.dns_record = {
346+
.id = pkt.dns_id,
347+
.flags = pkt.dns_flags,
348+
.latency = pkt.dns_latency,
349+
.errno = dns_errno
350+
},
351+
.network_events_idx = 0 // Explicitly zero for clarity
352+
};
257353
long ret =
258354
bpf_map_update_elem(&additional_flow_metrics, &id, &new_metrics, BPF_NOEXIST);
259355
if (ret != 0) {

bpf/flows_filter.h

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ static __always_inline int do_flow_filter_lookup(flow_id *id, struct filter_key_
5959

6060
if (rule->do_peerCIDR_lookup) {
6161
struct filter_key_t peerKey;
62-
__builtin_memset(&peerKey, 0, sizeof(peerKey));
62+
// NOTE(review): memset removed on the assumption that flow_filter_setup_lookup_key
// writes every byte of the key consulted by the map lookup (including struct padding,
// which hash-map lookups compare) — confirm, otherwise lookups may miss.
6363
// PeerCIDR lookup will target the opposite IP compared to the original CIDR lookup
6464
// In other words if cidr is using srcIP then peerCIDR will be the dstIP
6565
if (flow_filter_setup_lookup_key(id, &peerKey, &len, &offset, use_src_ip,
@@ -218,6 +218,62 @@ static __always_inline int do_flow_filter_lookup(flow_id *id, struct filter_key_
218218
return result;
219219
}
220220

221+
/*
222+
* Early IP-only filter check - optimized to skip L4 parsing if IP-based rejection is possible.
223+
* Returns: 1 if packet can be rejected early (IP-only reject rule), 0 if needs full check
224+
* This is a fast path that only checks CIDR matching, not ports/protocols.
225+
*/
226+
static __always_inline int early_ip_filter_check(flow_id *id, filter_action *action,
227+
u16 eth_protocol, u8 direction) {
228+
struct filter_key_t key;
229+
u8 len, offset;
230+
struct filter_value_t *rule;
231+
232+
// Check srcIP CIDR match first
233+
if (flow_filter_setup_lookup_key(id, &key, &len, &offset, true, eth_protocol) < 0) {
234+
return 0; // Need full check
235+
}
236+
237+
rule = (struct filter_value_t *)bpf_map_lookup_elem(&filter_map, &key);
238+
if (rule && rule->action == REJECT) {
239+
// IP matches and action is REJECT - can reject early without checking ports/protocols
240+
// Note: We check direction if rule specifies it
241+
if (rule->direction == MAX_DIRECTION || rule->direction == direction) {
242+
// If rule has port/protocol checks, we can't reject early (would need L4)
243+
// But if it's IP-only (protocol==0, no ports), we can reject now
244+
if (rule->protocol == 0 && rule->dstPortStart == 0 && rule->srcPortStart == 0 &&
245+
rule->portStart == 0 && rule->dstPort1 == 0 && rule->srcPort1 == 0 &&
246+
rule->port1 == 0 && rule->dstPort2 == 0 && rule->srcPort2 == 0 &&
247+
rule->port2 == 0 && !rule->do_peerCIDR_lookup && rule->tcpFlags == 0 &&
248+
rule->icmpType == 0 && rule->filter_drops == 0) {
249+
*action = REJECT;
250+
return 1; // Can reject early
251+
}
252+
}
253+
}
254+
255+
// Check dstIP CIDR match
256+
if (flow_filter_setup_lookup_key(id, &key, &len, &offset, false, eth_protocol) < 0) {
257+
return 0; // Need full check
258+
}
259+
260+
rule = (struct filter_value_t *)bpf_map_lookup_elem(&filter_map, &key);
261+
if (rule && rule->action == REJECT) {
262+
if (rule->direction == MAX_DIRECTION || rule->direction == direction) {
263+
if (rule->protocol == 0 && rule->dstPortStart == 0 && rule->srcPortStart == 0 &&
264+
rule->portStart == 0 && rule->dstPort1 == 0 && rule->srcPort1 == 0 &&
265+
rule->port1 == 0 && rule->dstPort2 == 0 && rule->srcPort2 == 0 &&
266+
rule->port2 == 0 && !rule->do_peerCIDR_lookup && rule->tcpFlags == 0 &&
267+
rule->icmpType == 0 && rule->filter_drops == 0) {
268+
*action = REJECT;
269+
return 1; // Can reject early
270+
}
271+
}
272+
}
273+
274+
return 0; // Need full check with L4
275+
}
276+
221277
/*
222278
* check if the flow match filter rule and return >= 1 if the flow is to be dropped
223279
*/
@@ -228,7 +284,7 @@ static __always_inline int is_flow_filtered(flow_id *id, filter_action *action,
228284
u8 len, offset;
229285
int result = 0;
230286

231-
__builtin_memset(&key, 0, sizeof(key));
287+
// NOTE(review): memset removed on the assumption that flow_filter_setup_lookup_key
// writes every byte of the key consulted by the map lookup (including struct padding,
// which hash-map lookups compare) — confirm, otherwise lookups may miss.
232288
*action = MAX_FILTER_ACTIONS;
233289

234290
// Lets do first CIDR match using srcIP.

bpf/network_events_monitoring.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,20 @@
77

88
#include "utils.h"
99

10+
// Optimized: unroll loop for small array (MAX_NETWORK_EVENTS=4)
1011
static inline bool md_already_exists(u8 network_events[MAX_NETWORK_EVENTS][MAX_EVENT_MD], u8 *md) {
11-
for (u8 i = 0; i < MAX_NETWORK_EVENTS; i++) {
12-
if (__builtin_memcmp(network_events[i], md, MAX_EVENT_MD) == 0) {
13-
return true;
14-
}
12+
// Unroll comparisons for common case - most flows have 1-2 events
13+
if (__builtin_memcmp(network_events[0], md, MAX_EVENT_MD) == 0) {
14+
return true;
15+
}
16+
if (__builtin_memcmp(network_events[1], md, MAX_EVENT_MD) == 0) {
17+
return true;
18+
}
19+
if (__builtin_memcmp(network_events[2], md, MAX_EVENT_MD) == 0) {
20+
return true;
21+
}
22+
if (__builtin_memcmp(network_events[3], md, MAX_EVENT_MD) == 0) {
23+
return true;
1524
}
1625
return false;
1726
}

bpf/types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ typedef struct flow_metrics_t {
9898
u8 dst_mac[ETH_ALEN];
9999
// OS interface index
100100
u32 if_index_first_seen;
101-
struct bpf_spin_lock lock;
101+
// Lock removed - using lock-free atomic operations for better performance
102102
u32 sampling;
103103
u8 direction_first_seen;
104104
// The positive errno of a failed map insertion that caused a flow

0 commit comments

Comments
 (0)