Skip to content

Commit 6d9d2e7

Browse files
authored
NETOBSERV-1112: This patch fixes a bug where RTT was not visible for flow logs at times. (#159)
* Make RTT values show up on both flow directions * Fix RTT calculation error for container hooks. Signed-off-by: Dushyant Behl <[email protected]> Signed-off-by: Dushyant Behl <[email protected]> --------- Signed-off-by: Dushyant Behl <[email protected]> Signed-off-by: Dushyant Behl <[email protected]>
1 parent af7d59d commit 6d9d2e7

File tree

10 files changed

+130
-74
lines changed

10 files changed

+130
-74
lines changed

bpf/dns_tracker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ static inline int trace_dns(struct sk_buff *skb) {
9797
if ((bpf_ntohs(dns.flags) & DNS_QR_FLAG) == 0) { /* dns query */
9898
fill_dns_id(&id, &dns_req, bpf_ntohs(dns.id), false);
9999
if (bpf_map_lookup_elem(&dns_flows, &dns_req) == NULL) {
100-
u64 ts = bpf_ktime_get_ns();
100+
u64 ts = bpf_ktime_get_ns();
101101
bpf_map_update_elem(&dns_flows, &dns_req, &ts, BPF_ANY);
102102
}
103103
id.direction = EGRESS;

bpf/flows.c

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,14 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
4040
return TC_ACT_OK;
4141
}
4242

43-
// Record the current time first.
44-
u64 current_time = bpf_ktime_get_ns();
43+
pkt_info pkt;
44+
__builtin_memset(&pkt, 0, sizeof(pkt));
4545

4646
flow_id id;
4747
__builtin_memset(&id, 0, sizeof(id));
4848

49-
pkt_info pkt;
50-
__builtin_memset(&pkt, 0, sizeof(pkt));
51-
49+
pkt.current_ts = bpf_ktime_get_ns(); // Record the current time first.
5250
pkt.id = &id;
53-
pkt.current_ts = current_time;
5451

5552
void *data_end = (void *)(long)skb->data_end;
5653
void *data = (void *)(long)skb->data;
@@ -60,30 +57,28 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
6057
return TC_ACT_OK;
6158
}
6259

63-
if (enable_rtt) {
64-
// This is currently gated as its not to be enabled by default.
65-
calculate_flow_rtt(&pkt, direction, data_end);
66-
}
67-
6860
//Set extra fields
6961
id.if_index = skb->ifindex;
7062
id.direction = direction;
7163

64+
// We calculate the RTT before looking up aggregated_flows map because we want
65+
// to keep the critical section between map lookup and update consume minimum time.
66+
if (enable_rtt) {
67+
// This is currently not to be enabled by default.
68+
calculate_flow_rtt(&pkt, direction, data_end);
69+
}
70+
7271
// TODO: we need to add spinlock here when we deprecate versions prior to 5.1, or provide
7372
// a spinlocked alternative version and use it selectively https://lwn.net/Articles/779120/
7473
flow_metrics *aggregate_flow = (flow_metrics *)bpf_map_lookup_elem(&aggregated_flows, &id);
7574
if (aggregate_flow != NULL) {
7675
aggregate_flow->packets += 1;
7776
aggregate_flow->bytes += skb->len;
78-
aggregate_flow->end_mono_time_ts = current_time;
77+
aggregate_flow->end_mono_time_ts = pkt.current_ts;
7978
aggregate_flow->flags |= pkt.flags;
8079

8180
// Does not matter the gate. Will be zero if not enabled.
82-
if (pkt.rtt > 0) {
83-
/* Since RTT is calculated for few packets we need to check if it is non zero value then only we update
84-
* the flow. If we remove this check a packet which fails to calculate RTT will override the previous valid
85-
* RTT with 0.
86-
*/
81+
if (pkt.rtt > aggregate_flow->flow_rtt) {
8782
aggregate_flow->flow_rtt = pkt.rtt;
8883
}
8984

@@ -101,8 +96,8 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
10196
flow_metrics new_flow = {
10297
.packets = 1,
10398
.bytes = skb->len,
104-
.start_mono_time_ts = current_time,
105-
.end_mono_time_ts = current_time,
99+
.start_mono_time_ts = pkt.current_ts,
100+
.end_mono_time_ts = pkt.current_ts,
106101
.flags = pkt.flags,
107102
.flow_rtt = pkt.rtt
108103
};

bpf/rtt_tracker.h

Lines changed: 92 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
#include "utils.h"
1111
#include "maps_definition.h"
1212

13-
static __always_inline void fill_flow_seq_id(flow_seq_id *seq_id, pkt_info *pkt, u32 seq, u8 reversed) {
13+
const u64 MIN_RTT = 50000; //50 micro seconds
14+
15+
static __always_inline void fill_flow_seq_id(flow_seq_id *seq_id, pkt_info *pkt, u32 seq, bool reverse) {
1416
flow_id *id = pkt->id;
15-
if (reversed) {
17+
if (reverse) {
1618
__builtin_memcpy(seq_id->src_ip, id->dst_ip, IP_MAX_LEN);
1719
__builtin_memcpy(seq_id->dst_ip, id->src_ip, IP_MAX_LEN);
1820
seq_id->src_port = id->dst_port;
@@ -23,49 +25,104 @@ static __always_inline void fill_flow_seq_id(flow_seq_id *seq_id, pkt_info *pkt,
2325
seq_id->src_port = id->src_port;
2426
seq_id->dst_port = id->dst_port;
2527
}
28+
seq_id->transport_protocol = id->transport_protocol;
2629
seq_id->seq_id = seq;
30+
seq_id->if_index = id->if_index;
2731
}
2832

29-
static __always_inline void calculate_flow_rtt_tcp(pkt_info *pkt, u8 direction, void *data_end, flow_seq_id *seq_id) {
30-
struct tcphdr *tcp = (struct tcphdr *) pkt->l4_hdr;
31-
if ( !tcp || ((void *)tcp + sizeof(*tcp) > data_end) ) {
32-
return;
33-
}
33+
static __always_inline void reverse_flow_id_struct(flow_id *src, flow_id *dst) {
34+
// Fields which remain same
35+
dst->eth_protocol = src->eth_protocol;
36+
dst->transport_protocol = src->transport_protocol;
37+
dst->if_index = src->if_index;
38+
39+
// Fields which should be reversed
40+
dst->direction = (src->direction == INGRESS) ? EGRESS : INGRESS;
41+
__builtin_memcpy(dst->src_mac, src->dst_mac, ETH_ALEN);
42+
__builtin_memcpy(dst->dst_mac, src->src_mac, ETH_ALEN);
43+
__builtin_memcpy(dst->src_ip, src->dst_ip, IP_MAX_LEN);
44+
__builtin_memcpy(dst->dst_ip, src->src_ip, IP_MAX_LEN);
45+
dst->src_port = src->dst_port;
46+
dst->dst_port = src->src_port;
47+
/* ICMP type can be ignore for now. We only deal with TCP packets for now.*/
48+
}
3449

35-
switch (direction) {
36-
case EGRESS: {
37-
if (IS_SYN_PACKET(pkt)) {
38-
// Record the outgoing syn sequence number
39-
u32 seq = bpf_ntohl(tcp->seq);
40-
fill_flow_seq_id(seq_id, pkt, seq, 0);
50+
static __always_inline void update_reverse_flow_rtt(pkt_info *pkt, u32 seq) {
51+
flow_id rev_flow_id;
52+
__builtin_memset(&rev_flow_id, 0, sizeof(rev_flow_id));
53+
reverse_flow_id_struct(pkt->id, &rev_flow_id);
4154

42-
long ret = bpf_map_update_elem(&flow_sequences, seq_id, &pkt->current_ts, BPF_ANY);
55+
flow_metrics *reverse_flow = (flow_metrics *)bpf_map_lookup_elem(&aggregated_flows, &rev_flow_id);
56+
if (reverse_flow != NULL) {
57+
if (pkt->rtt > reverse_flow->flow_rtt) {
58+
reverse_flow->flow_rtt = pkt->rtt;
59+
long ret = bpf_map_update_elem(&aggregated_flows, &rev_flow_id, reverse_flow, BPF_EXIST);
4360
if (trace_messages && ret != 0) {
44-
bpf_printk("err saving flow sequence record %d", ret);
61+
bpf_printk("error updating rtt value in flow %d\n", ret);
4562
}
4663
}
47-
break;
4864
}
49-
case INGRESS: {
50-
if (IS_ACK_PACKET(pkt)) {
51-
// Stored sequence should be ack_seq - 1
52-
u32 seq = bpf_ntohl(tcp->ack_seq) - 1;
53-
// check reversed flow
54-
fill_flow_seq_id(seq_id, pkt, seq, 1);
55-
56-
u64 *prev_ts = (u64 *)bpf_map_lookup_elem(&flow_sequences, seq_id);
57-
if (prev_ts != NULL) {
58-
pkt->rtt = pkt->current_ts - *prev_ts;
59-
// Delete the flow from flow sequence map so if it
60-
// restarts we have a new RTT calculation.
61-
long ret = bpf_map_delete_elem(&flow_sequences, seq_id);
62-
if (trace_messages && ret != 0) {
63-
bpf_printk("error evicting flow sequence: %d", ret);
64-
}
65-
}
65+
}
66+
67+
static __always_inline void __calculate_tcp_rtt(pkt_info *pkt, struct tcphdr *tcp, flow_seq_id *seq_id) {
68+
// Stored sequence should be ack_seq - 1
69+
u32 seq = bpf_ntohl(tcp->ack_seq) - 1;
70+
// check reversed flow
71+
fill_flow_seq_id(seq_id, pkt, seq, true);
72+
73+
u64 *prev_ts = (u64 *)bpf_map_lookup_elem(&flow_sequences, seq_id);
74+
if (prev_ts != NULL) {
75+
u64 rtt = pkt->current_ts - *prev_ts;
76+
/**
77+
* FIXME: Because of SAMPLING the way it is done if we miss one of SYN/SYN+ACK/ACK
78+
* then we can get RTT values which are the process response time rather than actual RTT.
79+
* This check below clears them out but needs to be modified with a better solution or change
80+
* the algorithm for calculating RTT so it doesn't interact with SAMPLING like this.
81+
*/
82+
if (rtt < MIN_RTT) {
83+
return;
6684
}
67-
break;
85+
pkt->rtt = rtt;
86+
// Delete the flow from flow sequence map so if it
87+
// restarts we have a new RTT calculation.
88+
long ret = bpf_map_delete_elem(&flow_sequences, seq_id);
89+
if (trace_messages && ret != 0) {
90+
bpf_printk("error evicting flow sequence: %d", ret);
91+
}
92+
// This is an ACK packet with valid sequence id so a SYN must
93+
// have been sent. We can safely update the reverse flow RTT here.
94+
update_reverse_flow_rtt(pkt, seq);
6895
}
96+
return;
97+
}
98+
99+
static __always_inline void __store_tcp_ts(pkt_info *pkt, struct tcphdr *tcp, flow_seq_id *seq_id) {
100+
// store timestamp of syn packets.
101+
u32 seq = bpf_ntohl(tcp->seq);
102+
fill_flow_seq_id(seq_id, pkt, seq, false);
103+
long ret = bpf_map_update_elem(&flow_sequences, seq_id, &pkt->current_ts, BPF_NOEXIST);
104+
if (trace_messages && ret != 0) {
105+
bpf_printk("err saving flow sequence record %d", ret);
106+
}
107+
return;
108+
}
109+
110+
static __always_inline void calculate_flow_rtt_tcp(pkt_info *pkt, u8 direction, void *data_end, flow_seq_id *seq_id) {
111+
struct tcphdr *tcp = (struct tcphdr *) pkt->l4_hdr;
112+
if ( !tcp || ((void *)tcp + sizeof(*tcp) > data_end) ) {
113+
return;
114+
}
115+
116+
/* We calculate RTT for both SYN/SYN+ACK and SYN+ACK/ACK and take the maximum of both.*/
117+
if (tcp->syn && tcp->ack) { // SYN ACK Packet
118+
__calculate_tcp_rtt(pkt, tcp, seq_id);
119+
__store_tcp_ts(pkt, tcp, seq_id);
120+
}
121+
else if (tcp->ack) {
122+
__calculate_tcp_rtt(pkt, tcp, seq_id);
123+
}
124+
else if (tcp->syn) {
125+
__store_tcp_ts(pkt, tcp, seq_id);
69126
}
70127
}
71128

@@ -83,5 +140,4 @@ static __always_inline void calculate_flow_rtt(pkt_info *pkt, u8 direction, void
83140
}
84141
}
85142

86-
#endif /* __RTT_TRACKER_H__ */
87-
143+
#endif /* __RTT_TRACKER_H__ */

bpf/types.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@
2222
#define FIN_ACK_FLAG 0x200
2323
#define RST_ACK_FLAG 0x400
2424

25-
#define IS_SYN_PACKET(pkt) ((pkt->flags & SYN_FLAG) || (pkt->flags & SYN_ACK_FLAG))
26-
#define IS_ACK_PACKET(pkt) ((pkt->flags & ACK_FLAG) || (pkt->flags & SYN_ACK_FLAG))
27-
2825
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
2926
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
3027
#define bpf_ntohs(x) __builtin_bswap16(x)
@@ -124,14 +121,16 @@ typedef struct flow_id_t {
124121
// Force emitting struct flow_id into the ELF.
125122
const struct flow_id_t *unused2 __attribute__((unused));
126123

127-
// Standard 4 tuple and a sequence identifier.
124+
// Standard 4 tuple, transport protocol and a sequence identifier.
128125
// No need to emit this struct. It's used only in kernel space
129126
typedef struct flow_seq_id_t {
130127
u16 src_port;
131128
u16 dst_port;
132129
u8 src_ip[IP_MAX_LEN];
133130
u8 dst_ip[IP_MAX_LEN];
134131
u32 seq_id;
132+
u8 transport_protocol;
133+
u32 if_index; // OS interface index
135134
} __attribute__((packed)) flow_seq_id;
136135

137136
// Flow record is a tuple containing both flow identifier and metrics. It is used to send

examples/flowlogs-dump/server/flowlogs-dump-collector.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func main() {
7272
for records := range receivedRecords {
7373
for _, record := range records.Entries {
7474
if record.EthProtocol == ipv6 {
75-
log.Printf("%s: %v %s IP %s:%d > %s:%d: protocol:%s type: %d code: %d dir:%d bytes:%d packets:%d flags:%d ends: %v dnsId: %d dnsFlags: 0x%04x dnsLatency(ms): %v rtt %v\n",
75+
log.Printf("%s: %v %s IP %s:%d > %s:%d: protocol:%s type: %d code: %d dir:%d bytes:%d packets:%d flags:%d ends: %v dnsId: %d dnsFlags: 0x%04x dnsLatency(ms): %v rtt(ns) %v\n",
7676
ipProto[record.EthProtocol],
7777
record.TimeFlowStart.AsTime().Local().Format("15:04:05.000000"),
7878
record.Interface,
@@ -91,10 +91,10 @@ func main() {
9191
record.GetDnsId(),
9292
record.GetDnsFlags(),
9393
record.DnsLatency.AsDuration().Milliseconds(),
94-
record.TimeFlowRtt.AsDuration().Microseconds(),
94+
record.TimeFlowRtt.AsDuration().Nanoseconds(),
9595
)
9696
} else {
97-
log.Printf("%s: %v %s IP %s:%d > %s:%d: protocol:%s type: %d code: %d dir:%d bytes:%d packets:%d flags:%d ends: %v dnsId: %d dnsFlags: 0x%04x dnsLatency(ms): %v rtt %v\n",
97+
log.Printf("%s: %v %s IP %s:%d > %s:%d: protocol:%s type: %d code: %d dir:%d bytes:%d packets:%d flags:%d ends: %v dnsId: %d dnsFlags: 0x%04x dnsLatency(ms): %v rtt(ns) %v\n",
9898
ipProto[record.EthProtocol],
9999
record.TimeFlowStart.AsTime().Local().Format("15:04:05.000000"),
100100
record.Interface,
@@ -113,7 +113,7 @@ func main() {
113113
record.GetDnsId(),
114114
record.GetDnsFlags(),
115115
record.DnsLatency.AsDuration().Milliseconds(),
116-
record.TimeFlowRtt.AsDuration().Microseconds(),
116+
record.TimeFlowRtt.AsDuration().Nanoseconds(),
117117
)
118118
}
119119
}

pkg/ebpf/bpf_bpfeb.go

Lines changed: 7 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/ebpf/bpf_bpfeb.o

11.4 KB
Binary file not shown.

pkg/ebpf/bpf_bpfel.go

Lines changed: 7 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/ebpf/bpf_bpfel.o

11.4 KB
Binary file not shown.

pkg/ebpf/tracer.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ func NewFlowFetcher(cfg *FlowFetcherConfig) (*FlowFetcher, error) {
101101
if enableRtt == 0 {
102102
// Cannot set the size of map to be 0 so set it to 1.
103103
spec.Maps[flowSequencesMap].MaxEntries = uint32(1)
104+
} else {
105+
log.Debugf("RTT calculations are enabled")
104106
}
105107

106108
if !cfg.DNSTracker {

0 commit comments

Comments
 (0)