diff --git a/.mk/bc.mk b/.mk/bc.mk index c0b5f2db8..c4ad46666 100644 --- a/.mk/bc.mk +++ b/.mk/bc.mk @@ -22,7 +22,8 @@ define PROGRAMS "xfrm_input_kprobe": "kprobe", "xfrm_input_kretprobe": "kretprobe", "xfrm_output_kprobe": "kprobe", - "xfrm_output_kretprobe": "kretprobe" + "xfrm_output_kretprobe": "kretprobe", + "probe_entry_SSL_write": "uprobe" } endef @@ -38,7 +39,9 @@ define MAPS "filter_map":"lpm_trie", "peer_filter_map":"lpm_trie", "ipsec_ingress_map":"hash", - "ipsec_egress_map":"hash" + "ipsec_egress_map":"hash", + "ssl_data_event_map":"ringbuf", + "dns_name_map":"per_cpu_array" } endef diff --git a/Dockerfile b/Dockerfile index e496a99a8..f9d490711 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ ARG TARGETARCH FROM docker.io/library/golang:1.24 as builder ARG TARGETARCH + ARG LDFLAGS WORKDIR /opt/app-root diff --git a/Makefile b/Makefile index aa3ebd979..768267c5d 100644 --- a/Makefile +++ b/Makefile @@ -35,8 +35,8 @@ endif LOCAL_GENERATOR_IMAGE ?= ebpf-generator:latest CILIUM_EBPF_VERSION := v0.19.0 GOLANGCI_LINT_VERSION = v2.2.1 -GO_VERSION = "1.24.4" -PROTOC_VERSION = "3.19.4" +GO_VERSION = 1.24.4 +PROTOC_VERSION = 3.19.4 PROTOC_GEN_GO_VERSION="v1.35.1" PROTOC_GEN_GO_GRPC_VERSION="v1.5.1" CLANG ?= clang @@ -151,7 +151,7 @@ generate: gen-bpf gen-protobuf .PHONY: docker-generate docker-generate: ## Create the container that generates the eBPF binaries @echo "### Creating the container that generates the eBPF binaries" - $(OCI_BIN) build . -f scripts/generators.Dockerfile -t $(LOCAL_GENERATOR_IMAGE) --platform=linux/amd64 --build-arg EXTENSION="x86_64" --build-arg PROTOCVERSION="$(PROTOC_VERSION)" --build-arg GOVERSION="$(GO_VERSION)" + $(OCI_BIN) buildx build . 
-f scripts/generators.Dockerfile -t $(LOCAL_GENERATOR_IMAGE) --platform=linux/amd64 --build-arg EXTENSION="x86_64" --build-arg PROTOCVERSION="$(PROTOC_VERSION)" --build-arg GOVERSION="$(GO_VERSION)" --load $(OCI_BIN) run --privileged --rm -v $(shell pwd):/src $(LOCAL_GENERATOR_IMAGE) .PHONY: compile @@ -205,7 +205,7 @@ create-and-deploy-kind-cluster: prereqs ## Create a kind cluster and deploy the .PHONY: destroy-kind-cluster destroy-kind-cluster: ## Destroy the kind cluster. - oc delete -f scripts/agent.yml + kubectl delete -f scripts/agent.yml kind delete cluster ##@ Images diff --git a/bpf/configs.h b/bpf/configs.h index 0a61ac1de..98533ddf4 100644 --- a/bpf/configs.h +++ b/bpf/configs.h @@ -15,4 +15,5 @@ volatile const u8 enable_network_events_monitoring = 0; volatile const u8 network_events_monitoring_groupid = 0; volatile const u8 enable_pkt_translation_tracking = 0; volatile const u8 enable_ipsec = 0; +volatile const u8 enable_openssl_tracking = 0; #endif //__CONFIGS_H__ diff --git a/bpf/dns_tracker.h b/bpf/dns_tracker.h index e40273fb6..2d2b1407d 100644 --- a/bpf/dns_tracker.h +++ b/bpf/dns_tracker.h @@ -110,12 +110,17 @@ static __always_inline int track_dns_packet(struct __sk_buff *skb, pkt_info *pkt pkt->dns_flags = flags; // Copy raw QNAME bytes (label-encoded) and let userspace decode to dotted form - __builtin_memset(pkt->dns_name, 0, DNS_NAME_MAX_LEN); - u32 qname_off = dns_offset + sizeof(struct dns_header); - // Best-effort fixed-size copy; safe for verifier (constant size) - (void)bpf_skb_load_bytes(skb, qname_off, pkt->dns_name, DNS_NAME_MAX_LEN - 1); - // Ensure null-termination - pkt->dns_name[DNS_NAME_MAX_LEN - 1] = '\0'; + // Use per-CPU map to avoid stack limit + u32 key = 0; + dns_name_buffer *dns_buf = bpf_map_lookup_elem(&dns_name_map, &key); + if (dns_buf) { + u32 qname_off = dns_offset + sizeof(struct dns_header); + // Best-effort fixed-size copy; safe for verifier (constant size) + (void)bpf_skb_load_bytes(skb, qname_off, dns_buf->name, 
DNS_NAME_MAX_LEN - 1); + // Ensure null-termination + dns_buf->name[DNS_NAME_MAX_LEN - 1] = '\0'; + pkt->dns_name = dns_buf->name; + } } // end of dns response } return ret; diff --git a/bpf/flows.c b/bpf/flows.c index 76b557ea5..f35992693 100644 --- a/bpf/flows.c +++ b/bpf/flows.c @@ -57,6 +57,11 @@ */ #include "ipsec.h" +/* + * Defines ssl tracker + */ +#include "openssl_tracker.h" + // return 0 on success, 1 if capacity reached static __always_inline int add_observed_intf(flow_metrics *value, pkt_info *pkt, u32 if_index, u8 direction) { @@ -118,7 +123,9 @@ static inline void update_dns(additional_metrics *extra_metrics, pkt_info *pkt, extra_metrics->dns_record.id = pkt->dns_id; extra_metrics->dns_record.flags = pkt->dns_flags; extra_metrics->dns_record.latency = pkt->dns_latency; - __builtin_memcpy(extra_metrics->dns_record.name, pkt->dns_name, DNS_NAME_MAX_LEN); + if (pkt->dns_name != NULL) { + __builtin_memcpy(extra_metrics->dns_record.name, pkt->dns_name, DNS_NAME_MAX_LEN); + } } if (dns_errno != 0) { extra_metrics->dns_record.errno = dns_errno; @@ -254,7 +261,9 @@ static inline int flow_monitor(struct __sk_buff *skb, u8 direction) { new_metrics.dns_record.id = pkt.dns_id; new_metrics.dns_record.flags = pkt.dns_flags; new_metrics.dns_record.latency = pkt.dns_latency; - __builtin_memcpy(new_metrics.dns_record.name, pkt.dns_name, DNS_NAME_MAX_LEN); + if (pkt.dns_name != NULL) { + __builtin_memcpy(new_metrics.dns_record.name, pkt.dns_name, DNS_NAME_MAX_LEN); + } new_metrics.dns_record.errno = dns_errno; long ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_metrics, BPF_NOEXIST); diff --git a/bpf/maps_definition.h b/bpf/maps_definition.h index 72cec2000..2a566624e 100644 --- a/bpf/maps_definition.h +++ b/bpf/maps_definition.h @@ -57,6 +57,15 @@ struct { __uint(pinning, LIBBPF_PIN_BY_NAME); } global_counters SEC(".maps"); +// Per-CPU temporary storage for DNS name (avoids stack limit) +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, 
u32); + __type(value, dns_name_buffer); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} dns_name_map SEC(".maps"); + // LPM trie map used to filter traffic by IP address CIDR struct { __uint(type, BPF_MAP_TYPE_LPM_TRIE); @@ -97,4 +106,11 @@ struct { __uint(pinning, LIBBPF_PIN_BY_NAME); } ipsec_egress_map SEC(".maps"); +// Ringbuf for SSL data events +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 27); // 16KB * 1000 events/sec * 5sec "eviction time" = ~80MB, rounded up to 128MB (next power of two, as required by ringbuf) + __uint(pinning, LIBBPF_PIN_BY_NAME); +} ssl_data_event_map SEC(".maps"); + #endif //__MAPS_DEFINITION_H__ diff --git a/bpf/openssl_tracker.h b/bpf/openssl_tracker.h new file mode 100644 index 000000000..c203cc36b --- /dev/null +++ b/bpf/openssl_tracker.h @@ -0,0 +1,65 @@ +/* + * OpenSSL monitoring uprobe/uretprobe eBPF hook. + */ + +#ifndef __OPENSSL_TRACKER_H__ +#define __OPENSSL_TRACKER_H__ + +#include "utils.h" + +static inline void generate_SSL_data_event(struct pt_regs *ctx, u64 pid_tgid, u8 ssl_type, + const char *buf, uint32_t len) { + if (len == 0) { /* len is unsigned: "<= 0" could only ever match 0 */ + return; + } + + struct ssl_data_event_t *event; + event = bpf_ringbuf_reserve(&ssl_data_event_map, sizeof(*event), 0); + if (!event) { + return; + } + event->timestamp_ns = bpf_ktime_get_ns(); + event->pid_tgid = pid_tgid; + event->ssl_type = ssl_type; + event->data_len = len < MAX_DATA_SIZE_OPENSSL ?
len : MAX_DATA_SIZE_OPENSSL; + bpf_probe_read_user(&event->data, event->data_len, buf); + bpf_ringbuf_submit(event, 0); +} + +// int SSL_write(SSL *ssl, const void *buf, int num); +// https://github.com/openssl/openssl/blob/master/ssl/ssl_lib.c#L2666 +SEC("uprobe/SSL_write") +int probe_entry_SSL_write(struct pt_regs *ctx) { + if (enable_openssl_tracking == 0) { + return 0; + } + + u64 pid_tgid = bpf_get_current_pid_tgid(); + + BPF_PRINTK("openssl uprobe/SSL_write pid: %d\n", pid_tgid); + // https://github.com/openssl/openssl/blob/master/ssl/ssl_local.h#L1233 + void *ssl = (void *)PT_REGS_PARM1(ctx); + + u32 ssl_type; + int ret; + + ret = bpf_probe_read_user(&ssl_type, sizeof(ssl_type), (u32 *)ssl); + if (ret) { + BPF_PRINTK("(OPENSSL) bpf_probe_read ssl_type_ptr failed, ret: %d\n", ret); + return 0; + } + const char *buf = (const char *)PT_REGS_PARM2(ctx); + uint32_t num = (uint32_t)PT_REGS_PARM3(ctx); // Third parameter: number of bytes to write + + BPF_PRINTK("openssl uprobe/SSL_write type: %d, buf: %p, num: %d\n", ssl_type, buf, num); + + // Read the data immediately in the uprobe (before SSL_write processes it) + // This captures the plaintext before encryption + if (num > 0) { + generate_SSL_data_event(ctx, pid_tgid, ssl_type, buf, num); + } + + return 0; +} + +#endif /* __OPENSSL_TRACKER_H__ */ \ No newline at end of file diff --git a/bpf/types.h b/bpf/types.h index 1c67b1494..cc3351634 100644 --- a/bpf/types.h +++ b/bpf/types.h @@ -72,6 +72,11 @@ typedef __u64 u64; #define MAX_PAYLOAD_SIZE 256 #define DNS_NAME_MAX_LEN 32 +// Per-CPU temporary storage for DNS name (avoids stack limit) +typedef struct dns_name_buffer_t { + char name[DNS_NAME_MAX_LEN]; +} dns_name_buffer; + // according to field 61 in https://www.iana.org/assignments/ipfix/ipfix.xhtml typedef enum direction_t { INGRESS, @@ -194,7 +199,7 @@ typedef struct pkt_info_t { u16 dns_id; u16 dns_flags; u64 dns_latency; - char dns_name[DNS_NAME_MAX_LEN]; + char *dns_name; } pkt_info; // Structure for 
payload metadata @@ -280,4 +285,17 @@ struct filter_value_t { // Force emitting enums/structs into the ELF const static struct filter_value_t *unused12 __attribute__((unused)); +#define MAX_DATA_SIZE_OPENSSL (1024 * 16) +// SSL data event +struct ssl_data_event_t { + u64 timestamp_ns; + u64 pid_tgid; + s32 data_len; + u8 ssl_type; + char data[MAX_DATA_SIZE_OPENSSL]; +}; + +// Force emitting enums/structs into the ELF +const static struct ssl_data_event_t *unused13 __attribute__((unused)); + #endif /* __TYPES_H__ */ diff --git a/examples/test-ssl-host.sh b/examples/test-ssl-host.sh new file mode 100755 index 000000000..14e1f0c5e --- /dev/null +++ b/examples/test-ssl-host.sh @@ -0,0 +1,553 @@ +#!/bin/bash +# Test SSL tracking with host processes +# +# This script tests SSL/TLS tracking functionality by executing HTTPS requests +# on cluster nodes using privileged pods with hostNetwork. These pods mount the +# host's libssl.so library, ensuring processes use the same library that the +# NetObserv agent's eBPF uprobe is attached to. +# +# Prerequisites: +# - Kubernetes cluster (kind/minikube/OpenShift/etc) +# - NetObserv agent deployed with ENABLE_OPENSSL_TRACKING=true +# - Agent configured with correct OPENSSL_PATH +# +# Note: Some tests (TLS 1.3, HTTP/2) are optional and won't cause failure +# if not supported on the node.
+ +# Don't exit on error - we want to run all tests and report results +# Steps to test on Kind cluster: +# make create-and-deploy-kind-cluster +# export KUBECONFIG=$(pwd)/scripts/kubeconfig +# ./examples/test-ssl-host.sh + +set +e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}=== Testing SSL with Host Process ===${NC}" +echo "" + +# Get all node names first (needed for cluster type detection) +NODES=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}') + +# Detect if we're on a kind cluster (Docker-based) or real cluster +is_kind_cluster() { + # Check if nodes are Docker containers (kind clusters) + local first_node=$(echo $NODES | awk '{print $1}') + if [ -n "$first_node" ] && docker ps --format '{{.Names}}' 2>/dev/null | grep -q "^${first_node}$"; then + return 0 + fi + return 1 +} + +# Detect cluster type +if is_kind_cluster; then + echo "Detected: Kind cluster (Docker-based)" + echo "Tests will run directly on node containers." +else + echo "Detected: Real Kubernetes/OpenShift cluster" + echo "Tests will run via privileged test pods with hostNetwork." + echo "These pods mount the host's libssl.so to ensure uprobes are triggered." +fi +echo "" +echo "This will run various SSL/TLS tests on each cluster node." +echo "Tests use privileged pods with hostNetwork that mount the host's libssl.so," +echo "ensuring processes use the same library that the agent's uprobe is attached to." +echo "" + +# Counter for tests +TOTAL_TESTS=0 +PASSED_TESTS=0 +FAILED_TESTS=0 + +# Function to create a test pod on a node that uses host's libssl.so +create_test_pod_on_node() { + local node=$1 + local pod_name="ssl-test-$(echo $node | tr '.' 
'-' | tr ':' '-')" + + # Check if pod already exists + if kubectl get pod -n netobserv-privileged "$pod_name" &>/dev/null; then + return 0 + fi + + # Create a privileged pod with hostNetwork on the specific node + # Mount the host's /usr/lib64 so processes can use the exact library the uprobe is attached to + kubectl apply -f - </dev/null || true +} + +# Function to run a command on a node +run_on_node() { + local node=$1 + local cmd=$2 + + if is_kind_cluster; then + # Use docker exec for kind clusters (nodes are Docker containers) + docker exec $node bash -c "$cmd" 2>&1 + else + # For real clusters, create a privileged pod with hostNetwork that uses host's libssl.so + local pod_name="ssl-test-$(echo $node | tr '.' '-' | tr ':' '-')" + create_test_pod_on_node "$node" + + # The pod has hostNetwork and mounts /usr/lib64 from host + # This ensures processes use the exact same libssl.so that the uprobe is attached to + kubectl exec -n netobserv-privileged "$pod_name" -- \ + sh -c "$cmd" 2>&1 + fi +} + +run_test() { + local node=$1 + local test_name=$2 + local curl_cmd=$3 + + TOTAL_TESTS=$((TOTAL_TESTS + 1)) + echo -e "${YELLOW}[TEST $TOTAL_TESTS] $test_name${NC}" + + if run_on_node "$node" "$curl_cmd" > /dev/null 2>&1; then + echo -e "${GREEN}✓ Request completed successfully${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + return 0 + else + echo -e "${RED}✗ Request failed${NC}" + FAILED_TESTS=$((FAILED_TESTS + 1)) + return 1 + fi +} + + +check_ssl_events() { + local agent_pod=$1 + local test_desc=$2 + + echo -e "${BLUE}Checking logs for SSL events after $test_desc:${NC}" + local recent_logs=$(kubectl logs -n netobserv-privileged $agent_pod --tail=1000 2>/dev/null) + # Also get initialization logs from the beginning (initialization messages appear early) + local init_logs=$(kubectl logs -n netobserv-privileged $agent_pod 2>/dev/null | head -500) + + # Look for actual SSL events (these are logged at debug/info level) + # Exclude initialization messages like "SSL RingBuf 
tracer started" and "waiting for SSL event" + local ssl_events=$(echo "$recent_logs" | grep -iE 'SSL EVENT:|SSL ringbuffer event received!|SSL data as string:' | grep -v "waiting for SSL event" | grep -v "SSL RingBuf tracer started" | tail -10) + + if [ -n "$ssl_events" ]; then + echo -e "${GREEN}✓ SSL events found:${NC}" + echo "$ssl_events" | sed 's/^/ /' + echo "" + + # Try to decode hex strings if python is available + if command -v python3 &>/dev/null; then + echo " Decoded SSL data (plaintext before encryption):" + echo "$ssl_events" | grep "SSL data as string:" | head -3 | while IFS= read -r line; do + # Extract just the SSL data string (remove log prefix and component suffix) + hex_part=$(echo "$line" | sed 's/.*SSL data as string: //' | sed 's/" component=.*$//' | sed 's/"$//') + + # Check if it's already readable (no hex escapes) + if ! echo "$hex_part" | grep -q '\\x'; then + # Already readable - show it directly + echo " $hex_part" + # Check for HTTP patterns + if echo "$hex_part" | grep -qE '(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH|HTTP/[0-9.]+|Host:|User-Agent:)'; then + echo " → HTTP request detected" + fi + else + # Decode hex escapes using python + decoded=$(echo "$hex_part" | python3 -c " +import sys +import re +s = sys.stdin.read() +# Replace \xHH with actual character +decoded = re.sub(r'\\\\x([0-9a-fA-F]{2})', lambda m: chr(int(m.group(1), 16)), s) +# Check if mostly readable +readable_count = sum(1 for c in decoded[:100] if 32 <= ord(c) < 127) +if readable_count > 20: + # Mostly readable - show as text with escapes for non-printable + result = '' + for c in decoded[:200]: + if 32 <= ord(c) < 127: + result += c + elif c == '\n': + result += '\\n' + elif c == '\r': + result += '\\r' + elif c == '\t': + result += '\\t' + else: + result += f'<{ord(c):02x}>' + print('READABLE:' + result) +else: + # Mostly binary - show as hex dump + hex_dump = ' '.join(f'{ord(c):02x}' for c in decoded[:50]) + print('BINARY:' + hex_dump) +" 2>/dev/null) + if [ -n 
"$decoded" ]; then + if echo "$decoded" | grep -q '^READABLE:'; then + readable_text=$(echo "$decoded" | sed 's/^READABLE://') + echo " $readable_text" + # Check for HTTP patterns + if echo "$readable_text" | grep -qE '(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH|HTTP/[0-9.]+|Host:|User-Agent:)'; then + echo " → HTTP request detected" + fi + elif echo "$decoded" | grep -q '^BINARY:'; then + hex_dump=$(echo "$decoded" | sed 's/^BINARY://') + echo " Binary data (hex): $hex_dump..." + echo " → Likely HTTP/2 frame or TLS handshake data" + else + echo " $decoded" + fi + fi + fi + done + echo "" + fi + return 0 + fi + + # No events found - provide detailed diagnostics + echo -e "${YELLOW}⚠ No SSL events found in logs${NC}" + echo "" + echo " Diagnostic information:" + + # Check if SSL tracking is enabled (check from beginning of logs) + if echo "$init_logs" | grep -qi "SSL tracking enabled\|SSL RingBuf tracer started"; then + echo -e " ${GREEN}✓ SSL tracking is enabled in agent${NC}" + echo "$init_logs" | grep -iE "SSL.*tracking.*enabled|SSL RingBuf tracer started" | head -2 | sed 's/^/ /' + else + echo -e " ${RED}✗ SSL tracking may not be enabled${NC}" + fi + + # Check for errors + local errors=$(echo "$recent_logs" | grep -iE "error.*ssl|ssl.*error|failed.*ssl|ssl.*failed" | tail -5) + if [ -n "$errors" ]; then + echo -e " ${RED}✗ Found SSL-related errors:${NC}" + echo "$errors" | sed 's/^/ /' + fi + + # Check if ringbuffer is waiting + if echo "$recent_logs" | grep -qi "waiting for SSL event"; then + echo -e " ${YELLOW}⚠ Agent is waiting for SSL events (ringbuffer is listening)${NC}" + echo " This means the uprobe is attached but no events are being received." + echo "" + echo " Possible reasons:" + echo " 1. Processes are running in containers that don't use the host's libssl.so" + echo " 2. The library path used by processes differs from OPENSSL_PATH" + echo " 3. Processes are not calling SSL_write (e.g., using different SSL libraries)" + echo " 4. 
The uprobe attachment may not be working correctly" + fi + + echo "" + echo -e " ${YELLOW}Note: Tests use privileged pods with hostNetwork that mount the host's libssl.so.${NC}" + echo -e " ${YELLOW}If SSL events are still not captured, verify the OPENSSL_PATH matches the library${NC}" + echo -e " ${YELLOW}used by the processes you're testing.${NC}" + echo "" +} + +for NODE in $NODES; do + echo "=========================================" + echo -e "${BLUE}Testing node: $NODE${NC}" + echo "=========================================" + + # Get the agent pod running on this node + AGENT_POD=$(kubectl get pods -n netobserv-privileged -l k8s-app=netobserv-ebpf-agent \ + --field-selector spec.nodeName=$NODE \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | head -n1) + + if [ -z "$AGENT_POD" ]; then + echo -e "${RED}Warning: No agent pod found on node $NODE, skipping...${NC}" + continue + fi + + echo -e "${GREEN}Agent pod: $AGENT_POD${NC}" + echo "" + + # Show diagnostic information + echo -e "${BLUE}Node diagnostics:${NC}" + echo -n " curl version: " + run_on_node "$NODE" "curl --version 2>/dev/null | head -1 || which curl >/dev/null 2>&1 && echo 'curl available' || echo 'curl not found'" 2>/dev/null || echo "unknown" + echo -n " OpenSSL library: " + # Check libssl.so version info directly (more reliable than openssl binary) + openssl_info=$(run_on_node "$NODE" " + # Find the actual library file + libssl_file=\$(ls -1 /usr/lib64/libssl.so* 2>/dev/null | head -1) + if [ -z \"\$libssl_file\" ]; then + libssl_file=\$(ls -1 /usr/lib/libssl.so* 2>/dev/null | head -1) + fi + if [ -n \"\$libssl_file\" ] && [ -e \"\$libssl_file\" ]; then + # Extract version from filename (e.g., libssl.so.3 -> OpenSSL 3, libssl.so.1.1 -> OpenSSL 1.1) + filename=\$(basename \"\$libssl_file\") + if echo \"\$filename\" | grep -q 'libssl\.so\.[0-9]'; then + version=\$(echo \"\$filename\" | sed -E 's/libssl\.so\.([0-9]+)(\.[0-9]+)?.*/\1\2/' | sed 's/\.$//') + echo \"OpenSSL \$version 
(\$filename)\" + else + echo \"\$filename\" + fi + else + echo 'not found' + fi + " 2>/dev/null) + if [ -n "$openssl_info" ] && [ "$openssl_info" != "unknown" ]; then + echo "$openssl_info" + else + echo "unknown" + fi + echo -n " libssl location: " + run_on_node "$NODE" "ls -la /usr/lib*/libssl.so* 2>/dev/null | head -1 || echo 'not found in standard location'" 2>/dev/null || echo "unknown" + echo "" + + # Check if agent has SSL tracking enabled + # Check from beginning of logs since initialization messages appear early + echo -e "${BLUE}Agent SSL tracking status:${NC}" + # Get logs from the beginning (no tail limit) but limit output for performance + agent_init_logs=$(kubectl logs -n netobserv-privileged $AGENT_POD 2>/dev/null | head -500) + agent_recent_logs=$(kubectl logs -n netobserv-privileged $AGENT_POD --tail=200 2>/dev/null) + + if echo "$agent_init_logs" | grep -qi "SSL tracking enabled\|OpenSSL tracking enabled\|SSL RingBuf tracer started"; then + echo -e " ${GREEN}✓ SSL tracking is enabled${NC}" + echo "$agent_init_logs" | grep -iE "SSL.*tracking.*enabled|OpenSSL.*tracking.*enabled|SSL RingBuf tracer started" | head -3 | sed 's/^/ /' + else + echo -e " ${YELLOW}⚠ SSL tracking status unclear${NC}" + echo " Checking agent environment variables..." 
+ kubectl get pod -n netobserv-privileged $AGENT_POD -o jsonpath='{.spec.containers[0].env[*]}' 2>/dev/null | grep -i openssl || echo " No OPENSSL_PATH found in agent env" + fi + + # Check OpenSSL path configuration + openssl_path=$(kubectl get pod -n netobserv-privileged $AGENT_POD -o jsonpath='{.spec.containers[0].env[?(@.name=="OPENSSL_PATH")].value}' 2>/dev/null) + if [ -n "$openssl_path" ]; then + echo -e " ${GREEN}Agent OPENSSL_PATH: $openssl_path${NC}" + else + echo -e " ${YELLOW}⚠ OPENSSL_PATH not configured in agent${NC}" + fi + echo "" + + # Test 1: Basic HTTPS GET with HTTP/1.1 + run_test "$NODE" "Basic HTTPS GET with HTTP/1.1" \ + "curl -s --http1.1 --max-time 10 https://httpbin.org/get" + + # Test 2: HTTPS POST with data + run_test "$NODE" "HTTPS POST with JSON data" \ + "curl -s --http1.1 --max-time 10 -X POST https://httpbin.org/post -H 'Content-Type: application/json' -d '{\"test\":\"data\"}'" + + # Test 3: HTTPS with TLS 1.2 + run_test "$NODE" "HTTPS with TLS 1.2 explicitly" \ + "curl -s --tlsv1.2 --tls-max 1.2 --max-time 10 https://www.howsmyssl.com/a/check" + + # Test 4: HTTPS with TLS 1.3 (optional - may not be supported) + echo -e "${YELLOW}[TEST $((TOTAL_TESTS + 1))] HTTPS with TLS 1.3 explicitly (optional)${NC}" + TOTAL_TESTS=$((TOTAL_TESTS + 1)) + + # First check if TLS 1.3 is supported + if run_on_node "$NODE" "curl --help all 2>/dev/null | grep -q tlsv1.3" 2>/dev/null; then + if run_on_node "$NODE" "curl -s --tlsv1.3 --max-time 10 https://www.howsmyssl.com/a/check" > /dev/null 2>&1; then + echo -e "${GREEN}✓ Request completed successfully (TLS 1.3 supported)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + else + # Try alternative endpoint + if run_on_node "$NODE" "curl -s --tlsv1.3 --max-time 10 https://www.cloudflare.com" > /dev/null 2>&1; then + echo -e "${GREEN}✓ Request completed successfully with alternative endpoint${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + else + echo -e "${YELLOW}⚠ TLS 1.3 option exists but connection failed (this 
is OK)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + fi + fi + else + echo -e "${YELLOW}⚠ TLS 1.3 not supported by curl on this node (skipped)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + fi + + # Test 5: HTTPS with headers + run_test "$NODE" "HTTPS with custom headers" \ + "curl -s --http1.1 --max-time 10 -H 'User-Agent: NetObserv-Test/1.0' -H 'X-Test-Header: SSL-Tracking' https://httpbin.org/headers" + + # Test 6: Different endpoint - github API + run_test "$NODE" "HTTPS to GitHub API" \ + "curl -s --http1.1 --max-time 10 https://api.github.com" + + # Test 7: Different endpoint - Google + run_test "$NODE" "HTTPS to Google" \ + "curl -s --http1.1 --max-time 10 -L https://www.google.com" + + # Test 8: HTTPS with large response + run_test "$NODE" "HTTPS with large response (1KB)" \ + "curl -s --http1.1 --max-time 10 https://httpbin.org/bytes/1024" + + # Test 9: HTTPS with HTTP/2 (optional - may not be supported) + echo -e "${YELLOW}[TEST $((TOTAL_TESTS + 1))] HTTPS with HTTP/2 (optional)${NC}" + TOTAL_TESTS=$((TOTAL_TESTS + 1)) + + if run_on_node "$NODE" "curl --help all 2>/dev/null | grep -q http2" 2>/dev/null; then + if run_on_node "$NODE" "curl -s --http2 --max-time 10 https://www.google.com" > /dev/null 2>&1; then + echo -e "${GREEN}✓ Request completed successfully (HTTP/2 supported)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + else + echo -e "${YELLOW}⚠ HTTP/2 option exists but connection failed (this is OK)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + fi + else + echo -e "${YELLOW}⚠ HTTP/2 not supported by curl on this node (skipped)${NC}" + PASSED_TESTS=$((PASSED_TESTS + 1)) + fi + + echo "" + check_ssl_events "$AGENT_POD" "all tests" + + echo -e "${BLUE}Detailed SSL event analysis:${NC}" + detailed_logs=$(kubectl logs -n netobserv-privileged $AGENT_POD --tail=1000 2>/dev/null) + # Look for actual SSL events, excluding initialization messages + detailed_events=$(echo "$detailed_logs" | grep -iE 'SSL EVENT:|SSL ringbuffer event received!|SSL data as string:' | 
grep -v "waiting for SSL event" | grep -v "SSL RingBuf tracer started" | tail -20) + if [ -n "$detailed_events" ]; then + echo " Found SSL events:" + echo "$detailed_events" | sed 's/^/ /' + echo "" + + # Decode SSL data strings if python is available + if command -v python3 &>/dev/null; then + echo " Decoded SSL data samples (plaintext before encryption):" + echo "$detailed_events" | grep "SSL data as string:" | head -5 | while IFS= read -r line; do + # Extract just the SSL data string (remove log prefix and component suffix) + hex_part=$(echo "$line" | sed 's/.*SSL data as string: //' | sed 's/" component=.*$//' | sed 's/"$//') + + # Check if it's already readable (no hex escapes) + if ! echo "$hex_part" | grep -q '\\x'; then + # Already readable - show it directly + echo " $hex_part" + # Check for HTTP patterns + http_match=$(echo "$hex_part" | grep -oE '(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH|HTTP/[0-9.]+|Host:|User-Agent:|Content-Type:)' | head -3) + if [ -n "$http_match" ]; then + echo " → HTTP patterns: $http_match" + fi + else + # Decode hex escapes using python + decoded=$(echo "$hex_part" | python3 -c " +import sys +import re +s = sys.stdin.read() +# Replace \xHH with actual character +decoded = re.sub(r'\\\\x([0-9a-fA-F]{2})', lambda m: chr(int(m.group(1), 16)), s) +# Check if mostly readable +readable_count = sum(1 for c in decoded[:100] if 32 <= ord(c) < 127) +if readable_count > 20: + # Mostly readable - show as text with escapes for non-printable + result = '' + for c in decoded[:200]: + if 32 <= ord(c) < 127: + result += c + elif c == '\n': + result += '\\n' + elif c == '\r': + result += '\\r' + elif c == '\t': + result += '\\t' + else: + result += f'<{ord(c):02x}>' + print('READABLE:' + result) +else: + # Mostly binary - show as hex dump + hex_dump = ' '.join(f'{ord(c):02x}' for c in decoded[:50]) + print('BINARY:' + hex_dump) +" 2>/dev/null) + if [ -n "$decoded" ]; then + if echo "$decoded" | grep -q '^READABLE:'; then + readable_text=$(echo 
"$decoded" | sed 's/^READABLE://') + echo " $readable_text" + # Check for HTTP patterns + http_match=$(echo "$readable_text" | grep -oE '(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH|HTTP/[0-9.]+|Host:|User-Agent:|Content-Type:)' | head -3) + if [ -n "$http_match" ]; then + echo " → HTTP patterns: $http_match" + fi + elif echo "$decoded" | grep -q '^BINARY:'; then + hex_dump=$(echo "$decoded" | sed 's/^BINARY://') + echo " Binary data (hex): $hex_dump..." + echo " → Likely HTTP/2 frame or TLS handshake data" + else + echo " $decoded" + fi + fi + fi + done + echo "" + fi + else + echo " No SSL events found in recent logs" + echo "" + echo " Recent SSL-related log entries (status messages):" + echo "$detailed_logs" | grep -iE 'ssl|openssl' | grep -v "waiting for SSL event" | tail -8 | sed 's/^/ /' || echo " No SSL-related log entries found" + echo "" + echo " Checking for errors or warnings..." + echo "$detailed_logs" | grep -iE 'error|warn|fail' | grep -iE 'ssl|openssl|uprobe|attach|ringbuf' | tail -5 | sed 's/^/ /' || echo " No relevant errors found" + fi + + echo "" + echo -e "${BLUE}Node $NODE test summary:${NC}" + echo " Total tests: $TOTAL_TESTS" + echo -e " ${GREEN}Passed: $PASSED_TESTS${NC}" + echo -e " ${RED}Failed: $FAILED_TESTS${NC}" + echo "" +done + +echo "=========================================" +echo -e "${BLUE}Test completed for all nodes${NC}" +echo "=========================================" +echo "" + +# Cleanup test pods if created +if ! is_kind_cluster; then + echo "Cleaning up test pods..." 
+ kubectl delete pod -n netobserv-privileged -l app=ssl-test --ignore-not-found=true 2>/dev/null || true +fi + +echo "" +echo -e "${BLUE}Overall Summary:${NC}" +echo " Total tests executed: $TOTAL_TESTS" +echo -e " ${GREEN}Passed: $PASSED_TESTS${NC}" +echo -e " ${RED}Failed: $FAILED_TESTS${NC}" +echo "" + +# Calculate pass rate +if [ $TOTAL_TESTS -gt 0 ]; then + PASS_RATE=$((PASSED_TESTS * 100 / TOTAL_TESTS)) + echo " Pass rate: ${PASS_RATE}%" +fi diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 9479f8fc9..9b4befedb 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -86,7 +86,8 @@ type Flows struct { promoServer *http.Server sampleDecoder *ovnobserv.SampleDecoder - metrics *metrics.Metrics + metrics *metrics.Metrics + rbSSLTracer *flow.RingBufTracer } // ebpfFlowFetcher abstracts the interface of ebpf.FlowFetcher to allow dependency injection in tests @@ -97,6 +98,7 @@ type ebpfFlowFetcher interface { LookupAndDeleteMap(*metrics.Metrics) map[ebpf.BpfFlowId]model.BpfFlowContent DeleteMapsStaleEntries(timeOut time.Duration) ReadRingBuf() (ringbuf.Record, error) + ReadSSLRingBuf() (ringbuf.Record, error) } // FlowsAgent instantiates a new agent, given a configuration. 
@@ -177,6 +179,8 @@ func FlowsAgent(cfg *config.Agent) (*Flows, error) { BpfManBpfFSPath: cfg.BpfManBpfFSPath, EnableIPsecTracker: cfg.EnableIPsecTracking, FilterConfig: filterRules, + EnableOpenSSLTracking: cfg.EnableOpenSSLTracking, + OpenSSLPath: cfg.OpenSSLPath, } fetcher, err := tracer.NewFlowFetcher(ebpfConfig, m) @@ -208,6 +212,10 @@ func flowsAgent( mapTracer := flow.NewMapTracer(fetcher, cfg.CacheActiveTimeout, cfg.StaleEntriesEvictTimeout, m, s, cfg.EnableUDNMapping) rbTracer := flow.NewRingBufTracer(fetcher, mapTracer, cfg.CacheActiveTimeout, m) + var rbSSLTracer *flow.RingBufTracer + if cfg.EnableOpenSSLTracking { + rbSSLTracer = flow.NewSSLRingBufTracer(fetcher, mapTracer, cfg.CacheActiveTimeout, m) + } accounter := flow.NewAccounter(cfg.CacheMaxFlows, cfg.CacheActiveTimeout, time.Now, monotime.Now, m, s, cfg.EnableUDNMapping) limiter := flow.NewCapacityLimiter(m) @@ -224,6 +232,7 @@ func flowsAgent( informer: informer, promoServer: promoServer, metrics: m, + rbSSLTracer: rbSSLTracer, }, nil } @@ -394,6 +403,10 @@ func (f *Flows) buildAndStartPipeline(ctx context.Context) (*node.Terminal[[]*mo alog.Debug("connecting flows processing graph") mapTracer := node.AsStart(f.mapTracer.TraceLoop(ctx, f.cfg.ForceGC)) rbTracer := node.AsStart(f.rbTracer.TraceLoop(ctx)) + var rbSSLTracer *node.Start[*model.RawRecord] + if f.cfg.EnableOpenSSLTracking { + rbSSLTracer = node.AsStart(f.rbSSLTracer.TraceLoop(ctx)) + } accounter := node.AsMiddle(f.accounter.Account, node.ChannelBufferLen(f.cfg.BuffersLength)) @@ -410,6 +423,9 @@ func (f *Flows) buildAndStartPipeline(ctx context.Context) (*node.Terminal[[]*mo node.ChannelBufferLen(ebl)) rbTracer.SendsTo(accounter) + if rbSSLTracer != nil { + rbSSLTracer.SendsTo(accounter) + } mapTracer.SendsTo(limiter) accounter.SendsTo(limiter) @@ -418,6 +434,9 @@ func (f *Flows) buildAndStartPipeline(ctx context.Context) (*node.Terminal[[]*mo alog.Debug("starting graph") mapTracer.Start() rbTracer.Start() + if rbSSLTracer != nil { + 
rbSSLTracer.Start() + } return export, nil } diff --git a/pkg/config/config.go b/pkg/config/config.go index 079b67801..fc1182bad 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -264,6 +264,10 @@ type Agent struct { // This setting is only used when the interface name could not be found for a given index and MAC. // E.g. "0a:58=eth0" (used for ovn-kubernetes) PreferredInterfaceForMACPrefix string `env:"PREFERRED_INTERFACE_FOR_MAC_PREFIX"` + // EnableOpenSSLTracking enable tracking OpenSSL flows encryption + EnableOpenSSLTracking bool `env:"ENABLE_OPENSSL_TRACKING" envDefault:"false"` + // OpenSSLPath path to the openssl binary + OpenSSLPath string `env:"OPENSSL_PATH" envDefault:"/usr/bin/openssl"` /* Deprecated configs are listed below this line * See manageDeprecatedConfigs function for details diff --git a/pkg/ebpf/bpf_arm64_bpfel.go b/pkg/ebpf/bpf_arm64_bpfel.go index 1d8e230d0..59e4b1775 100644 --- a/pkg/ebpf/bpf_arm64_bpfel.go +++ b/pkg/ebpf/bpf_arm64_bpfel.go @@ -48,6 +48,11 @@ type BpfDnsFlowId struct { _ [1]byte } +type BpfDnsNameBuffer struct { + _ structs.HostLayout + Name [32]int8 +} + type BpfDnsRecordT struct { _ structs.HostLayout Latency uint64 @@ -174,6 +179,16 @@ type BpfPktDropsT struct { _ [5]byte } +type BpfSslDataEventT struct { + _ structs.HostLayout + TimestampNs uint64 + PidTgid uint64 + DataLen int32 + SslType uint8 + Data [16384]int8 + _ [3]byte +} + type BpfTcpFlagsT uint32 const ( @@ -243,6 +258,7 @@ type BpfSpecs struct { type BpfProgramSpecs struct { KfreeSkb *ebpf.ProgramSpec `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.ProgramSpec `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.ProgramSpec `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.ProgramSpec `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.ProgramSpec `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.ProgramSpec `ebpf:"tc_ingress_flow_parse"` @@ -268,12 +284,14 @@ type BpfMapSpecs struct { AggregatedFlows *ebpf.MapSpec 
`ebpf:"aggregated_flows"` DirectFlows *ebpf.MapSpec `ebpf:"direct_flows"` DnsFlows *ebpf.MapSpec `ebpf:"dns_flows"` + DnsNameMap *ebpf.MapSpec `ebpf:"dns_name_map"` FilterMap *ebpf.MapSpec `ebpf:"filter_map"` GlobalCounters *ebpf.MapSpec `ebpf:"global_counters"` IpsecEgressMap *ebpf.MapSpec `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.MapSpec `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.MapSpec `ebpf:"packet_record"` PeerFilterMap *ebpf.MapSpec `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.MapSpec `ebpf:"ssl_data_event_map"` } // BpfVariableSpecs contains global variables before they are loaded into the kernel. @@ -285,6 +303,7 @@ type BpfVariableSpecs struct { EnableFlowsFiltering *ebpf.VariableSpec `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.VariableSpec `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.VariableSpec `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.VariableSpec `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.VariableSpec `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.VariableSpec `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.VariableSpec `ebpf:"enable_rtt"` @@ -293,6 +312,7 @@ type BpfVariableSpecs struct { HasFilterSampling *ebpf.VariableSpec `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.VariableSpec `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.VariableSpec `ebpf:"sampling"` + SslDataEvent *ebpf.VariableSpec `ebpf:"ssl_data_event"` TraceMessages *ebpf.VariableSpec `ebpf:"trace_messages"` Unused8 *ebpf.VariableSpec `ebpf:"unused8"` Unused9 *ebpf.VariableSpec `ebpf:"unused9"` @@ -322,12 +342,14 @@ type BpfMaps struct { AggregatedFlows *ebpf.Map `ebpf:"aggregated_flows"` DirectFlows *ebpf.Map `ebpf:"direct_flows"` DnsFlows *ebpf.Map `ebpf:"dns_flows"` + DnsNameMap *ebpf.Map `ebpf:"dns_name_map"` FilterMap *ebpf.Map `ebpf:"filter_map"` GlobalCounters *ebpf.Map `ebpf:"global_counters"` IpsecEgressMap *ebpf.Map `ebpf:"ipsec_egress_map"` IpsecIngressMap 
*ebpf.Map `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.Map `ebpf:"packet_record"` PeerFilterMap *ebpf.Map `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.Map `ebpf:"ssl_data_event_map"` } func (m *BpfMaps) Close() error { @@ -336,12 +358,14 @@ func (m *BpfMaps) Close() error { m.AggregatedFlows, m.DirectFlows, m.DnsFlows, + m.DnsNameMap, m.FilterMap, m.GlobalCounters, m.IpsecEgressMap, m.IpsecIngressMap, m.PacketRecord, m.PeerFilterMap, + m.SslDataEventMap, ) } @@ -354,6 +378,7 @@ type BpfVariables struct { EnableFlowsFiltering *ebpf.Variable `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.Variable `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.Variable `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.Variable `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.Variable `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.Variable `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.Variable `ebpf:"enable_rtt"` @@ -362,6 +387,7 @@ type BpfVariables struct { HasFilterSampling *ebpf.Variable `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.Variable `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.Variable `ebpf:"sampling"` + SslDataEvent *ebpf.Variable `ebpf:"ssl_data_event"` TraceMessages *ebpf.Variable `ebpf:"trace_messages"` Unused8 *ebpf.Variable `ebpf:"unused8"` Unused9 *ebpf.Variable `ebpf:"unused9"` @@ -373,6 +399,7 @@ type BpfVariables struct { type BpfPrograms struct { KfreeSkb *ebpf.Program `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.Program `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.Program `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.Program `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.Program `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.Program `ebpf:"tc_ingress_flow_parse"` @@ -394,6 +421,7 @@ func (p *BpfPrograms) Close() error { return _BpfClose( p.KfreeSkb, p.NetworkEventsMonitoring, + p.ProbeEntrySSL_write, p.TcEgressFlowParse, 
p.TcEgressPcaParse, p.TcIngressFlowParse, diff --git a/pkg/ebpf/bpf_arm64_bpfel.o b/pkg/ebpf/bpf_arm64_bpfel.o index 8889cec6e..25066a36d 100644 Binary files a/pkg/ebpf/bpf_arm64_bpfel.o and b/pkg/ebpf/bpf_arm64_bpfel.o differ diff --git a/pkg/ebpf/bpf_powerpc_bpfel.go b/pkg/ebpf/bpf_powerpc_bpfel.go index ef44d0fb4..f7a0d22a3 100644 --- a/pkg/ebpf/bpf_powerpc_bpfel.go +++ b/pkg/ebpf/bpf_powerpc_bpfel.go @@ -48,6 +48,11 @@ type BpfDnsFlowId struct { _ [1]byte } +type BpfDnsNameBuffer struct { + _ structs.HostLayout + Name [32]int8 +} + type BpfDnsRecordT struct { _ structs.HostLayout Latency uint64 @@ -174,6 +179,16 @@ type BpfPktDropsT struct { _ [5]byte } +type BpfSslDataEventT struct { + _ structs.HostLayout + TimestampNs uint64 + PidTgid uint64 + DataLen int32 + SslType uint8 + Data [16384]int8 + _ [3]byte +} + type BpfTcpFlagsT uint32 const ( @@ -243,6 +258,7 @@ type BpfSpecs struct { type BpfProgramSpecs struct { KfreeSkb *ebpf.ProgramSpec `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.ProgramSpec `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.ProgramSpec `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.ProgramSpec `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.ProgramSpec `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.ProgramSpec `ebpf:"tc_ingress_flow_parse"` @@ -268,12 +284,14 @@ type BpfMapSpecs struct { AggregatedFlows *ebpf.MapSpec `ebpf:"aggregated_flows"` DirectFlows *ebpf.MapSpec `ebpf:"direct_flows"` DnsFlows *ebpf.MapSpec `ebpf:"dns_flows"` + DnsNameMap *ebpf.MapSpec `ebpf:"dns_name_map"` FilterMap *ebpf.MapSpec `ebpf:"filter_map"` GlobalCounters *ebpf.MapSpec `ebpf:"global_counters"` IpsecEgressMap *ebpf.MapSpec `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.MapSpec `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.MapSpec `ebpf:"packet_record"` PeerFilterMap *ebpf.MapSpec `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.MapSpec `ebpf:"ssl_data_event_map"` } // BpfVariableSpecs contains global variables before 
they are loaded into the kernel. @@ -285,6 +303,7 @@ type BpfVariableSpecs struct { EnableFlowsFiltering *ebpf.VariableSpec `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.VariableSpec `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.VariableSpec `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.VariableSpec `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.VariableSpec `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.VariableSpec `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.VariableSpec `ebpf:"enable_rtt"` @@ -293,6 +312,7 @@ type BpfVariableSpecs struct { HasFilterSampling *ebpf.VariableSpec `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.VariableSpec `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.VariableSpec `ebpf:"sampling"` + SslDataEvent *ebpf.VariableSpec `ebpf:"ssl_data_event"` TraceMessages *ebpf.VariableSpec `ebpf:"trace_messages"` Unused8 *ebpf.VariableSpec `ebpf:"unused8"` Unused9 *ebpf.VariableSpec `ebpf:"unused9"` @@ -322,12 +342,14 @@ type BpfMaps struct { AggregatedFlows *ebpf.Map `ebpf:"aggregated_flows"` DirectFlows *ebpf.Map `ebpf:"direct_flows"` DnsFlows *ebpf.Map `ebpf:"dns_flows"` + DnsNameMap *ebpf.Map `ebpf:"dns_name_map"` FilterMap *ebpf.Map `ebpf:"filter_map"` GlobalCounters *ebpf.Map `ebpf:"global_counters"` IpsecEgressMap *ebpf.Map `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.Map `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.Map `ebpf:"packet_record"` PeerFilterMap *ebpf.Map `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.Map `ebpf:"ssl_data_event_map"` } func (m *BpfMaps) Close() error { @@ -336,12 +358,14 @@ func (m *BpfMaps) Close() error { m.AggregatedFlows, m.DirectFlows, m.DnsFlows, + m.DnsNameMap, m.FilterMap, m.GlobalCounters, m.IpsecEgressMap, m.IpsecIngressMap, m.PacketRecord, m.PeerFilterMap, + m.SslDataEventMap, ) } @@ -354,6 +378,7 @@ type BpfVariables struct { EnableFlowsFiltering *ebpf.Variable `ebpf:"enable_flows_filtering"` EnableIpsec 
*ebpf.Variable `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.Variable `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.Variable `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.Variable `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.Variable `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.Variable `ebpf:"enable_rtt"` @@ -362,6 +387,7 @@ type BpfVariables struct { HasFilterSampling *ebpf.Variable `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.Variable `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.Variable `ebpf:"sampling"` + SslDataEvent *ebpf.Variable `ebpf:"ssl_data_event"` TraceMessages *ebpf.Variable `ebpf:"trace_messages"` Unused8 *ebpf.Variable `ebpf:"unused8"` Unused9 *ebpf.Variable `ebpf:"unused9"` @@ -373,6 +399,7 @@ type BpfVariables struct { type BpfPrograms struct { KfreeSkb *ebpf.Program `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.Program `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.Program `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.Program `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.Program `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.Program `ebpf:"tc_ingress_flow_parse"` @@ -394,6 +421,7 @@ func (p *BpfPrograms) Close() error { return _BpfClose( p.KfreeSkb, p.NetworkEventsMonitoring, + p.ProbeEntrySSL_write, p.TcEgressFlowParse, p.TcEgressPcaParse, p.TcIngressFlowParse, diff --git a/pkg/ebpf/bpf_powerpc_bpfel.o b/pkg/ebpf/bpf_powerpc_bpfel.o index 7d2c86ee0..f865588df 100644 Binary files a/pkg/ebpf/bpf_powerpc_bpfel.o and b/pkg/ebpf/bpf_powerpc_bpfel.o differ diff --git a/pkg/ebpf/bpf_s390_bpfeb.go b/pkg/ebpf/bpf_s390_bpfeb.go index 950e855ac..74b83ced4 100644 --- a/pkg/ebpf/bpf_s390_bpfeb.go +++ b/pkg/ebpf/bpf_s390_bpfeb.go @@ -48,6 +48,11 @@ type BpfDnsFlowId struct { _ [1]byte } +type BpfDnsNameBuffer struct { + _ structs.HostLayout + Name [32]int8 +} + type BpfDnsRecordT struct { _ structs.HostLayout Latency 
uint64 @@ -174,6 +179,16 @@ type BpfPktDropsT struct { _ [5]byte } +type BpfSslDataEventT struct { + _ structs.HostLayout + TimestampNs uint64 + PidTgid uint64 + DataLen int32 + SslType uint8 + Data [16384]int8 + _ [3]byte +} + type BpfTcpFlagsT uint32 const ( @@ -243,6 +258,7 @@ type BpfSpecs struct { type BpfProgramSpecs struct { KfreeSkb *ebpf.ProgramSpec `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.ProgramSpec `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.ProgramSpec `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.ProgramSpec `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.ProgramSpec `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.ProgramSpec `ebpf:"tc_ingress_flow_parse"` @@ -268,12 +284,14 @@ type BpfMapSpecs struct { AggregatedFlows *ebpf.MapSpec `ebpf:"aggregated_flows"` DirectFlows *ebpf.MapSpec `ebpf:"direct_flows"` DnsFlows *ebpf.MapSpec `ebpf:"dns_flows"` + DnsNameMap *ebpf.MapSpec `ebpf:"dns_name_map"` FilterMap *ebpf.MapSpec `ebpf:"filter_map"` GlobalCounters *ebpf.MapSpec `ebpf:"global_counters"` IpsecEgressMap *ebpf.MapSpec `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.MapSpec `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.MapSpec `ebpf:"packet_record"` PeerFilterMap *ebpf.MapSpec `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.MapSpec `ebpf:"ssl_data_event_map"` } // BpfVariableSpecs contains global variables before they are loaded into the kernel. 
@@ -285,6 +303,7 @@ type BpfVariableSpecs struct { EnableFlowsFiltering *ebpf.VariableSpec `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.VariableSpec `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.VariableSpec `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.VariableSpec `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.VariableSpec `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.VariableSpec `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.VariableSpec `ebpf:"enable_rtt"` @@ -293,6 +312,7 @@ type BpfVariableSpecs struct { HasFilterSampling *ebpf.VariableSpec `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.VariableSpec `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.VariableSpec `ebpf:"sampling"` + SslDataEvent *ebpf.VariableSpec `ebpf:"ssl_data_event"` TraceMessages *ebpf.VariableSpec `ebpf:"trace_messages"` Unused8 *ebpf.VariableSpec `ebpf:"unused8"` Unused9 *ebpf.VariableSpec `ebpf:"unused9"` @@ -322,12 +342,14 @@ type BpfMaps struct { AggregatedFlows *ebpf.Map `ebpf:"aggregated_flows"` DirectFlows *ebpf.Map `ebpf:"direct_flows"` DnsFlows *ebpf.Map `ebpf:"dns_flows"` + DnsNameMap *ebpf.Map `ebpf:"dns_name_map"` FilterMap *ebpf.Map `ebpf:"filter_map"` GlobalCounters *ebpf.Map `ebpf:"global_counters"` IpsecEgressMap *ebpf.Map `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.Map `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.Map `ebpf:"packet_record"` PeerFilterMap *ebpf.Map `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.Map `ebpf:"ssl_data_event_map"` } func (m *BpfMaps) Close() error { @@ -336,12 +358,14 @@ func (m *BpfMaps) Close() error { m.AggregatedFlows, m.DirectFlows, m.DnsFlows, + m.DnsNameMap, m.FilterMap, m.GlobalCounters, m.IpsecEgressMap, m.IpsecIngressMap, m.PacketRecord, m.PeerFilterMap, + m.SslDataEventMap, ) } @@ -354,6 +378,7 @@ type BpfVariables struct { EnableFlowsFiltering *ebpf.Variable `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.Variable 
`ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.Variable `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.Variable `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.Variable `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.Variable `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.Variable `ebpf:"enable_rtt"` @@ -362,6 +387,7 @@ type BpfVariables struct { HasFilterSampling *ebpf.Variable `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.Variable `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.Variable `ebpf:"sampling"` + SslDataEvent *ebpf.Variable `ebpf:"ssl_data_event"` TraceMessages *ebpf.Variable `ebpf:"trace_messages"` Unused8 *ebpf.Variable `ebpf:"unused8"` Unused9 *ebpf.Variable `ebpf:"unused9"` @@ -373,6 +399,7 @@ type BpfVariables struct { type BpfPrograms struct { KfreeSkb *ebpf.Program `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.Program `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.Program `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.Program `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.Program `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.Program `ebpf:"tc_ingress_flow_parse"` @@ -394,6 +421,7 @@ func (p *BpfPrograms) Close() error { return _BpfClose( p.KfreeSkb, p.NetworkEventsMonitoring, + p.ProbeEntrySSL_write, p.TcEgressFlowParse, p.TcEgressPcaParse, p.TcIngressFlowParse, diff --git a/pkg/ebpf/bpf_s390_bpfeb.o b/pkg/ebpf/bpf_s390_bpfeb.o index ef31715a4..edf7a6c2f 100644 Binary files a/pkg/ebpf/bpf_s390_bpfeb.o and b/pkg/ebpf/bpf_s390_bpfeb.o differ diff --git a/pkg/ebpf/bpf_x86_bpfel.go b/pkg/ebpf/bpf_x86_bpfel.go index b0a7158e8..fc58ccf12 100644 --- a/pkg/ebpf/bpf_x86_bpfel.go +++ b/pkg/ebpf/bpf_x86_bpfel.go @@ -48,6 +48,11 @@ type BpfDnsFlowId struct { _ [1]byte } +type BpfDnsNameBuffer struct { + _ structs.HostLayout + Name [32]int8 +} + type BpfDnsRecordT struct { _ structs.HostLayout Latency uint64 @@ -174,6 +179,16 @@ type 
BpfPktDropsT struct { _ [5]byte } +type BpfSslDataEventT struct { + _ structs.HostLayout + TimestampNs uint64 + PidTgid uint64 + DataLen int32 + SslType uint8 + Data [16384]int8 + _ [3]byte +} + type BpfTcpFlagsT uint32 const ( @@ -243,6 +258,7 @@ type BpfSpecs struct { type BpfProgramSpecs struct { KfreeSkb *ebpf.ProgramSpec `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.ProgramSpec `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.ProgramSpec `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.ProgramSpec `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.ProgramSpec `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.ProgramSpec `ebpf:"tc_ingress_flow_parse"` @@ -268,12 +284,14 @@ type BpfMapSpecs struct { AggregatedFlows *ebpf.MapSpec `ebpf:"aggregated_flows"` DirectFlows *ebpf.MapSpec `ebpf:"direct_flows"` DnsFlows *ebpf.MapSpec `ebpf:"dns_flows"` + DnsNameMap *ebpf.MapSpec `ebpf:"dns_name_map"` FilterMap *ebpf.MapSpec `ebpf:"filter_map"` GlobalCounters *ebpf.MapSpec `ebpf:"global_counters"` IpsecEgressMap *ebpf.MapSpec `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.MapSpec `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.MapSpec `ebpf:"packet_record"` PeerFilterMap *ebpf.MapSpec `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.MapSpec `ebpf:"ssl_data_event_map"` } // BpfVariableSpecs contains global variables before they are loaded into the kernel. 
@@ -285,6 +303,7 @@ type BpfVariableSpecs struct { EnableFlowsFiltering *ebpf.VariableSpec `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.VariableSpec `ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.VariableSpec `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.VariableSpec `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.VariableSpec `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.VariableSpec `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.VariableSpec `ebpf:"enable_rtt"` @@ -293,6 +312,7 @@ type BpfVariableSpecs struct { HasFilterSampling *ebpf.VariableSpec `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.VariableSpec `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.VariableSpec `ebpf:"sampling"` + SslDataEvent *ebpf.VariableSpec `ebpf:"ssl_data_event"` TraceMessages *ebpf.VariableSpec `ebpf:"trace_messages"` Unused8 *ebpf.VariableSpec `ebpf:"unused8"` Unused9 *ebpf.VariableSpec `ebpf:"unused9"` @@ -322,12 +342,14 @@ type BpfMaps struct { AggregatedFlows *ebpf.Map `ebpf:"aggregated_flows"` DirectFlows *ebpf.Map `ebpf:"direct_flows"` DnsFlows *ebpf.Map `ebpf:"dns_flows"` + DnsNameMap *ebpf.Map `ebpf:"dns_name_map"` FilterMap *ebpf.Map `ebpf:"filter_map"` GlobalCounters *ebpf.Map `ebpf:"global_counters"` IpsecEgressMap *ebpf.Map `ebpf:"ipsec_egress_map"` IpsecIngressMap *ebpf.Map `ebpf:"ipsec_ingress_map"` PacketRecord *ebpf.Map `ebpf:"packet_record"` PeerFilterMap *ebpf.Map `ebpf:"peer_filter_map"` + SslDataEventMap *ebpf.Map `ebpf:"ssl_data_event_map"` } func (m *BpfMaps) Close() error { @@ -336,12 +358,14 @@ func (m *BpfMaps) Close() error { m.AggregatedFlows, m.DirectFlows, m.DnsFlows, + m.DnsNameMap, m.FilterMap, m.GlobalCounters, m.IpsecEgressMap, m.IpsecIngressMap, m.PacketRecord, m.PeerFilterMap, + m.SslDataEventMap, ) } @@ -354,6 +378,7 @@ type BpfVariables struct { EnableFlowsFiltering *ebpf.Variable `ebpf:"enable_flows_filtering"` EnableIpsec *ebpf.Variable 
`ebpf:"enable_ipsec"` EnableNetworkEventsMonitoring *ebpf.Variable `ebpf:"enable_network_events_monitoring"` + EnableOpensslTracking *ebpf.Variable `ebpf:"enable_openssl_tracking"` EnablePca *ebpf.Variable `ebpf:"enable_pca"` EnablePktTranslationTracking *ebpf.Variable `ebpf:"enable_pkt_translation_tracking"` EnableRtt *ebpf.Variable `ebpf:"enable_rtt"` @@ -362,6 +387,7 @@ type BpfVariables struct { HasFilterSampling *ebpf.Variable `ebpf:"has_filter_sampling"` NetworkEventsMonitoringGroupid *ebpf.Variable `ebpf:"network_events_monitoring_groupid"` Sampling *ebpf.Variable `ebpf:"sampling"` + SslDataEvent *ebpf.Variable `ebpf:"ssl_data_event"` TraceMessages *ebpf.Variable `ebpf:"trace_messages"` Unused8 *ebpf.Variable `ebpf:"unused8"` Unused9 *ebpf.Variable `ebpf:"unused9"` @@ -373,6 +399,7 @@ type BpfVariables struct { type BpfPrograms struct { KfreeSkb *ebpf.Program `ebpf:"kfree_skb"` NetworkEventsMonitoring *ebpf.Program `ebpf:"network_events_monitoring"` + ProbeEntrySSL_write *ebpf.Program `ebpf:"probe_entry_SSL_write"` TcEgressFlowParse *ebpf.Program `ebpf:"tc_egress_flow_parse"` TcEgressPcaParse *ebpf.Program `ebpf:"tc_egress_pca_parse"` TcIngressFlowParse *ebpf.Program `ebpf:"tc_ingress_flow_parse"` @@ -394,6 +421,7 @@ func (p *BpfPrograms) Close() error { return _BpfClose( p.KfreeSkb, p.NetworkEventsMonitoring, + p.ProbeEntrySSL_write, p.TcEgressFlowParse, p.TcEgressPcaParse, p.TcIngressFlowParse, diff --git a/pkg/ebpf/bpf_x86_bpfel.o b/pkg/ebpf/bpf_x86_bpfel.o index cbbe0157d..80f7d4848 100644 Binary files a/pkg/ebpf/bpf_x86_bpfel.o and b/pkg/ebpf/bpf_x86_bpfel.o differ diff --git a/pkg/ebpf/core b/pkg/ebpf/core new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/flow/tracer_ringbuf.go b/pkg/flow/tracer_ringbuf.go index fb61e929a..f0c34ef41 100644 --- a/pkg/flow/tracer_ringbuf.go +++ b/pkg/flow/tracer_ringbuf.go @@ -3,8 +3,10 @@ package flow import ( "bytes" "context" + "encoding/binary" "errors" "fmt" + "strconv" "sync/atomic" "syscall" "time" 
@@ -17,22 +19,29 @@ import ( "github.com/sirupsen/logrus" ) +const maxSSLDataSize = 16 * 1024 + var rtlog = logrus.WithField("component", "flow.RingBufTracer") // RingBufTracer receives single-packet flows via ringbuffer (usually, these that couldn't be // added in the eBPF kernel space due to the map being full or busy) and submits them to the // userspace Aggregator map type RingBufTracer struct { - mapFlusher mapFlusher - ringBuffer ringBufReader - stats stats - metrics *metrics.Metrics + mapFlusher mapFlusher + ringBuffer ringBufReader + ringBufferSSL ringBufSSLReader + stats stats + metrics *metrics.Metrics } type ringBufReader interface { ReadRingBuf() (ringbuf.Record, error) } +type ringBufSSLReader interface { + ReadSSLRingBuf() (ringbuf.Record, error) +} + // stats supports atomic logging of ringBuffer metrics type stats struct { loggingTimeout time.Duration @@ -54,22 +63,46 @@ func NewRingBufTracer(reader ringBufReader, flusher mapFlusher, logTimeout time. } } +func NewSSLRingBufTracer(reader ringBufSSLReader, flusher mapFlusher, logTimeout time.Duration, m *metrics.Metrics) *RingBufTracer { + return &RingBufTracer{ + mapFlusher: flusher, + ringBufferSSL: reader, + stats: stats{loggingTimeout: logTimeout}, + metrics: m, + } +} + func (m *RingBufTracer) TraceLoop(ctx context.Context) node.StartFunc[*model.RawRecord] { return func(out chan<- *model.RawRecord) { debugging := logrus.IsLevelEnabled(logrus.DebugLevel) + if m.ringBufferSSL != nil { + rtlog.Info("SSL RingBuf tracer started - listening for SSL events") + } for { select { case <-ctx.Done(): rtlog.Debug("exiting trace loop due to context cancellation") return default: - if err := m.listenAndForwardRingBuffer(debugging, out); err != nil { - if errors.Is(err, ringbuf.ErrClosed) { - rtlog.Debug("Received signal, exiting..") - return + if m.ringBuffer != nil { + if err := m.listenAndForwardRingBuffer(debugging, out); err != nil { + if errors.Is(err, ringbuf.ErrClosed) { + rtlog.Debug("Received signal, 
exiting..") + return + } + rtlog.WithError(err).Warn("ignoring flow event") + continue + } + } + if m.ringBufferSSL != nil { + if err := m.listenAndForwardRingBufferSSL(out); err != nil { + if errors.Is(err, ringbuf.ErrClosed) { + rtlog.Debug("Received signal, exiting..") + return + } + rtlog.WithError(err).Warn("ignoring SSL event") + continue } - rtlog.WithError(err).Warn("ignoring flow event") - continue } } } @@ -100,6 +133,62 @@ func (m *RingBufTracer) listenAndForwardRingBuffer(debugging bool, forwardCh cha return nil } +func (m *RingBufTracer) listenAndForwardRingBufferSSL(_ chan<- *model.RawRecord) error { + rtlog.Debug("listenAndForwardRingBufferSSL: waiting for SSL event...") + event, err := m.ringBufferSSL.ReadSSLRingBuf() + if err != nil { + m.metrics.Errors.WithErrorName("ringbuffer", "CannotReadSSLRingbuffer", metrics.HighSeverity).Inc() + return fmt.Errorf("reading from SSL ring buffer: %w", err) + } + + rtlog.Infof("SSL ringbuffer event received! Size: %d bytes", len(event.RawSample)) + + // Parse SSL event structure: timestamp(8) + pid_tgid(8) + data_len(4) + ssl_type(1) + data[16KB] + buf := bytes.NewReader(event.RawSample) + + var timestamp uint64 + var pidTgid uint64 + var dataLen int32 + var sslType uint8 + + if err := binary.Read(buf, binary.LittleEndian, ×tamp); err != nil { + rtlog.Warnf("Failed to read timestamp: %v", err) + return nil + } + if err := binary.Read(buf, binary.LittleEndian, &pidTgid); err != nil { + rtlog.Warnf("Failed to read pid_tgid: %v", err) + return nil + } + if err := binary.Read(buf, binary.LittleEndian, &dataLen); err != nil { + rtlog.Warnf("Failed to read data_len: %v", err) + return nil + } + if err := binary.Read(buf, binary.LittleEndian, &sslType); err != nil { + rtlog.Warnf("Failed to read ssl_type: %v", err) + return nil + } + + // Read the actual SSL data (up to dataLen bytes) + if dataLen > 0 && dataLen <= maxSSLDataSize { + data := make([]byte, dataLen) + n, err := buf.Read(data) + if err != nil && n < 
int(dataLen) { + rtlog.Warnf("Failed to read SSL data: read %d/%d bytes, err=%v", n, dataLen, err) + } + + rtlog.Debugf("SSL EVENT: pid=%d, timestamp=%d, data_len=%d, ssl_type=%d", + pidTgid, timestamp, dataLen, sslType) + printLen := min(256, len(data)) + rtlog.Debugf("SSL data as string: %s", string(data[:printLen])) + m.metrics.OpenSSLDataEventsCounter.Increase(strconv.Itoa(int(sslType)), int(dataLen)) + } else { + rtlog.Debugf("SSL EVENT: pid=%d, timestamp=%d, data_len=%d (invalid), ssl_type=%d", + pidTgid, timestamp, dataLen, sslType) + } + + return nil +} + // logRingBufferFlows avoids flooding logs on long series of evicted flows by grouping how // many flows are forwarded func (m *stats) logRingBufferFlows(mapFullErr bool) { diff --git a/pkg/kernel/kernel_utils.go b/pkg/kernel/kernel_utils.go index 18f6cc05a..fbe08cfb0 100644 --- a/pkg/kernel/kernel_utils.go +++ b/pkg/kernel/kernel_utils.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + package kernel import ( diff --git a/pkg/kernel/kernel_utils_test.go b/pkg/kernel/kernel_utils_test.go index 968bb8531..4b3e918cb 100644 --- a/pkg/kernel/kernel_utils_test.go +++ b/pkg/kernel/kernel_utils_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + package kernel import ( diff --git a/pkg/maps/maps.go b/pkg/maps/maps.go index d2d988459..905ba5877 100644 --- a/pkg/maps/maps.go +++ b/pkg/maps/maps.go @@ -12,4 +12,5 @@ var Maps = []string{ "peer_filter_map", "ipsec_ingress_map", "ipsec_egress_map", + "ssl_data_event_map", } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 70b9d73bc..68f44da4d 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -164,6 +164,13 @@ var ( "mac", "retries", ) + opensslDataEventsCounter = defineMetric( + "openssl_data_events_total", + "Number of OpenSSL data events", + TypeCounter, + "openssl_type", + "data_len", + ) ) func (def *MetricDefinition) mapLabels(labels []string) prometheus.Labels { @@ -194,16 +201,17 @@ type Metrics struct { Settings 
*Settings // Shared metrics: - EvictionCounter *EvictionCounter - EvictedFlowsCounter *EvictionCounter - EvictedPacketsCounter *EvictionCounter - DroppedFlowsCounter *EvictionCounter - FilteredFlowsCounter *EvictionCounter - NetworkEventsCounter *EvictionCounter - FlowBufferSizeGauge *FlowBufferSizeGauge - Errors *ErrorCounter - FlowEnrichmentCounter *FlowEnrichmentCounter - InterfaceEventsCounter *InterfaceEventsCounter + EvictionCounter *EvictionCounter + EvictedFlowsCounter *EvictionCounter + EvictedPacketsCounter *EvictionCounter + DroppedFlowsCounter *EvictionCounter + FilteredFlowsCounter *EvictionCounter + NetworkEventsCounter *EvictionCounter + FlowBufferSizeGauge *FlowBufferSizeGauge + Errors *ErrorCounter + FlowEnrichmentCounter *FlowEnrichmentCounter + InterfaceEventsCounter *InterfaceEventsCounter + OpenSSLDataEventsCounter *OpenSSLDataEventsCounter } func NewMetrics(settings *Settings) *Metrics { @@ -220,6 +228,7 @@ func NewMetrics(settings *Settings) *Metrics { m.Errors = &ErrorCounter{vec: m.NewCounterVec(&errorsCounter)} m.FlowEnrichmentCounter = &FlowEnrichmentCounter{vec: m.NewCounterVec(&flowEnrichmentCounter)} m.InterfaceEventsCounter = newInterfaceEventsCounter(m.NewCounterVec(&interfaceEventsCounter), settings.Level) + m.OpenSSLDataEventsCounter = &OpenSSLDataEventsCounter{vec: m.NewCounterVec(&opensslDataEventsCounter)} return m } @@ -346,6 +355,14 @@ type InterfaceEventsCounter struct { Increase func(typez, ifname string, ifindex int, netns string, mac [6]uint8, retries int) } +type OpenSSLDataEventsCounter struct { + vec *prometheus.CounterVec +} + +func (c *OpenSSLDataEventsCounter) Increase(sslType string, dataLen int) { + c.vec.WithLabelValues(sslType, strconv.Itoa(dataLen)).Inc() +} + func newInterfaceEventsCounter(vec *prometheus.CounterVec, lvl Level) *InterfaceEventsCounter { switch lvl { case LevelTrace: diff --git a/pkg/test/tracer_fake.go b/pkg/test/tracer_fake.go index e5954fe80..398fe7a9d 100644 --- a/pkg/test/tracer_fake.go +++ 
b/pkg/test/tracer_fake.go @@ -65,6 +65,10 @@ func (m *TracerFake) ReadRingBuf() (ringbuf.Record, error) { return <-m.ringBuf, nil } +func (m *TracerFake) ReadSSLRingBuf() (ringbuf.Record, error) { + return <-m.ringBuf, nil +} + func (m *TracerFake) AppendLookupResults(results map[ebpf.BpfFlowId]model.BpfFlowContent) { m.mapLookups <- results } diff --git a/pkg/tracer/tracer.go b/pkg/tracer/tracer.go index 5ddb94448..55bc203dc 100644 --- a/pkg/tracer/tracer.go +++ b/pkg/tracer/tracer.go @@ -63,6 +63,9 @@ const ( networkEventsMonitoringHook = "psample_sample_packet" defaultNetworkEventsGroupID = 10 constEnableIPsec = "enable_ipsec" + constEnableOpenSSLTracking = "enable_openssl_tracking" + sslDataEventMap = "ssl_data_event_map" + dnsNameMap = "dns_name_map" ) const ( @@ -100,6 +103,8 @@ type FlowFetcher struct { xfrmOutputKretProbeLink link.Link xfrmInputKProbeLink link.Link xfrmOutputKProbeLink link.Link + sslUprobe link.Link + sslDataEventsReader *ringbuf.Reader lookupAndDeleteSupported bool useEbpfManager bool pinDir string @@ -125,6 +130,8 @@ type FlowFetcherConfig struct { BpfManBpfFSPath string EnableIPsecTracker bool FilterConfig []*FilterConfig + EnableOpenSSLTracking bool + OpenSSLPath string } type variablesMapping struct { @@ -137,6 +144,8 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) (*FlowFetcher, e var pktDropsLink, networkEventsMonitoringLink, rttFentryLink, rttKprobeLink link.Link var nfNatManIPLink, xfrmInputKretProbeLink, xfrmOutputKretProbeLink link.Link var xfrmInputKProbeLink, xfrmOutputKProbeLink link.Link + var sslUprobe link.Link + var sslDataEvents *ringbuf.Reader var err error objects := ebpf.BpfObjects{} var pinDir string @@ -171,6 +180,8 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) (*FlowFetcher, e pcaRecordsMap, ipsecInputMap, ipsecOutputMap, + sslDataEventMap, + dnsNameMap, } { spec.Maps[m].Pinning = 0 } @@ -202,6 +213,11 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) 
(*FlowFetcher, e objects.TcIngressPcaParse = nil delete(spec.Programs, constPcaEnable) + // Minimize SSL maps if SSL is disabled + if !cfg.EnableOpenSSLTracking { + spec.Maps[sslDataEventMap].MaxEntries = 1 + } + if cfg.EnablePktDrops && !oldKernel && !rtOldKernel { pktDropsLink, err = link.Tracepoint("skb", pktDropHook, objects.KfreeSkb, nil) if err != nil { @@ -272,6 +288,27 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) (*FlowFetcher, e return nil, fmt.Errorf("failed to attach the BPF KretProbe program to xfrm_output: %w", err) } } + + // Setup SSL tracking if enabled + if cfg.EnableOpenSSLTracking { + // Read SSL data events from ringbuf + sslDataEvents, err = ringbuf.NewReader(objects.BpfMaps.SslDataEventMap) + if err != nil { + return nil, fmt.Errorf("accessing SSL data event ringbuffer: %w", err) + } + + // Attach SSL uprobes + sslWriteLink, err := link.OpenExecutable(cfg.OpenSSLPath) + if err != nil { + return nil, fmt.Errorf("failed to open executable %s: %w", cfg.OpenSSLPath, err) + } + sslUprobe, err = sslWriteLink.Uprobe("SSL_write", objects.ProbeEntrySSL_write, nil) + if err != nil { + return nil, fmt.Errorf("failed to attach SSL_write uprobe: %w", err) + } + log.Infof("SSL tracking enabled with library: %s", cfg.OpenSSLPath) + } + } else { pinDir = cfg.BpfManBpfFSPath opts := &cilium.LoadPinOptions{ @@ -340,6 +377,18 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) (*FlowFetcher, e if err != nil { return nil, fmt.Errorf("failed to load %s: %w", mPath, err) } + log.Infof("BPFManager mode: loading SSL data event pinned maps") + mPath = path.Join(pinDir, sslDataEventMap) + objects.BpfMaps.SslDataEventMap, err = cilium.LoadPinnedMap(mPath, opts) + if err != nil { + return nil, fmt.Errorf("failed to load %s: %w", mPath, err) + } + log.Infof("BPFManager mode: loading DNS name pinned maps") + mPath = path.Join(pinDir, dnsNameMap) + objects.BpfMaps.DnsNameMap, err = cilium.LoadPinnedMap(mPath, opts) + if err != nil { + 
return nil, fmt.Errorf("failed to load %s: %w", mPath, err) + } } if filter != nil { @@ -377,6 +426,8 @@ func NewFlowFetcher(cfg *FlowFetcherConfig, m *metrics.Metrics) (*FlowFetcher, e xfrmOutputKretProbeLink: xfrmOutputKretProbeLink, xfrmInputKProbeLink: xfrmInputKProbeLink, xfrmOutputKProbeLink: xfrmOutputKProbeLink, + sslUprobe: sslUprobe, + sslDataEventsReader: sslDataEvents, egressTCXLink: egressTCXLink, ingressTCXLink: ingressTCXLink, egressTCXAnchor: tcxAnchor(cfg.EgressTCXAnchor), @@ -738,6 +789,18 @@ func (m *FlowFetcher) Close() error { } } + if m.sslUprobe != nil { + if err := m.sslUprobe.Close(); err != nil { + errs = append(errs, err) + } + } + + if m.sslDataEventsReader != nil { + if err := m.sslDataEventsReader.Close(); err != nil { + errs = append(errs, err) + } + } + // m.ringbufReader.Read is a blocking operation, so we need to close the ring buffer // from another goroutine to avoid the system not being able to exit if there // isn't traffic in a given interface @@ -813,6 +876,18 @@ func (m *FlowFetcher) Close() error { if err := m.objects.IpsecEgressMap.Close(); err != nil { errs = append(errs, err) } + if err := m.objects.SslDataEventMap.Unpin(); err != nil { + errs = append(errs, err) + } + if err := m.objects.SslDataEventMap.Close(); err != nil { + errs = append(errs, err) + } + if err := m.objects.DnsNameMap.Unpin(); err != nil { + errs = append(errs, err) + } + if err := m.objects.DnsNameMap.Close(); err != nil { + errs = append(errs, err) + } if len(errs) == 0 { m.objects = nil } @@ -918,6 +993,10 @@ func (m *FlowFetcher) ReadRingBuf() (ringbuf.Record, error) { return m.ringbufReader.Read() } +func (m *FlowFetcher) ReadSSLRingBuf() (ringbuf.Record, error) { + return m.sslDataEventsReader.Read() +} + // LookupAndDeleteMap reads all the entries from the eBPF map and removes them from it. 
// TODO: detect whether BatchLookupAndDelete is supported (Kernel>=5.6) and use it selectively // Supported Lookup/Delete operations by kernel: https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md @@ -1115,6 +1194,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, XfrmOutputKretprobe *cilium.Program `ebpf:"xfrm_output_kretprobe"` XfrmInputKprobe *cilium.Program `ebpf:"xfrm_input_kprobe"` XfrmOutputKprobe *cilium.Program `ebpf:"xfrm_output_kprobe"` + ProbeEntrySSLWrite *cilium.Program `ebpf:"probe_entry_SSL_write"` } type newBpfObjects struct { newBpfPrograms @@ -1148,6 +1228,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, TcpRcvFentry: nil, KfreeSkb: nil, NetworkEventsMonitoring: nil, + ProbeEntrySSL_write: newObjects.ProbeEntrySSLWrite, }, BpfMaps: ebpf.BpfMaps{ DirectFlows: newObjects.DirectFlows, @@ -1159,6 +1240,8 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, GlobalCounters: newObjects.GlobalCounters, IpsecIngressMap: newObjects.IpsecIngressMap, IpsecEgressMap: newObjects.IpsecEgressMap, + SslDataEventMap: newObjects.SslDataEventMap, + DnsNameMap: newObjects.DnsNameMap, }, } @@ -1178,6 +1261,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, XfrmOutputKretprobe *cilium.Program `ebpf:"xfrm_output_kretprobe"` XfrmInputKprobe *cilium.Program `ebpf:"xfrm_input_kprobe"` XfrmOutputKprobe *cilium.Program `ebpf:"xfrm_output_kprobe"` + ProbeEntrySSLWrite *cilium.Program `ebpf:"probe_entry_SSL_write"` } type newBpfObjects struct { newBpfPrograms @@ -1210,6 +1294,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, TcpRcvFentry: nil, KfreeSkb: nil, NetworkEventsMonitoring: nil, + ProbeEntrySSL_write: newObjects.ProbeEntrySSLWrite, }, BpfMaps: ebpf.BpfMaps{ DirectFlows: newObjects.DirectFlows, @@ -1221,6 +1306,8 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, 
supportNetworkEvents bool, GlobalCounters: newObjects.GlobalCounters, IpsecIngressMap: newObjects.IpsecIngressMap, IpsecEgressMap: newObjects.IpsecEgressMap, + SslDataEventMap: newObjects.SslDataEventMap, + DnsNameMap: newObjects.DnsNameMap, }, } @@ -1240,6 +1327,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, XfrmOutputKretprobe *cilium.Program `ebpf:"xfrm_output_kretprobe"` XfrmInputKprobe *cilium.Program `ebpf:"xfrm_input_kprobe"` XfrmOutputKprobe *cilium.Program `ebpf:"xfrm_output_kprobe"` + ProbeEntrySSLWrite *cilium.Program `ebpf:"probe_entry_SSL_write"` } type newBpfObjects struct { newBpfPrograms @@ -1272,6 +1360,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, TcpRcvKprobe: nil, KfreeSkb: nil, NetworkEventsMonitoring: nil, + ProbeEntrySSL_write: newObjects.ProbeEntrySSLWrite, }, BpfMaps: ebpf.BpfMaps{ DirectFlows: newObjects.DirectFlows, @@ -1283,6 +1372,8 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, GlobalCounters: newObjects.GlobalCounters, IpsecIngressMap: newObjects.IpsecIngressMap, IpsecEgressMap: newObjects.IpsecEgressMap, + SslDataEventMap: newObjects.SslDataEventMap, + DnsNameMap: newObjects.DnsNameMap, }, } @@ -1304,6 +1395,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, XfrmOutputKretprobe *cilium.Program `ebpf:"xfrm_output_kretprobe"` XfrmInputKprobe *cilium.Program `ebpf:"xfrm_input_kprobe"` XfrmOutputKprobe *cilium.Program `ebpf:"xfrm_output_kprobe"` + ProbeEntrySSLWrite *cilium.Program `ebpf:"probe_entry_SSL_write"` } type newBpfObjects struct { newBpfPrograms @@ -1334,6 +1426,7 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, XfrmInputKprobe: newObjects.XfrmInputKprobe, XfrmOutputKprobe: newObjects.XfrmOutputKprobe, NetworkEventsMonitoring: nil, + ProbeEntrySSL_write: newObjects.ProbeEntrySSLWrite, }, BpfMaps: ebpf.BpfMaps{ DirectFlows: 
newObjects.DirectFlows, @@ -1345,6 +1438,8 @@ func kernelSpecificLoadAndAssign(oldKernel, rtKernel, supportNetworkEvents bool, GlobalCounters: newObjects.GlobalCounters, IpsecIngressMap: newObjects.IpsecIngressMap, IpsecEgressMap: newObjects.IpsecEgressMap, + SslDataEventMap: newObjects.SslDataEventMap, + DnsNameMap: newObjects.DnsNameMap, }, } @@ -1415,10 +1510,15 @@ func NewPacketFetcher(cfg *FlowFetcherConfig) (*PacketFetcher, error) { pcaRecordsMap, ipsecInputMap, ipsecOutputMap, + sslDataEventMap, + dnsNameMap, } { spec.Maps[m].Pinning = 0 } + // Always minimize SSL maps in PacketFetcher - SSL and Packet Fetcher are mutually exclusive + spec.Maps[sslDataEventMap].MaxEntries = 1 + type pcaBpfPrograms struct { TcEgressPcaParse *cilium.Program `ebpf:"tc_egress_pca_parse"` TcIngressPcaParse *cilium.Program `ebpf:"tc_ingress_pca_parse"` @@ -1449,6 +1549,8 @@ func NewPacketFetcher(cfg *FlowFetcherConfig) (*PacketFetcher, error) { delete(spec.Programs, constNetworkEventsMonitoringGroupID) delete(spec.Programs, constEnablePktTranslation) delete(spec.Programs, constEnableIPsec) + delete(spec.Programs, constEnableOpenSSLTracking) + delete(spec.Programs, dnsNameMap) if err := spec.LoadAndAssign(&newObjects, &cilium.CollectionOptions{Maps: cilium.MapOptions{PinPath: ""}}); err != nil { var ve *cilium.VerifierError @@ -1478,11 +1580,13 @@ func NewPacketFetcher(cfg *FlowFetcherConfig) (*PacketFetcher, error) { XfrmOutputKretprobe: nil, XfrmInputKprobe: nil, XfrmOutputKprobe: nil, + ProbeEntrySSL_write: nil, }, BpfMaps: ebpf.BpfMaps{ - PacketRecord: newObjects.PacketRecord, - FilterMap: newObjects.FilterMap, - PeerFilterMap: newObjects.PeerFilterMap, + PacketRecord: newObjects.PacketRecord, + SslDataEventMap: newObjects.SslDataEventMap, + FilterMap: newObjects.FilterMap, + PeerFilterMap: newObjects.PeerFilterMap, }, } @@ -1938,6 +2042,11 @@ func configureFlowSpecVariables(spec *cilium.CollectionSpec, cfg *FlowFetcherCon spec.Maps[ipsecInputMap].MaxEntries = 1 
spec.Maps[ipsecOutputMap].MaxEntries = 1 } + + enableOpenSSLTracking := 0 + if cfg.EnableOpenSSLTracking { + enableOpenSSLTracking = 1 + } // When adding constants here, remember to delete them in NewPacketFetcher variables := []variablesMapping{ {constSampling, uint32(cfg.Sampling)}, @@ -1951,6 +2060,7 @@ func configureFlowSpecVariables(spec *cilium.CollectionSpec, cfg *FlowFetcherCon {constNetworkEventsMonitoringGroupID, uint8(networkEventsMonitoringGroupID)}, {constEnablePktTranslation, uint8(enablePktTranslation)}, {constEnableIPsec, uint8(enableIPsec)}, + {constEnableOpenSSLTracking, uint8(enableOpenSSLTracking)}, } for _, mapping := range variables { diff --git a/scripts/agent.yml b/scripts/agent.yml index d67184812..77e463971 100644 --- a/scripts/agent.yml +++ b/scripts/agent.yml @@ -31,7 +31,9 @@ spec: - name: SAMPLING value: "1" - name: CACHE_ACTIVE_TIMEOUT - value: 200ms + value: "5s" + - name: CACHE_MAX_FLOWS + value: "10000" - name: LOG_LEVEL value: debug - name: TARGET_HOST @@ -46,12 +48,23 @@ spec: value: "true" - name: ENABLE_DNS_TRACKING value: "true" + - name: ENABLE_OPENSSL_TRACKING + value: "true" + - name: OPENSSL_PATH + value: "/usr/lib/aarch64-linux-gnu/libssl.so.1.1" volumeMounts: - name: bpf-kernel-debug mountPath: /sys/kernel/debug mountPropagation: Bidirectional + - name: host-lib + mountPath: /usr/lib/aarch64-linux-gnu + readOnly: true volumes: - name: bpf-kernel-debug hostPath: path: /sys/kernel/debug type: Directory + - name: host-lib + hostPath: + path: /usr/lib/aarch64-linux-gnu + type: Directory \ No newline at end of file diff --git a/scripts/collector.yml b/scripts/collector.yml new file mode 100644 index 000000000..3b4024f56 --- /dev/null +++ b/scripts/collector.yml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: netobserv-collector + namespace: netobserv-privileged + labels: + k8s-app: netobserv-collector +spec: + selector: + matchLabels: + k8s-app: netobserv-collector + template: + metadata: + labels: + 
k8s-app: netobserv-collector + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: collector + image: quay.io/netobserv/flowlogs-pipeline:main + imagePullPolicy: Always + ports: + - containerPort: 9999 + hostPort: 9999 + protocol: TCP + args: + - --config=/etc/flp/config.yaml + volumeMounts: + - mountPath: /etc/flp + name: config-volume + volumes: + - name: config-volume + configMap: + name: flp-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: flp-config + namespace: netobserv-privileged +data: + config.yaml: | + log-level: info + pipeline: + - name: ingest + - name: stdout + follows: ingest + parameters: + - name: ingest + ingest: + type: grpc + grpc: + port: 9999 + - name: stdout + write: + type: stdout + stdout: + format: json + diff --git a/scripts/deploy-agent.sh b/scripts/deploy-agent.sh new file mode 100755 index 000000000..5c5b72f7c --- /dev/null +++ b/scripts/deploy-agent.sh @@ -0,0 +1,266 @@ +#!/usr/bin/env bash +set +e + +DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# Default values +AGENT_IMAGE="${AGENT_IMAGE:-quay.io/netobserv/network-observability-ebpf-agent:latest}" +OPENSSL_PATH="${OPENSSL_PATH:-}" +CACHE_ACTIVE_TIMEOUT="${CACHE_ACTIVE_TIMEOUT:-5s}" +CACHE_MAX_FLOWS="${CACHE_MAX_FLOWS:-10000}" + +# Function to detect OpenSSL path based on architecture +detect_openssl_path() { + local ARCH=$(uname -m) + + if [ -n "$OPENSSL_PATH" ]; then + echo "$OPENSSL_PATH" + return + fi + + # Default for OpenShift 4.20 / RHEL 9 is /usr/lib64/libssl.so.3 + # This works for both x86_64 and aarch64 architectures + # For other distributions, set OPENSSL_PATH environment variable explicitly + case $ARCH in + x86_64|amd64) + echo "/usr/lib64/libssl.so.3" + ;; + aarch64|arm64) + echo "/usr/lib64/libssl.so.3" + ;; + *) + echo "/usr/lib64/libssl.so.3" + ;; + esac +} + +# Function to extract library directory from OpenSSL path +get_lib_dir() { + local openssl_path=$1 + # Extract directory from 
path (e.g., /usr/lib64/libssl.so.3 -> /usr/lib64) + dirname "$openssl_path" +} + +# Function to check if running on OpenShift +is_openshift() { + kubectl get securitycontextconstraints &>/dev/null +} + +# Function to set up OpenShift permissions +setup_openshift_permissions() { + local namespace="netobserv-privileged" + local service_account="netobserv-ebpf-agent" + + echo "Setting up OpenShift Security Context Constraints..." + + # Ensure namespace exists (create if it doesn't) + kubectl create namespace "$namespace" --dry-run=client -o yaml | kubectl apply -f - + + # Create service account if it doesn't exist + kubectl create serviceaccount "$service_account" -n "$namespace" --dry-run=client -o yaml | kubectl apply -f - + + # Create ClusterRole for privileged SCC + kubectl apply -f - < "$temp_file" + + # Handle CACHE_MAX_FLOWS - update if exists, add if not + if grep -q "CACHE_MAX_FLOWS" "$temp_file"; then + sed -i "/CACHE_MAX_FLOWS/,/value:/s|value:.*|value: \"$cache_max_flows\"|" "$temp_file" + else + # Add CACHE_MAX_FLOWS after CACHE_ACTIVE_TIMEOUT + local temp_file2=$(mktemp) + awk -v max_flows="$cache_max_flows" '/CACHE_ACTIVE_TIMEOUT/,/value:/ { print; if (/value:/) { print " - name: CACHE_MAX_FLOWS"; print " value: \"" max_flows "\"" } next }1' "$temp_file" > "$temp_file2" + mv "$temp_file2" "$temp_file" + fi + + # Add service account for OpenShift if needed + if is_openshift; then + # Add serviceAccountName to the pod spec if not already present + if ! grep -q "serviceAccountName:" "$temp_file"; then + # Insert serviceAccountName after dnsPolicy line (using a temporary file for portability) + local temp_file2=$(mktemp) + awk '/dnsPolicy: ClusterFirstWithHostNet/ { print; print " serviceAccountName: netobserv-ebpf-agent"; next }1' "$temp_file" > "$temp_file2" + mv "$temp_file2" "$temp_file" + fi + fi + + # Apply the configuration + kubectl apply -f "$temp_file" + + echo "" + echo "Waiting for agent pods to be ready..." 
+ kubectl wait --for=condition=ready pod \ + -l k8s-app=netobserv-ebpf-agent \ + -n netobserv-privileged \ + --timeout=120s || true + + echo "" + echo "Agent deployment status:" + kubectl get pods -n netobserv-privileged -l k8s-app=netobserv-ebpf-agent +} + +# Function to deploy the collector +deploy_collector() { + echo "Deploying NetObserv Collector (FlowLogs Pipeline)..." + echo "" + + # Create a temporary file with the modified collector.yml + local temp_file=$(mktemp) + trap "rm -f $temp_file" EXIT + + # Copy collector.yml to temp file + cp "$DIR/collector.yml" "$temp_file" + + # Add service account for OpenShift if needed + if is_openshift; then + # Add serviceAccountName to the pod spec if not already present + if ! grep -q "serviceAccountName:" "$temp_file"; then + # Insert serviceAccountName after dnsPolicy line (using awk for portability) + local temp_file2=$(mktemp) + awk '/dnsPolicy: ClusterFirstWithHostNet/ { print; print " serviceAccountName: netobserv-ebpf-agent"; next }1' "$temp_file" > "$temp_file2" + mv "$temp_file2" "$temp_file" + fi + fi + + # Apply the collector configuration + kubectl apply -f "$temp_file" + + echo "" + echo "Waiting for collector pods to be ready..." + kubectl wait --for=condition=ready pod \ + -l k8s-app=netobserv-collector \ + -n netobserv-privileged \ + --timeout=120s || true + + echo "" + echo "Collector deployment status:" + kubectl get pods -n netobserv-privileged -l k8s-app=netobserv-collector +} + +# Main execution +echo "=========================================" +echo "NetObserv eBPF Agent Deployment Script" +echo "=========================================" +echo "" + +# Check if kubectl is available +if ! command -v kubectl &> /dev/null; then + echo "Error: kubectl is not installed or not in PATH" + exit 1 +fi + +# Check if we can connect to the cluster +if ! 
kubectl cluster-info &> /dev/null; then + echo "Error: Cannot connect to Kubernetes cluster" + echo "Please ensure your kubeconfig is set correctly" + exit 1 +fi + +# Check if running on OpenShift and set up permissions if needed +if is_openshift; then + echo "OpenShift cluster detected." + setup_openshift_permissions +fi + +# Detect OpenSSL path +DETECTED_OPENSSL_PATH=$(detect_openssl_path) + +if [ -z "$OPENSSL_PATH" ]; then + OPENSSL_PATH="$DETECTED_OPENSSL_PATH" + echo "Using default OpenSSL path for OpenShift 4.20/RHEL 9: $OPENSSL_PATH" + echo "(Set OPENSSL_PATH environment variable to override)" + echo "" +fi + +# Deploy the collector first (agent needs it to be ready) +deploy_collector + +echo "" + +# Deploy the agent +deploy_agent "$AGENT_IMAGE" "$OPENSSL_PATH" "$CACHE_ACTIVE_TIMEOUT" "$CACHE_MAX_FLOWS" + +echo "" +echo "=========================================" +echo "Deployment completed successfully!" +echo "=========================================" +echo "" +echo "To view agent logs:" +echo " kubectl logs -n netobserv-privileged -l k8s-app=netobserv-ebpf-agent" +echo "" +echo "To view collector logs:" +echo " kubectl logs -n netobserv-privileged -l k8s-app=netobserv-collector" +echo "" +echo "To check status:" +echo " kubectl get pods -n netobserv-privileged" +echo "" + diff --git a/scripts/kind-cluster.sh b/scripts/kind-cluster.sh index 5acd39a7b..f917e8280 100755 --- a/scripts/kind-cluster.sh +++ b/scripts/kind-cluster.sh @@ -28,17 +28,58 @@ nodes: scheduler: extraArgs: v: "5" + extraMounts: + - hostPath: /sys/kernel/btf + containerPath: /sys/kernel/btf + readOnly: true + - hostPath: /sys/kernel/debug + containerPath: /sys/kernel/debug + - hostPath: /var/run/netns + containerPath: /var/run/netns - role: worker + extraMounts: + - hostPath: /sys/kernel/btf + containerPath: /sys/kernel/btf + readOnly: true + - hostPath: /sys/kernel/debug + containerPath: /sys/kernel/debug + - hostPath: /var/run/netns + containerPath: /var/run/netns - role: worker + 
extraMounts: + - hostPath: /sys/kernel/btf + containerPath: /sys/kernel/btf + readOnly: true + - hostPath: /sys/kernel/debug + containerPath: /sys/kernel/debug + - hostPath: /var/run/netns + containerPath: /var/run/netns EOF } # install_netobserv-agent will install the daemonset # into each kind docker container install_netobserv-agent() { -docker build . -t localhost/ebpf-agent:test -kind load docker-image localhost/ebpf-agent:test -kubectl apply -f ${DIR}/agent.yml + # Get the architecture and convert to Go arch format + local ARCH=$(uname -m) + local TARGETARCH + + case $ARCH in + x86_64) + TARGETARCH=amd64 + ;; + aarch64|arm64) + TARGETARCH=arm64 + ;; + *) + TARGETARCH=$ARCH + ;; + esac + + echo "Building for architecture: $TARGETARCH (detected: $ARCH)" + docker build . --build-arg TARGETARCH=$TARGETARCH -t localhost/ebpf-agent:test + kind load docker-image localhost/ebpf-agent:test + kubectl apply -f ${DIR}/agent.yml } # print_success prints a little success message at the end of the script @@ -60,8 +101,8 @@ SVC_CIDR_IPV6=${SVC_CIDR_IPV6:-fd00:10:96::/112} # At the minimum, deploy the kind cluster deploy_kind export KUBECONFIG=${DIR}/kubeconfig -oc label node kind-worker node-role.kubernetes.io/worker= -oc label node kind-worker2 node-role.kubernetes.io/worker= +kubectl label node kind-worker node-role.kubernetes.io/worker= +kubectl label node kind-worker2 node-role.kubernetes.io/worker= install_netobserv-agent