falcosecurity
diff --git a/‎driver/bpf/fillers.h‎
Lines changed: 15 additions & 5 deletions b/‎driver/bpf/fillers.h‎
Lines changed: 15 additions & 5 deletions
diff --git a/‎driver/bpf/plumbing_helpers.h‎
Lines changed: 3 additions & 8 deletions b/‎driver/bpf/plumbing_helpers.h‎
Lines changed: 3 additions & 8 deletions
diff --git a/‎driver/bpf/probe.c‎
Lines changed: 24 additions & 4 deletions b/‎driver/bpf/probe.c‎
Lines changed: 24 additions & 4 deletions
diff --git a/‎driver/bpf/types.h‎
Lines changed: 0 additions & 4 deletions b/‎driver/bpf/types.h‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎driver/feature_gates.h‎
Lines changed: 0 additions & 47 deletions b/‎driver/feature_gates.h‎
Lines changed: 0 additions & 47 deletions
diff --git a/‎driver/main.c‎
Lines changed: 31 additions & 22 deletions b/‎driver/main.c‎
Lines changed: 31 additions & 22 deletions
@@ -219,6 +219,9 @@ FILLER_RAW(terminate_filler) {
 		}
 		break;
 	case PPM_SKIP_EVENT:
+		bpf_printk("PPM_SKIP_EVENT event=%d curarg=%d\n",
+		           state->tail_ctx.evt_type,
+		           state->tail_ctx.curarg);
 		break;
 	case PPM_FAILURE_FRAME_SCRATCH_MAP_FULL:
 		bpf_printk("PPM_FAILURE_FRAME_SCRATCH_MAP_FULL event=%d curarg=%d\n",
@@ -2267,11 +2270,21 @@ FILLER(proc_startupdate, true) {
 	pid_t pid;
 	int res;
 
+	retval = bpf_syscall_get_retval(data->ctx);
+
 	/*
-	 * Make sure the operation was successful
+	 * For `execve` and `execveat`, the only purpose of this filler is to catch events in case of
+	 * system call failure. In case of system call success, `execve` and `execveat` events are
+	 * caught by our tracepoint on `sched/sched_process_exec` (see comment on
+	 * `sched_proc_exec_probe` in `driver/bpf/probe.c`). A successful `execve`/`execveat` call is
+	 * identified by `retval == 0`.
 	 */
+	if(retval == 0 && (data->state->tail_ctx.evt_type == PPME_SYSCALL_EXECVE_19_X ||
+	                   data->state->tail_ctx.evt_type == PPME_SYSCALL_EXECVEAT_X)) {
+		return PPM_SKIP_EVENT;
+	}
+
 	/* Parameter 1: res (type: PT_ERRNO) */
-	retval = bpf_syscall_get_retval(data->ctx);
 	res = bpf_push_s64_to_ring(data, retval);
 	CHECK_RES(res);
 
@@ -6715,7 +6728,6 @@ FILLER(sys_getdents64_x, true) {
 	return bpf_push_s64_to_ring(data, fd);
 }
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
 /* We set `is_syscall` flag to `false` since this is not
  * a real syscall, we only send the same event from another
  * tracepoint.
@@ -7111,8 +7123,6 @@ FILLER(sched_prog_exec_5, false) {
 	return bpf_push_u32_to_ring(data, egid.val);
 }
 
-#endif
-
 #ifdef CAPTURE_SCHED_PROC_FORK
 /* These `sched_proc_fork` fillers will generate a
  * `PPME_SYSCALL_CLONE_20_X` event.
 
@@ -548,7 +548,6 @@ static __always_inline int bpf_test_bit(int nr, unsigned long *addr) {
 	return 1UL & (_READ(addr[BIT_WORD(nr)]) >> (nr & (BITS_PER_LONG - 1)));
 }
 
-#if defined(CAPTURE_SCHED_PROC_FORK) || defined(CAPTURE_SCHED_PROC_EXEC)
 static __always_inline bool bpf_drop_syscall_exit_events(void *ctx, ppm_event_code evt_type) {
 	long ret = 0;
 	switch(evt_type) {
@@ -569,23 +568,19 @@ static __always_inline bool bpf_drop_syscall_exit_events(void *ctx, ppm_event_co
 		return ret == 0;
 #endif
 
-		/* If `CAPTURE_SCHED_PROC_EXEC` logic is enabled we collect execve-family
-		 * exit events through a dedicated tracepoint so we can ignore them here.
-		 */
-#ifdef CAPTURE_SCHED_PROC_EXEC
 	case PPME_SYSCALL_EXECVE_19_X:
 	case PPME_SYSCALL_EXECVEAT_X:
+		/* We collect execve-family successful exit events through a dedicated `sched_process_exec`
+		 * tracepoint, so we can ignore them here.
+		 */
 		ret = bpf_syscall_get_retval(ctx);
-		/* We ignore only successful events, so ret == 0! */
 		return ret == 0;
-#endif
 
 	default:
 		break;
 	}
 	return false;
 }
-#endif
 
 static __always_inline bool drop_event(void *ctx,
                                        struct scap_bpf_per_cpu_state *state,
 
@@ -207,10 +207,8 @@ BPF_PROBE("raw_syscalls/", sys_exit, sys_exit_args) {
 		}
 	}
 
-#if defined(CAPTURE_SCHED_PROC_FORK) || defined(CAPTURE_SCHED_PROC_EXEC)
 	if(bpf_drop_syscall_exit_events(ctx, evt_type))
 		return 0;
-#endif
 
 	call_filler(ctx, ctx, evt_type, drop_flags, socketcall_syscall_id);
 	return 0;
@@ -289,7 +287,30 @@ __bpf_section(TP_NAME "sched/sched_process_fork&1") int bpf_sched_process_fork(
 }
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
+/*
+ * This tracepoint generates `execve` exit events for both successful `execve` and `execveat` system
+ * calls. Events related to system call failures, for both system calls, are generated by the
+ * `proc_startupdate` filler. This architectural choice is motivated by the fact that the kernel
+ * hasn't consistently called the correct tracepoint for `execve` and `execveat` calls on all
+ * architectures:
+ * - on `x86_64`, a successful `execveat` call is identified as `execve`, and a failing one is
+ *   identified as `execveat`
+ * - on `aarch64`, till version 5.18 (actually, the fix was back-ported up to 5.15:
+ *   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.15.y&id=42eede3ae05bbf32cb0d87940b466ec5a76aca3f),
+ *   neither successful `execve`s nor successful `execveat`s used to trigger the `sys_exit`
+ *   tracepoint (see https://www.spinics.net/lists/linux-trace/msg01001.html); only failing ones
+ *   have always triggered the correct behaviour
+ * - on `s390x`, each call correctly triggers `sys_exit` tracepoint and is correctly identified as
+ *   `execve` and `execveat`
+ * By contrast, the `sched/sched_process_exec` tracepoint is correctly triggered on all
+ * architectures for successful calls. Moreover, failing calls correctly trigger the `sys_exit`
+ * tracepoint, and are correctly associated with the right syscall number. In the past, this design
+ * was applied just for `aarch64`, but since it works consistently on all architectures, its
+ * application was extended. The only issue seems to be that now we generate `execve` exit events
+ * for both `execve` and `execveat` if the call is successful: this is not a big problem, because
+ * even with the previous implementation we weren't able to generate any `execveat` exit event on
+ * either `x86_64` or `aarch64` in this scenario.
+ */
 BPF_PROBE("sched/", sched_process_exec, sched_process_exec_args) {
 	struct scap_bpf_settings *settings;
 	/* We will always send an execve exit event. */
@@ -324,7 +345,6 @@ BPF_PROBE("sched/", sched_process_exec, sched_process_exec_args) {
 	           filler_code);
 	return 0;
 }
-#endif /* CAPTURE_SCHED_PROC_EXEC */
 
 #ifdef CAPTURE_SCHED_PROC_FORK
 __bpf_section("raw_tracepoint/sched_process_fork&2") int bpf_sched_process_fork(
 
@@ -140,8 +140,6 @@ struct sys_stash_args {
 };
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
-
 #ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
 struct sched_process_exec_args {
 	unsigned short common_type;
@@ -163,8 +161,6 @@ struct sched_process_exec_args {
 };
 #endif /* BPF_SUPPORTS_RAW_TRACEPOINTS */
 
-#endif /* CAPTURE_SCHED_PROC_EXEC */
-
 #ifdef CAPTURE_SCHED_PROC_FORK
 /* TP_PROTO(struct task_struct *parent, struct task_struct *child)
  * Taken from `/include/trace/events/sched.h`
 
@@ -65,37 +65,6 @@ or GPL2.txt for full copies of the license.
 #define CAPTURE_SCHED_PROC_FORK
 #endif
 
-///////////////////////////////
-// CAPTURE_SCHED_PROC_EXEC
-///////////////////////////////
-
-/* In some architectures we are not able to catch the `execve exit event`
- * from the `sys_exit` tracepoint. This is because there is no
- * default behavior among different architectures... you can find more
- * info here:
- * https://www.spinics.net/lists/linux-trace/msg01001.html
- *
- * Anyway, to not lose this event, we need to instrument a new kernel tracepoint:
- *
- * - `sched_process_exec`: allows us to catch every process that correctly performs
- *                         an `execve` call.
- *
- * In this way we can send to userspace a `PPME_SYSCALL_EXECVE_X` event
- * as we do with the `sys_exit` tracepoint.
- *
- * All the architectures that need this patch can use our BPF probe with all
- * supported kernel versions (so >= `4.14`), since `BPF_PROG_TYPE_RAW_TRACEPOINT` are
- * not required in this case.
- *
- * If you run old kernels, you can use the kernel module which requires
- * kernel versions greater or equal than `3.4`, since this tracepoint has
- * been introduced in the following kernel release:
- * https://github.com/torvalds/linux/commit/4ff16c25e2cc48cbe6956e356c38a25ac063a64d
- */
-#if defined(CONFIG_ARM64)
-#define CAPTURE_SCHED_PROC_EXEC
-#endif
-
 ///////////////////////////////
 // CAPTURE_64BIT_ARGS_SINGLE_REGISTER
 ///////////////////////////////
@@ -142,14 +111,6 @@ or GPL2.txt for full copies of the license.
 
 #elif defined(__USE_VMLINUX__) /* modern BPF probe */
 
-///////////////////////////////
-// CAPTURE_SCHED_PROC_EXEC
-///////////////////////////////
-
-#if defined(__TARGET_ARCH_arm64)
-#define CAPTURE_SCHED_PROC_EXEC
-#endif
-
 ///////////////////////////////
 // CAPTURE_SCHED_PROC_FORK
 ///////////////////////////////
@@ -209,14 +170,6 @@ or GPL2.txt for full copies of the license.
 #define CAPTURE_SCHED_PROC_FORK
 #endif
 
-///////////////////////////////
-// CAPTURE_SCHED_PROC_EXEC
-///////////////////////////////
-
-#if defined(__aarch64__)
-#define CAPTURE_SCHED_PROC_EXEC
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif /* FEATURE_GATES_H */
@@ -185,12 +185,10 @@ TRACEPOINT_PROBE(page_fault_kern_probe,
 TRACEPOINT_PROBE(sched_proc_fork_probe, struct task_struct *parent, struct task_struct *child);
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
 TRACEPOINT_PROBE(sched_proc_exec_probe,
                  struct task_struct *p,
                  pid_t old_pid,
                  struct linux_binprm *bprm);
-#endif
 
 extern const int g_ia32_64_map[];
 
@@ -241,9 +239,7 @@ static bool g_fault_tracepoint_disabled;
 static struct tracepoint *tp_sched_proc_fork;
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
 static struct tracepoint *tp_sched_proc_exec;
-#endif
 
 #ifdef _DEBUG
 static bool verbose = 1;
@@ -705,14 +701,12 @@ static int force_tp_set(struct ppm_consumer_t *consumer, uint32_t new_tp_set) {
 			                            new_val);
 			break;
 #endif
-#ifdef CAPTURE_SCHED_PROC_EXEC
 		case KMOD_PROG_SCHED_PROC_EXEC:
 			ret = compat_set_tracepoint(sched_proc_exec_probe,
 			                            kmod_prog_names[idx],
 			                            tp_sched_proc_exec,
 			                            new_val);
 			break;
-#endif
 		default:
 			// unmanaged idx
 			break;
@@ -1825,11 +1819,9 @@ static int record_event_consumer(struct ppm_consumer_t *consumer,
 		 * we need to call dedicated fillers that are not in our `g_ppm_events` table.
 		 */
 		switch(event_datap->category) {
-#ifdef CAPTURE_SCHED_PROC_EXEC
 		case PPMC_SCHED_PROC_EXEC:
 			cbres = f_sched_prog_exec(&args);
 			break;
-#endif
 
 #ifdef CAPTURE_SCHED_PROC_FORK
 		case PPMC_SCHED_PROC_FORK:
@@ -1920,6 +1912,10 @@ static int record_event_consumer(struct ppm_consumer_t *consumer,
 		} else if(cbres == PPM_FAILURE_BUFFER_FULL) {
 			ring_info->n_drops_buffer++;
 			drops_buffer_syscall_categories_counters(event_type, ring_info);
+		} else if(cbres == PPM_SKIP_EVENT) {
+#ifdef _DEBUG
+			pr_err("Skipped event %d\n", event_type);
+#endif
 		} else {
 			ring_info->n_drops_buffer++;
 			ASSERT(false);
@@ -2087,15 +2083,12 @@ static __always_inline bool kmod_drop_syscall_exit_events(long ret, ppm_event_co
 		return ret == 0;
 #endif
 
-		/* If `CAPTURE_SCHED_PROC_EXEC` logic is enabled we collect execve-family
-		 * exit events through a dedicated tracepoint so we can ignore them here.
-		 */
-#ifdef CAPTURE_SCHED_PROC_EXEC
 	case PPME_SYSCALL_EXECVE_19_X:
 	case PPME_SYSCALL_EXECVEAT_X:
-		/* We ignore only successful events, so ret == 0! */
+		/* We collect execve-family successful exit events through a dedicated `sched_process_exec`
+		 * tracepoint, so we can ignore them here.
+		 */
 		return ret == 0;
-#endif
 	default:
 		break;
 	}
@@ -2176,10 +2169,8 @@ TRACEPOINT_PROBE(syscall_exit_probe, struct pt_regs *regs, long ret) {
 
 	event_pair = &g_syscall_table[table_index];
 
-#if defined(CAPTURE_SCHED_PROC_FORK) || defined(CAPTURE_SCHED_PROC_EXEC)
 	if(kmod_drop_syscall_exit_events(ret, event_pair->exit_event_type))
 		return;
-#endif
 
 	if(event_pair->exit_event_type == PPME_SOCKET_SENDMMSG_X ||
 	   event_pair->exit_event_type == PPME_SOCKET_RECVMMSG_X) {
@@ -2347,7 +2338,30 @@ TRACEPOINT_PROBE(page_fault_kern_probe,
 }
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
+/*
+ * This tracepoint generates `execve` exit events for both successful `execve` and `execveat` system
+ * calls. Events related to system call failures, for both system calls, are generated by the
+ * `proc_startupdate` filler. This architectural choice is motivated by the fact that the kernel
+ * hasn't consistently called the correct tracepoint for `execve` and `execveat` calls on all
+ * architectures:
+ * - on `x86_64`, a successful `execveat` call is identified as `execve`, and a failing one is
+ *   identified as `execveat`
+ * - on `aarch64`, till version 5.18 (actually, the fix was back-ported up to 5.15:
+ *   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.15.y&id=42eede3ae05bbf32cb0d87940b466ec5a76aca3f),
+ *   neither successful `execve`s nor successful `execveat`s used to trigger the `sys_exit`
+ *   tracepoint (see https://www.spinics.net/lists/linux-trace/msg01001.html); only failing ones
+ *   have always triggered the correct behaviour
+ * - on `s390x`, each call correctly triggers `sys_exit` tracepoint and is correctly identified as
+ *   `execve` and `execveat`
+ * By contrast, the `sched/sched_process_exec` tracepoint is correctly triggered on all
+ * architectures for successful calls. Moreover, failing calls correctly trigger the `sys_exit`
+ * tracepoint, and are correctly associated with the right syscall number. In the past, this design
+ * was applied just for `aarch64`, but since it works consistently on all architectures, its
+ * application was extended. The only issue seems to be that now we generate `execve` exit events
+ * for both `execve` and `execveat` if the call is successful: this is not a big problem, because
+ * even with the previous implementation we weren't able to generate any `execveat` exit event on
+ * either `x86_64` or `aarch64` in this scenario.
+ */
 TRACEPOINT_PROBE(sched_proc_exec_probe,
                  struct task_struct *p,
                  pid_t old_pid,
@@ -2367,7 +2381,6 @@ TRACEPOINT_PROBE(sched_proc_exec_probe,
 	                           &event_data,
 	                           KMOD_PROG_SCHED_PROC_EXEC);
 }
-#endif
 
 #ifdef CAPTURE_SCHED_PROC_FORK
 TRACEPOINT_PROBE(sched_proc_fork_probe, struct task_struct *parent, struct task_struct *child) {
@@ -2511,10 +2524,8 @@ static void visit_tracepoint(struct tracepoint *tp, void *priv) {
 		tp_page_fault_kernel = tp;
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
 	else if(!strcmp(tp->name, kmod_prog_names[KMOD_PROG_SCHED_PROC_EXEC]))
 		tp_sched_proc_exec = tp;
-#endif
 
 #ifdef CAPTURE_SCHED_PROC_FORK
 	else if(!strcmp(tp->name, kmod_prog_names[KMOD_PROG_SCHED_PROC_FORK]))
@@ -2563,12 +2574,10 @@ static int get_tracepoint_handles(void) {
 	}
 #endif
 
-#ifdef CAPTURE_SCHED_PROC_EXEC
 	if(!tp_sched_proc_exec) {
 		pr_err("failed to find 'sched_process_exec' tracepoint\n");
 		return -ENOENT;
 	}
-#endif
 
 #ifdef CAPTURE_SCHED_PROC_FORK
 	if(!tp_sched_proc_fork) {