Skip to content

Commit afd2627

Browse files
committed
tracing: Check "%s" dereference via the field and not the TP_printk format
The TP_printk() portion of a trace event is executed at the time a event is read from the trace. This can happen seconds, minutes, hours, days, months, years possibly later since the event was recorded. If the print format contains a dereference to a string via "%s", and that string was allocated, there's a chance that string could be freed before it is read by the trace file. To protect against such bugs, there are two functions that verify the event. The first one is test_event_printk(), which is called when the event is created. It reads the TP_printk() format as well as its arguments to make sure nothing may be dereferencing a pointer that was not copied into the ring buffer along with the event. If it is, it will trigger a WARN_ON(). For strings that use "%s", it is not so easy. The string may not reside in the ring buffer but may still be valid. Strings that are static and part of the kernel proper which will not be freed for the life of the running system, are safe to dereference. But to know if it is a pointer to a static string or to something on the heap can not be determined until the event is triggered. This brings us to the second function that tests for the bad dereferencing of strings, trace_check_vprintf(). It would walk through the printf format looking for "%s", and when it finds it, it would validate that the pointer is safe to read. If not, it would produces a WARN_ON() as well and write into the ring buffer "[UNSAFE-MEMORY]". The problem with this is how it used va_list to have vsnprintf() handle all the cases that it didn't need to check. Instead of re-implementing vsnprintf(), it would make a copy of the format up to the %s part, and call vsnprintf() with the current va_list ap variable, where the ap would then be ready to point at the string in question. For architectures that passed va_list by reference this was possible. For architectures that passed it by copy it was not. A test_can_verify() function was used to differentiate between the two, and if it wasn't possible, it would disable it. Even for architectures where this was feasible, it was a stretch to rely on such a method that is undocumented, and could cause issues later on with new optimizations of the compiler. Instead, the first function test_event_printk() was updated to look at "%s" as well. If the "%s" argument is a pointer outside the event in the ring buffer, it would find the field type of the event that is the problem and mark the structure with a new flag called "needs_test". The event itself will be marked by TRACE_EVENT_FL_TEST_STR to let it be known that this event has a field that needs to be verified before the event can be printed using the printf format. When the event fields are created from the field type structure, the fields would copy the field type's "needs_test" value. Finally, before being printed, a new function ignore_event() is called which will check if the event has the TEST_STR flag set (if not, it returns false). If the flag is set, it then iterates through the events fields looking for the ones that have the "needs_test" flag set. Then it uses the offset field from the field structure to find the pointer in the ring buffer event. It runs the tests to make sure that pointer is safe to print and if not, it triggers the WARN_ON() and also adds to the trace output that the event in question has an unsafe memory access. The ignore_event() makes the trace_check_vprintf() obsolete so it is removed. Link: https://lore.kernel.org/all/CAHk-=wh3uOnqnZPpR0PeLZZtyWbZLboZ7cHLCKRWsocvs9Y7hQ@mail.gmail.com/ Cc: [email protected] Cc: Masami Hiramatsu <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Mathieu Desnoyers <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Al Viro <[email protected]> Cc: Linus Torvalds <[email protected]> Link: https://lore.kernel.org/[email protected] Fixes: 5013f45 ("tracing: Add check of trace event print fmts for dereferencing pointers") Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 65a25d9 commit afd2627

File tree

5 files changed

+88
-217
lines changed

5 files changed

+88
-217
lines changed

include/linux/trace_events.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@ struct trace_event_fields {
273273
const char *name;
274274
const int size;
275275
const int align;
276-
const int is_signed;
276+
const unsigned int is_signed:1;
277+
unsigned int needs_test:1;
277278
const int filter_type;
278279
const int len;
279280
};
@@ -324,6 +325,7 @@ enum {
324325
TRACE_EVENT_FL_EPROBE_BIT,
325326
TRACE_EVENT_FL_FPROBE_BIT,
326327
TRACE_EVENT_FL_CUSTOM_BIT,
328+
TRACE_EVENT_FL_TEST_STR_BIT,
327329
};
328330

329331
/*
@@ -340,6 +342,7 @@ enum {
340342
* CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint)
341343
* This is set when the custom event has not been attached
342344
* to a tracepoint yet, then it is cleared when it is.
345+
* TEST_STR - The event has a "%s" that points to a string outside the event
343346
*/
344347
enum {
345348
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
@@ -352,6 +355,7 @@ enum {
352355
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
353356
TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT),
354357
TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT),
358+
TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT),
355359
};
356360

357361
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

kernel/trace/trace.c

Lines changed: 53 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
36113611
}
36123612

36133613
/* Returns true if the string is safe to dereference from an event */
3614-
static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3615-
bool star, int len)
3614+
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
36163615
{
36173616
unsigned long addr = (unsigned long)str;
36183617
struct trace_event *trace_event;
36193618
struct trace_event_call *event;
36203619

3621-
/* Ignore strings with no length */
3622-
if (star && !len)
3623-
return true;
3624-
36253620
/* OK if part of the event data */
36263621
if ((addr >= (unsigned long)iter->ent) &&
36273622
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
36613656
return false;
36623657
}
36633658

3664-
static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3665-
3666-
static int test_can_verify_check(const char *fmt, ...)
3667-
{
3668-
char buf[16];
3669-
va_list ap;
3670-
int ret;
3671-
3672-
/*
3673-
* The verifier is dependent on vsnprintf() modifies the va_list
3674-
* passed to it, where it is sent as a reference. Some architectures
3675-
* (like x86_32) passes it by value, which means that vsnprintf()
3676-
* does not modify the va_list passed to it, and the verifier
3677-
* would then need to be able to understand all the values that
3678-
* vsnprintf can use. If it is passed by value, then the verifier
3679-
* is disabled.
3680-
*/
3681-
va_start(ap, fmt);
3682-
vsnprintf(buf, 16, "%d", ap);
3683-
ret = va_arg(ap, int);
3684-
va_end(ap);
3685-
3686-
return ret;
3687-
}
3688-
3689-
static void test_can_verify(void)
3690-
{
3691-
if (!test_can_verify_check("%d %d", 0, 1)) {
3692-
pr_info("trace event string verifier disabled\n");
3693-
static_branch_inc(&trace_no_verify);
3694-
}
3695-
}
3696-
36973659
/**
3698-
* trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3660+
* ignore_event - Check dereferenced fields while writing to the seq buffer
36993661
* @iter: The iterator that holds the seq buffer and the event being printed
3700-
* @fmt: The format used to print the event
3701-
* @ap: The va_list holding the data to print from @fmt.
37023662
*
3703-
* This writes the data into the @iter->seq buffer using the data from
3704-
* @fmt and @ap. If the format has a %s, then the source of the string
3705-
* is examined to make sure it is safe to print, otherwise it will
3706-
* warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3707-
* pointer.
3663+
* At boot up, test_event_printk() will flag any event that dereferences
3664+
* a string with "%s" that does exist in the ring buffer. It may still
3665+
* be valid, as the string may point to a static string in the kernel
3666+
* rodata that never gets freed. But if the string pointer is pointing
3667+
* to something that was allocated, there's a chance that it can be freed
3668+
* by the time the user reads the trace. This would cause a bad memory
3669+
* access by the kernel and possibly crash the system.
3670+
*
3671+
* This function will check if the event has any fields flagged as needing
3672+
* to be checked at runtime and perform those checks.
3673+
*
3674+
* If it is found that a field is unsafe, it will write into the @iter->seq
3675+
* a message stating what was found to be unsafe.
3676+
*
3677+
* @return: true if the event is unsafe and should be ignored,
3678+
* false otherwise.
37083679
*/
3709-
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3710-
va_list ap)
3680+
bool ignore_event(struct trace_iterator *iter)
37113681
{
3712-
long text_delta = 0;
3713-
long data_delta = 0;
3714-
const char *p = fmt;
3715-
const char *str;
3716-
bool good;
3717-
int i, j;
3682+
struct ftrace_event_field *field;
3683+
struct trace_event *trace_event;
3684+
struct trace_event_call *event;
3685+
struct list_head *head;
3686+
struct trace_seq *seq;
3687+
const void *ptr;
37183688

3719-
if (WARN_ON_ONCE(!fmt))
3720-
return;
3689+
trace_event = ftrace_find_event(iter->ent->type);
37213690

3722-
if (static_branch_unlikely(&trace_no_verify))
3723-
goto print;
3691+
seq = &iter->seq;
37243692

3725-
/*
3726-
* When the kernel is booted with the tp_printk command line
3727-
* parameter, trace events go directly through to printk().
3728-
* It also is checked by this function, but it does not
3729-
* have an associated trace_array (tr) for it.
3730-
*/
3731-
if (iter->tr) {
3732-
text_delta = iter->tr->text_delta;
3733-
data_delta = iter->tr->data_delta;
3693+
if (!trace_event) {
3694+
trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3695+
return true;
37343696
}
37353697

3736-
/* Don't bother checking when doing a ftrace_dump() */
3737-
if (iter->fmt == static_fmt_buf)
3738-
goto print;
3739-
3740-
while (*p) {
3741-
bool star = false;
3742-
int len = 0;
3743-
3744-
j = 0;
3745-
3746-
/*
3747-
* We only care about %s and variants
3748-
* as well as %p[sS] if delta is non-zero
3749-
*/
3750-
for (i = 0; p[i]; i++) {
3751-
if (i + 1 >= iter->fmt_size) {
3752-
/*
3753-
* If we can't expand the copy buffer,
3754-
* just print it.
3755-
*/
3756-
if (!trace_iter_expand_format(iter))
3757-
goto print;
3758-
}
3759-
3760-
if (p[i] == '\\' && p[i+1]) {
3761-
i++;
3762-
continue;
3763-
}
3764-
if (p[i] == '%') {
3765-
/* Need to test cases like %08.*s */
3766-
for (j = 1; p[i+j]; j++) {
3767-
if (isdigit(p[i+j]) ||
3768-
p[i+j] == '.')
3769-
continue;
3770-
if (p[i+j] == '*') {
3771-
star = true;
3772-
continue;
3773-
}
3774-
break;
3775-
}
3776-
if (p[i+j] == 's')
3777-
break;
3778-
3779-
if (text_delta && p[i+1] == 'p' &&
3780-
((p[i+2] == 's' || p[i+2] == 'S')))
3781-
break;
3782-
3783-
star = false;
3784-
}
3785-
j = 0;
3786-
}
3787-
/* If no %s found then just print normally */
3788-
if (!p[i])
3789-
break;
3790-
3791-
/* Copy up to the %s, and print that */
3792-
strncpy(iter->fmt, p, i);
3793-
iter->fmt[i] = '\0';
3794-
trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3698+
event = container_of(trace_event, struct trace_event_call, event);
3699+
if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3700+
return false;
37953701

3796-
/* Add delta to %pS pointers */
3797-
if (p[i+1] == 'p') {
3798-
unsigned long addr;
3799-
char fmt[4];
3702+
head = trace_get_fields(event);
3703+
if (!head) {
3704+
trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3705+
trace_event_name(event));
3706+
return true;
3707+
}
38003708

3801-
fmt[0] = '%';
3802-
fmt[1] = 'p';
3803-
fmt[2] = p[i+2]; /* Either %ps or %pS */
3804-
fmt[3] = '\0';
3709+
/* Offsets are from the iter->ent that points to the raw event */
3710+
ptr = iter->ent;
38053711

3806-
addr = va_arg(ap, unsigned long);
3807-
addr += text_delta;
3808-
trace_seq_printf(&iter->seq, fmt, (void *)addr);
3712+
list_for_each_entry(field, head, link) {
3713+
const char *str;
3714+
bool good;
38093715

3810-
p += i + 3;
3716+
if (!field->needs_test)
38113717
continue;
3812-
}
38133718

3814-
/*
3815-
* If iter->seq is full, the above call no longer guarantees
3816-
* that ap is in sync with fmt processing, and further calls
3817-
* to va_arg() can return wrong positional arguments.
3818-
*
3819-
* Ensure that ap is no longer used in this case.
3820-
*/
3821-
if (iter->seq.full) {
3822-
p = "";
3823-
break;
3824-
}
3825-
3826-
if (star)
3827-
len = va_arg(ap, int);
3828-
3829-
/* The ap now points to the string data of the %s */
3830-
str = va_arg(ap, const char *);
3719+
str = *(const char **)(ptr + field->offset);
38313720

3832-
good = trace_safe_str(iter, str, star, len);
3833-
3834-
/* Could be from the last boot */
3835-
if (data_delta && !good) {
3836-
str += data_delta;
3837-
good = trace_safe_str(iter, str, star, len);
3838-
}
3721+
good = trace_safe_str(iter, str);
38393722

38403723
/*
38413724
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
38463729
* instead. See samples/trace_events/trace-events-sample.h
38473730
* for reference.
38483731
*/
3849-
if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
3850-
fmt, seq_buf_str(&iter->seq.seq))) {
3851-
int ret;
3852-
3853-
/* Try to safely read the string */
3854-
if (star) {
3855-
if (len + 1 > iter->fmt_size)
3856-
len = iter->fmt_size - 1;
3857-
if (len < 0)
3858-
len = 0;
3859-
ret = copy_from_kernel_nofault(iter->fmt, str, len);
3860-
iter->fmt[len] = 0;
3861-
star = false;
3862-
} else {
3863-
ret = strncpy_from_kernel_nofault(iter->fmt, str,
3864-
iter->fmt_size);
3865-
}
3866-
if (ret < 0)
3867-
trace_seq_printf(&iter->seq, "(0x%px)", str);
3868-
else
3869-
trace_seq_printf(&iter->seq, "(0x%px:%s)",
3870-
str, iter->fmt);
3871-
str = "[UNSAFE-MEMORY]";
3872-
strcpy(iter->fmt, "%s");
3873-
} else {
3874-
strncpy(iter->fmt, p + i, j + 1);
3875-
iter->fmt[j+1] = '\0';
3732+
if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3733+
trace_event_name(event), field->name)) {
3734+
trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3735+
trace_event_name(event), field->name);
3736+
return true;
38763737
}
3877-
if (star)
3878-
trace_seq_printf(&iter->seq, iter->fmt, len, str);
3879-
else
3880-
trace_seq_printf(&iter->seq, iter->fmt, str);
3881-
3882-
p += i + j + 1;
38833738
}
3884-
print:
3885-
if (*p)
3886-
trace_seq_vprintf(&iter->seq, p, ap);
3739+
return false;
38873740
}
38883741

38893742
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -10777,8 +10630,6 @@ __init static int tracer_alloc_buffers(void)
1077710630

1077810631
register_snapshot_cmd();
1077910632

10780-
test_can_verify();
10781-
1078210633
return 0;
1078310634

1078410635
out_free_pipe_cpumask:

kernel/trace/trace.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
667667

668668
bool trace_is_tracepoint_string(const char *str);
669669
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
670-
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
671-
va_list ap) __printf(2, 0);
672670
char *trace_iter_expand_format(struct trace_iterator *iter);
671+
bool ignore_event(struct trace_iterator *iter);
673672

674673
int trace_empty(struct trace_iterator *iter);
675674

@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
14131412
int filter_type;
14141413
int offset;
14151414
int size;
1416-
int is_signed;
1415+
unsigned int is_signed:1;
1416+
unsigned int needs_test:1;
14171417
int len;
14181418
};
14191419

0 commit comments

Comments
 (0)