Skip to content

Commit b6e6d3c

Browse files
committed
[libFuzzer] Fix DataFlow.cpp logic when tracing long inputs.
Summary: 1. Do not create DFSan labels for the bytes which we do not trace. This is where we run out of labels in the first place. 2. When dumping the traces to disk, make sure to offset the label identifiers by the number of the first byte in the trace range. 3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte. Also fixed the bug with division in Python which I introduced when I migrated the scripts to Python3 (`//` is required for integer division). Otherwise, the scripts waste too much time unsuccessfully trying to collect and process traces from long inputs. For more context, see google/oss-fuzz#1632 (comment) Reviewers: kcc Reviewed By: kcc Subscribers: delcypher, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D60538 llvm-svn: 358311
1 parent 9e27514 commit b6e6d3c

File tree

3 files changed

+29
-11
lines changed

3 files changed

+29
-11
lines changed

compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
6363
} // extern "C"
6464

6565
static size_t InputLen;
66+
static size_t InputLabelBeg;
67+
static size_t InputLabelEnd;
68+
static size_t InputSizeLabel;
6669
static size_t NumFuncs;
6770
static const uintptr_t *FuncsBeg;
6871
static __thread size_t CurrentFunc;
@@ -95,8 +98,10 @@ void SetBytesForLabel(dfsan_label L, char *Bytes) {
9598
return;
9699
LabelSeen[L] = true;
97100
assert(L);
98-
if (L <= InputLen + 1) {
99-
Bytes[L - 1] = '1';
101+
if (L < InputSizeLabel) {
102+
Bytes[L + InputLabelBeg - 1] = '1';
103+
} else if (L == InputSizeLabel) {
104+
Bytes[InputLen] = '1';
100105
} else {
101106
auto *DLI = dfsan_get_label_info(L);
102107
SetBytesForLabel(DLI->l1, Bytes);
@@ -124,9 +129,9 @@ int main(int argc, char **argv) {
124129
if (argc == 1)
125130
return PrintFunctions();
126131
assert(argc == 4 || argc == 5);
127-
size_t Beg = atoi(argv[1]);
128-
size_t End = atoi(argv[2]);
129-
assert(Beg < End);
132+
InputLabelBeg = atoi(argv[1]);
133+
InputLabelEnd = atoi(argv[2]);
134+
assert(InputLabelBeg < InputLabelEnd);
130135

131136
const char *Input = argv[3];
132137
fprintf(stderr, "INFO: reading '%s'\n", Input);
@@ -143,14 +148,16 @@ int main(int argc, char **argv) {
143148

144149
fprintf(stderr, "INFO: running '%s'\n", Input);
145150
for (size_t I = 1; I <= InputLen; I++) {
146-
dfsan_label L = dfsan_create_label("", nullptr);
147-
assert(L == I);
148151
size_t Idx = I - 1;
149-
if (Idx >= Beg && Idx < End)
152+
if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
153+
dfsan_label L = dfsan_create_label("", nullptr);
154+
assert(L == I - InputLabelBeg);
150155
dfsan_set_label(L, Buf + Idx, 1);
156+
}
151157
}
152158
dfsan_label SizeL = dfsan_create_label("", nullptr);
153-
assert(SizeL == InputLen + 1);
159+
InputSizeLabel = SizeL;
160+
assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
154161
dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
155162

156163
LLVMFuzzerTestOneInput(Buf, InputLen);

compiler-rt/lib/fuzzer/scripts/collect_data_flow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ def main(argv):
6565
tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
6666
ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
6767
if ret and r[1] - r[0] >= 2:
68-
q.append([r[0], (r[1] + r[0]) / 2])
69-
q.append([(r[1] + r[0]) / 2, r[1]])
68+
q.append([r[0], (r[1] + r[0]) // 2])
69+
q.append([(r[1] + r[0]) // 2, r[1]])
7070
else:
7171
outputs.append(tmpfile)
7272
print("******* Success: ", r)

compiler-rt/test/fuzzer/dataflow.test

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,14 @@ USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT
8282
USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001|
8383
USE_DATA_FLOW_TRACE-DGA: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011|
8484
USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function
85+
86+
# Test that we can run collect_data_flow on a long input (>2**16 bytes)
87+
RUN: rm -rf %t/OUT
88+
RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input
89+
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/very_long_input %t/OUT | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT
90+
RUN: rm %t/IN/very_long_input
91+
COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001]
92+
COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[75000, 150001]
93+
COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[112500, 150001]
94+
COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[{{[0123456789]+}}, 150001]
95+
COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[0, {{[0123456789]+}}]

0 commit comments

Comments
 (0)