Skip to content

Commit 5ecbaa0

Browse files
authored
[CHAOSPLT-932] Add support for internal fuzzing infra (#1372)
* Add initial fuzzer for normalize_span * Testing CI setup * Add chaos platform as code owner for fuzzing related things * Fix licence * Remove default value to use backend generated ones
1 parent 0959f2d commit 5ecbaa0

File tree

14 files changed

+827
-1
lines changed

14 files changed

+827
-1
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
.gitignore @Datadog/libdatadog
1212
.gitlab-ci.yml @Datadog/apm-common-components-core
1313
.gitlab/benchmarks.yml @Datadog/apm-common-components-core
14+
.gitlab/fuzz.yml @Datadog/chaos-platform
1415
benchmark/ @Datadog/apm-common-components-core
1516
bin_tests/ @Datadog/libdatadog-profiling
1617
build-common/ @Datadog/apm-common-components-core
@@ -64,6 +65,7 @@ tests/spawn_from_lib/ @Datadog/libdatadog-php @Datadog/libdatadog
6465
tests/windows_package/ @Datadog/apm-common-components-core
6566
tools/ @Datadog/apm-common-components-core
6667
windows/ @Datadog/libdatadog-core
68+
fuzz/ @Datadog/chaos-platform
6769

6870
# Specific overrides (must come after their general patterns above)
6971
bin_tests/tests/test_the_tests.rs @Datadog/libdatadog-core

.gitlab-ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ variables:
77

88
include:
99
- local: .gitlab/benchmarks.yml
10+
- local: .gitlab/fuzz.yml
1011

1112
trigger_internal_build:
1213
variables:

.gitlab/fuzz.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Fuzzing job configuration
2+
# This job discovers, builds, and uploads all cargo-fuzz targets to the internal fuzzing infrastructure
3+
# See ci/README_FUZZING.md for more information
4+
5+
variables:
6+
BASE_CI_IMAGE: registry.ddbuild.io/ci/benchmarking-platform:libdatadog-benchmarks
7+
8+
fuzz:
9+
tags: ["arch:amd64"]
10+
needs: []
11+
image:
12+
name: $BASE_CI_IMAGE
13+
rules:
14+
# Runs on GitLab schedules and on merges to main.
15+
# Manual runs are also allowed on branches to ease debugging and testing.
16+
- if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"'
17+
allow_failure: true
18+
- if: $CI_COMMIT_BRANCH == "main"
19+
allow_failure: true
20+
- when: manual
21+
allow_failure: true
22+
timeout: 1h
23+
script:
24+
- VAULT_VERSION=1.15.4 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault
25+
- rustup default nightly
26+
- cargo install cargo-fuzz
27+
- pip3 install requests toml
28+
- python3 fuzz/fuzz_infra.py
29+
allow_failure: true
30+
variables:
31+
KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/fuzz_infra.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Script for running fuzz targets in the internal fuzzing infrastructure.
5+
This is called from .gitlab/fuzz.yml.
6+
7+
If you want to run this locally, please set the VAULT_FUZZING_TOKEN environment variable
8+
(i.e: ddtool auth token security-fuzzing-platform --datacenter=us1.ddbuild.io)
9+
10+
In CI, this is expected to run with the base image defined in ./ci/Dockerfiles/Dockerfile.fuzz.
11+
12+
"""
13+
14+
import os
15+
from subprocess import Popen, PIPE
16+
import requests
17+
import toml
18+
19+
DEFAULT_FUZZING_SLACK_CHANNEL = "fuzzing-ops" # TODO: change me once we validated everything is not spamming and set up correctly.
20+
# Let's reuse the token for all requests to avoid issues.
21+
# The process should be short lived enough that the token should be valid for the duration.
22+
_cached_token = None
23+
24+
25+
def get_auth_header():
    """Return the bearer token used to authenticate against the fuzzing API.

    The ``VAULT_FUZZING_TOKEN`` environment variable takes precedence whenever
    it is set. Otherwise the token is read from Vault and stored in the
    module-level ``_cached_token`` so later calls reuse it.

    Returns:
        The token string. An empty string is returned when the Vault lookup
        fails, so callers must treat a falsy result as "no credentials".
    """
    global _cached_token
    env_token = os.getenv("VAULT_FUZZING_TOKEN")
    if env_token is not None:
        return env_token

    if _cached_token is None:
        # Use an argument list rather than a shell string: no shell is
        # involved and the exit status of vault can be checked.
        vault_cmd = [
            "vault",
            "read",
            "-field=token",
            "identity/oidc/token/security-fuzzing-platform",
        ]
        try:
            process = Popen(vault_cmd, stdout=PIPE)
            stdout, _ = process.communicate()
        except OSError as e:
            # Typically: the vault binary is not installed / not on PATH.
            print(f"❌ Could not run vault: {e}")
            return ""
        if process.returncode != 0:
            # Do not cache the output of a failed run as if it were a token.
            print(f"❌ vault exited with code {process.returncode}")
            return ""
        _cached_token = stdout.decode("utf-8").strip()
    return _cached_token
39+
40+
41+
def get_commit_sha():
    """Return the commit SHA used as the build version for uploads.

    The ``CI_COMMIT_SHA`` environment variable is used when it is set and
    non-empty. Otherwise (for example on a local run) the SHA of ``HEAD`` is
    obtained with ``git rev-parse HEAD``.

    Returns:
        The commit SHA as a string, or ``None`` when it cannot be determined.
    """
    ci_sha = os.getenv("CI_COMMIT_SHA")
    if ci_sha:
        return ci_sha

    # Fallback so the API URL is not built with the literal string "None".
    try:
        process = Popen(["git", "rev-parse", "HEAD"], stdout=PIPE, stderr=PIPE)
        stdout, _ = process.communicate()
    except OSError:
        return None
    if process.returncode != 0:
        return None
    return stdout.decode("utf-8").strip() or None
43+
44+
45+
def upload_fuzz(
    directory,
    git_sha,
    fuzz_test,
    team="apm-sdk-rust",
):
    """Upload an already-built fuzz target and start a fuzzer for it.

    Three steps are performed against the internal fuzzing API:

    1. request a presigned upload URL for the package / commit,
    2. upload the binary found at
       ``<directory>/target/x86_64-unknown-linux-gnu/release/<fuzz_test>``,
    3. ask the API to start a ``cargo-fuzz`` fuzzer for that build.

    Args:
        directory: Crate directory containing the ``target`` folder; it is
            also used to derive the package name.
        git_sha: Commit SHA used as the build version.
        fuzz_test: Name of the fuzz target binary.
        team: Team name sent to the API with the fuzzer request.

    Raises:
        SystemExit: If no auth token could be obtained.
        requests.HTTPError: If the presigned-URL request or the upload
            returns an error status.
        OSError: If the built binary cannot be opened.

    A failure of the final "start fuzzer" request is reported on stdout and
    the function returns without raising.
    """
    api_url = "https://fuzzing-api.us1.ddbuild.io/api/v1"

    # Get the auth token a single time and reuse it for all requests
    auth_header = get_auth_header()
    if not auth_header:
        print("❌ Failed to get auth header")
        raise SystemExit(1)

    # We let the API handle package name length validation.
    # It will be returned, truncated / reformatted if needed, in the JSON response.
    # We simply force the prefix to be `libdatadog-` for ease of filtering
    # (until we improve that part on the API side).
    # As a note: more than 63 characters will be truncated by the API.
    pkgname_prefix = "libdatadog-"
    pkgname = (
        (pkgname_prefix + directory + "-" + fuzz_test)
        .replace("_", "-")
        .replace("/", "-")
    )
    pkgname = pkgname.strip("-.")  # Remove leading/trailing dashes and dots.
    print(f"pkgname: {pkgname}")

    print(f"Getting presigned URL for {pkgname}...")
    auth_headers = {"Authorization": f"Bearer {auth_header}"}
    presigned_response = requests.post(
        f"{api_url}/apps/{pkgname}/builds/{git_sha}/url",
        headers=auth_headers,
        timeout=30,
    )
    if not presigned_response.ok:
        print(
            f"❌ Failed to get presigned URL (status {presigned_response.status_code})"
        )
        _print_error_response(presigned_response)
    presigned_response.raise_for_status()
    presigned_url = presigned_response.json()["data"]["url"]

    print(f"Uploading {pkgname} ({fuzz_test}) for {git_sha}...")
    # Upload file to presigned URL
    binary_path = f"{directory}/target/x86_64-unknown-linux-gnu/release/{fuzz_test}"
    with open(binary_path, "rb") as f:
        upload_response = requests.put(presigned_url, data=f, timeout=300)
    if not upload_response.ok:
        print(f"❌ Failed to upload file (status {upload_response.status_code})")
        _print_error_response(upload_response)
    upload_response.raise_for_status()

    print(f"Starting fuzzer for {pkgname} ({fuzz_test})...")
    # Start new fuzzer
    run_payload = {
        "app": pkgname,
        "debug": False,
        "version": git_sha,
        "type": "cargo-fuzz",
        "binary": fuzz_test,
        "team": team,
        "repository_url": "https://github.com/DataDog/libdatadog",
        "slack_channel": DEFAULT_FUZZING_SLACK_CHANNEL,
    }
    json_headers = {**auth_headers, "Content-Type": "application/json"}

    try:
        response = requests.post(
            f"{api_url}/apps/{pkgname}/fuzzers",
            headers=json_headers,
            json=run_payload,
            timeout=30,
        )
    except requests.RequestException as e:
        # Connection error / timeout: there is no response object to inspect.
        print(f"❌ API request failed: {e}")
        return

    if not response.ok:
        print(f"❌ API request failed with status {response.status_code}")
        _print_error_response(response)
        # Do not report success for a request the API rejected.
        return

    print(f"✅ Started fuzzer for {pkgname} ({fuzz_test})...")
    try:
        print(response.json())
    except ValueError:
        # Successful status but a non-JSON body: show it verbatim.
        print(response.text)


def _print_error_response(response):
    """Print the body of a failed HTTP response, as JSON when it parses."""
    try:
        error_detail = response.json()
        print(f"Error details: {error_detail}")
    except Exception as e:
        print(f"Raw error response: {response.text} {e}")
146+
147+
148+
def search_fuzz_tests(directory) -> list[str]:
    """List the cargo-fuzz targets declared in ``directory``.

    Runs ``cargo +nightly fuzz list`` with ``directory`` as working directory.

    Args:
        directory: Path of the fuzz crate to inspect.

    Returns:
        The target names, one per non-blank output line. An empty list is
        returned (after printing diagnostics) when the command cannot be
        started or exits with a non-zero status.
    """
    fuzz_list_cmd = ["cargo", "+nightly", "fuzz", "list"]
    try:
        process = Popen(fuzz_list_cmd, cwd=directory, stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
    except OSError as e:
        # cargo is not installed, or `directory` does not exist.
        print(f"❌ Failed to list fuzz tests in {directory}")
        print(f"Command: {' '.join(fuzz_list_cmd)}")
        print(f"Error output: {e}")
        return []

    if process.returncode != 0:
        print(f"❌ Failed to list fuzz tests in {directory}")
        print(f"Command: {' '.join(fuzz_list_cmd)}")
        print(f"Exit code: {process.returncode}")
        if stderr:
            print(f"Error output: {stderr.decode('utf-8')}")
        if stdout:
            print(f"Standard output: {stdout.decode('utf-8')}")
        return []

    # Ignore blank lines so they are never treated as target names.
    return [
        line.strip() for line in stdout.decode("utf-8").splitlines() if line.strip()
    ]
164+
165+
166+
def build_fuzz(directory, fuzz_test) -> bool:
    """Build one cargo-fuzz target.

    Runs ``cargo +nightly fuzz build <fuzz_test>`` with ``directory`` as
    working directory; the command's output goes to this process's stdio.

    Args:
        directory: Path of the fuzz crate.
        fuzz_test: Name of the fuzz target to build.

    Returns:
        ``True`` when the build exits with status 0, ``False`` otherwise,
        including when the command cannot be started at all.
    """
    build_cmd = ["cargo", "+nightly", "fuzz", "build", fuzz_test]
    try:
        return Popen(build_cmd, cwd=directory).wait() == 0
    except OSError as e:
        # cargo is not installed, or `directory` does not exist.
        print(f"❌ Could not run {' '.join(build_cmd)} in {directory}: {e}")
        return False
169+
170+
171+
# We want to search for all crates in the repository.
172+
# We can't simply run `cargo fuzz list` in the root directory.
173+
def is_fuzz_crate(cargo_toml_path) -> bool:
    """Check whether a Cargo.toml sets ``cargo-fuzz`` in its package metadata.

    Args:
        cargo_toml_path: Path to the ``Cargo.toml`` file to inspect.

    Returns:
        ``True`` when ``package.metadata.cargo-fuzz`` is present and truthy.
        ``False`` otherwise, including when the file cannot be read or parsed
        (a warning is printed in that case).
    """
    try:
        # TOML documents are UTF-8; do not depend on the locale's default.
        with open(cargo_toml_path, "r", encoding="utf-8") as f:
            cargo_config = toml.load(f)
        flag = (
            cargo_config.get("package", {})
            .get("metadata", {})
            .get("cargo-fuzz", False)
        )
        # Normalise to a real bool to honour the annotated return type.
        return bool(flag)
    except Exception as e:
        print(f"Warning: Could not parse {cargo_toml_path}: {e}")
        return False
186+
187+
188+
def find_cargo_roots(directory) -> list[str]:
    """Find every directory under ``directory`` that is a cargo-fuzz crate.

    A directory qualifies when it contains a ``Cargo.toml`` for which
    ``is_fuzz_crate`` returns a truthy value. Directories named ``target``
    are never descended into.

    Args:
        directory: Root of the tree to scan.

    Returns:
        The matching directory paths, in a deterministic (sorted, top-down)
        traversal order.
    """
    print(f"Finding cargo roots in {directory}")
    cargo_roots = []
    for root, dirs, files in os.walk(directory):
        # Prune in place: skip build artifacts under `target`, and sort the
        # remaining sub-directories so the traversal order does not depend on
        # the filesystem.
        dirs[:] = sorted(d for d in dirs if d != "target")

        if "Cargo.toml" not in files:
            continue

        cargo_toml_path = os.path.join(root, "Cargo.toml")
        if is_fuzz_crate(cargo_toml_path):
            print(f"Found fuzz cargo root: {root}")
            cargo_roots.append(root)
        else:
            print(f"Skipping non-fuzz cargo root: {root}")
    return cargo_roots
204+
205+
206+
if __name__ == "__main__":
    # Entry point: discover every fuzz crate below the current working
    # directory, build each of its targets, then upload the ones that built.
    discovered_roots = find_cargo_roots(os.getcwd())
    print(discovered_roots)
    commit = get_commit_sha()

    for crate_dir in discovered_roots:
        targets = search_fuzz_tests(crate_dir)
        print(f"Found {len(targets)} fuzz tests in {crate_dir}")
        if not targets:
            print(f"No fuzz tests found in {crate_dir}, skipping...")
            continue

        for target in targets:
            print(f"Building fuzz for {crate_dir}/{target} ({commit})")
            built_ok = build_fuzz(crate_dir, target)
            if not built_ok:
                print(
                    f"❌ Failed to build fuzz for {crate_dir}/{target} ({commit}). Skipping uploading."
                )
                continue

            # The crate path is made relative to the repository root so the
            # generated package name is libdatadog-<foldername>-<fuzz-test>.
            # In the future, the API will support a custom path flag.
            relative_dir = os.path.relpath(crate_dir, os.path.abspath(os.getcwd()))
            print(f"Uploading fuzz for {relative_dir}/{target} ({commit})")
            upload_fuzz(relative_dir, commit, target)
233+

libdd-trace-normalization/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ bench = false
1515
[dependencies]
1616
anyhow = "1.0"
1717
libdd-trace-protobuf = { version = "1.0.0", path = "../libdd-trace-protobuf" }
18+
arbitrary = { version = "1.3", features = ["derive"], optional = true }
19+
20+
[features]
21+
fuzzing = ["arbitrary"]
1822

1923
[dev-dependencies]
2024
rand = "0.8.5"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
target
2+
corpus
3+
artifacts
4+

0 commit comments

Comments
 (0)