Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 14 additions & 40 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,26 +216,20 @@ Fixtures and crash inputs produced by the fuzzing infrastructure can be download

### Setup

1. **Install dependencies:**
```sh
pip install -e ".[octane]"
```

2. **GCS credentials** (for downloading artifacts from Google Cloud Storage):

Credentials are auto-detected in this order:
1. `GOOGLE_APPLICATION_CREDENTIALS` environment variable
2. gcloud legacy credentials (`~/.config/gcloud/legacy_credentials/<account>/adc.json`)
3. GCE metadata service (when running on Google Compute Engine)

For manual setup:
```sh
# Option 1: Use gcloud CLI (recommended)
gcloud auth application-default login

# Option 2: Set credentials path explicitly
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
```
Artifact downloads are proxied through the API server by default — no cloud
credentials are required.

For **optional** direct GCS/S3 downloads (faster, avoids the API proxy hop),
install the extra dependencies and configure credentials:

```sh
pip install -e ".[octane]"
```

Credentials are auto-detected in this order:
1. `GOOGLE_APPLICATION_CREDENTIALS` environment variable
2. gcloud legacy credentials (`~/.config/gcloud/legacy_credentials/<account>/adc.json`)
3. GCE metadata service (when running on Google Compute Engine)

### Debugging workflow

Expand All @@ -261,26 +255,6 @@ solana-conformance debug-mismatches -n sol_elf_loader_diff -l 5 \
-s $SOLFUZZ_TARGET -t $FIREDANCER_TARGET -o debug_output/
```

### GCS Authentication Troubleshooting

GCS credentials are auto-detected from gcloud legacy credentials, so most users won't need manual setup. If you still see authentication errors:

```sh
# Check if gcloud is authenticated
gcloud auth list

# Check for legacy credentials (auto-detected)
ls ~/.config/gcloud/legacy_credentials/*/adc.json

# Re-authenticate if needed
gcloud auth application-default login

# Or set credentials explicitly
export GOOGLE_APPLICATION_CREDENTIALS=~/.config/gcloud/legacy_credentials/<account>/adc.json
```

**Note:** On GCE VMs without a service account attached, the metadata service won't work. The tool will automatically fall back to gcloud legacy credentials if available.

## Setting up Environment
To setup the `solana-conformance` environment, run the following command and you will be all set:
```
Expand Down
1 change: 1 addition & 0 deletions src/test_suite/fuzz_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
context_human_encode_fn=gossip_codec.encode_input,
context_human_decode_fn=gossip_codec.decode_input,
effects_human_encode_fn=gossip_codec.encode_output,
raw_binary_io=True,
)


Expand Down
3 changes: 3 additions & 0 deletions src/test_suite/fuzz_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ class HarnessCtx:
effects_human_decode_fn: Callable[[EffectsType], None] = generic_human_decode
regenerate_transformation_fn: Callable[[FixtureType], None] = generic_transform
supports_flatbuffers: bool = False
raw_binary_io: bool = (
False # context.data passed as raw bytes; output is a raw single-byte bool
)
fixture_type: Type[FixtureType] = field(init=False)
context_type: Type[ContextType] = field(init=False)
effects_type: Type[EffectsType] = field(init=False)
Expand Down
35 changes: 21 additions & 14 deletions src/test_suite/multiprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,13 @@ def process_target(
- invoke_pb.InstrEffects | None: Result of instruction execution.
"""

serialized_instruction_context = context.SerializeToString(deterministic=True)
if serialized_instruction_context is None:
return None

# Prepare input data and output buffers
in_data = serialized_instruction_context
if harness_ctx.raw_binary_io:
in_data = context.data
else:
serialized_instruction_context = context.SerializeToString(deterministic=True)
if serialized_instruction_context is None:
Comment on lines +179 to +183
return None
in_data = serialized_instruction_context
in_ptr = (ctypes.c_uint8 * len(in_data))(*in_data)
in_sz = len(in_data)
out_sz = ctypes.c_uint64(OUTPUT_BUFFER_SIZE)
Expand Down Expand Up @@ -209,7 +210,14 @@ def process_target(
# Process the output
output_data = bytearray(globals.output_buffer_pointer[: out_sz.value])
output_object = harness_ctx.effects_type()
output_object.ParseFromString(output_data)

if harness_ctx.raw_binary_io and len(output_data) == 1:
for field_desc in output_object.DESCRIPTOR.fields:
if field_desc.type == field_desc.TYPE_BOOL:
setattr(output_object, field_desc.name, output_data[0] != 0)
break
Comment on lines +214 to +218
else:
output_object.ParseFromString(output_data)

return output_object

Expand Down Expand Up @@ -1017,9 +1025,9 @@ def download_and_process(source):
api_origin=api_origin,
http2=True,
) as client:
# If we have cached metadata with a BugRecord, download
# directly from the GCS/S3 URLs to avoid a redundant
# /api/bugs/<hash> round-trip.
# If we have cached metadata with a BugRecord, use its
# URLs directly to avoid a redundant /api/bugs/<hash>
# round-trip.
bug_record = (
getattr(repro_metadata, "bug_record", None)
if repro_metadata
Expand Down Expand Up @@ -1112,10 +1120,9 @@ def download_single_crash(source):
api_origin=api_origin,
http2=True,
) as client:
# If we have cached metadata with a BugRecord, download directly
# from the GCS/S3 URLs to avoid a redundant /api/bugs/<hash>
# round-trip (which can 404 due to bundle_id scoping in
# standalone mode).
# If we have cached metadata with a BugRecord, use its URLs
# directly to avoid a redundant /api/bugs/<hash> round-trip
# (which can 404 due to bundle_id scoping in standalone mode).
cached_meta = None
if hasattr(globals, "repro_metadata_cache") and crash_hash in getattr(
globals, "repro_metadata_cache", {}
Expand Down
80 changes: 50 additions & 30 deletions src/test_suite/octane_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
- /api/bugs - List all bugs with full metadata (supports filters: lineages, hashes, statuses, run_id, bundle_id)
- /api/bugs/<hash> - Get single bug by hash
- /api/bugs/<hash>/artifact - Get artifact download URLs
- /api/artifact/download - Proxy endpoint for downloading artifacts
- /api/health - Health check

Key features:
- Server-side filtering: lineages, hashes, statuses, run_id all combined with AND logic
- Direct GCS/S3 downloads: artifacts are downloaded directly from cloud storage
- Artifact downloads: direct GCS/S3 when the optional SDKs and credentials are available, proxied through the API server otherwise
- Reproducible bugs: use statuses=REPRO_BUG_STATUSES or get_reproducible_bugs()

Default API endpoint: gusc1b-fdfuzz-orchestrator1.jumpisolated.com:5000
Expand Down Expand Up @@ -425,12 +426,10 @@ class OctaneAPIClient:
API client for the native Octane orchestrator API.

This client uses the native Octane API endpoints (/api/bugs, etc.)
IMPORTANT: All artifact downloads are performed directly from cloud storage
(GCS/S3). The Octane API only provides metadata and download URLs, it never
proxies artifact bytes.
Artifact downloads use direct GCS/S3 access when the SDK and credentials
are available, and fall back to proxying through the API server otherwise.

Requires google-cloud-storage and/or boto3 for direct cloud downloads.
Install with: pip install "solana-conformance[octane]"
For direct downloads, install optional deps: pip install "solana-conformance[octane]"
"""

def __init__(
Expand Down Expand Up @@ -688,10 +687,7 @@ def download_bug_artifact_native(
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> bytes:
"""
Download bug artifact by first getting URLs from API, then downloading directly from GCS/S3.

This is the preferred method as it downloads directly from cloud storage
without proxying through the Octane server.
Download bug artifact by first getting URLs from API, then downloading.

Args:
bug_hash: The bug fingerprint hash.
Expand Down Expand Up @@ -969,32 +965,56 @@ def _download_from_s3(

return data

def _download_via_http(
self,
url: str,
) -> bytes:
"""Download from an HTTP/HTTPS URL."""
response = self.client.get(url)
response.raise_for_status()
data = response.content

# Update shared progress bar
try:
import test_suite.globals as globals

if globals.download_progress_bar is not None:
globals.download_progress_bar.update(len(data))
except:
pass

return data

def _proxy_url(self, cloud_url: str) -> str:
"""Convert a gs:// or s3:// URL to an API-proxied HTTP URL."""
return f"{self.api_origin}/api/artifact/download?url={urllib.parse.quote(cloud_url, safe='')}"

def _download_from_url(
self,
url: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> bytes:
"""Download from a URL (GCS, S3, or HTTP)."""
if url.startswith("gs://"):
return self._download_from_gcs(url, progress_callback)
elif url.startswith("s3://"):
return self._download_from_s3(url, progress_callback)
elif url.startswith("http://") or url.startswith("https://"):
# HTTP download
response = self.client.get(url)
response.raise_for_status()
data = response.content
"""Download from a URL (GCS, S3, or HTTP).

# Update shared progress bar
For gs:// and s3:// URLs, attempts direct download first if the
appropriate SDK is installed (google-cloud-storage or boto3).
Falls back to proxying through the API server if the SDK is missing
or direct download fails (e.g. missing credentials).
"""
if url.startswith("gs://") or url.startswith("s3://"):
# Try direct download if SDK is available
try:
import test_suite.globals as globals

if globals.download_progress_bar is not None:
globals.download_progress_bar.update(len(data))
except:
if url.startswith("gs://"):
return self._download_from_gcs(url, progress_callback)
else:
return self._download_from_s3(url, progress_callback)
except (ImportError, Exception):
pass

return data
# Fall back to API proxy (no SDK/credentials needed)
return self._download_via_http(self._proxy_url(url))
elif url.startswith("http://") or url.startswith("https://"):
return self._download_via_http(url)
else:
raise ValueError(f"Unsupported URL scheme: {url}")

Expand Down Expand Up @@ -1151,7 +1171,7 @@ def download_repro_data(
Download repro data by hash: .fix first, then .fuzz as fallback.

Use this for download-repro / download-repros commands.
Downloads directly from cloud storage (GCS/S3), never proxied through Octane.
Uses direct GCS/S3 download if available, otherwise proxied through the API server.

Args:
repro_hash: Hash of the repro to download.
Expand Down Expand Up @@ -1180,7 +1200,7 @@ def download_fixture_data(
Download fixture data (.fix file) ONLY by hash - no fallback to .fuzz.

Use this for download-fixture / download-fixtures commands.
Downloads directly from cloud storage (GCS/S3), never proxied through Octane.
Uses direct GCS/S3 download if available, otherwise proxied through the API server.

Args:
artifact_hash: Hash of the artifact (bug hash) to download.
Expand Down Expand Up @@ -1212,7 +1232,7 @@ def download_crash_data(
Download crash data (.fuzz file) ONLY by hash - no fallback to .fix.

Use this for download-crash / download-crashes commands.
Downloads directly from cloud storage (GCS/S3), never proxied through Octane.
Uses direct GCS/S3 download if available, otherwise proxied through the API server.

Args:
crash_hash: Hash of the crash (bug hash) to download.
Expand Down
78 changes: 78 additions & 0 deletions src/test_suite/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,6 +1539,18 @@ def fetch_repros(client):
if num_duplicates > 0:
print(f"Removed {num_duplicates} duplicate(s)")

for section_name in section_names_list:
harness = _infer_raw_binary_harness(section_name)
if harness is not None:
converted = _convert_raw_crashes_to_contexts(globals.inputs_dir, harness)
if converted > 0:
default_harness_ctx = next(
name for name, obj in HARNESS_MAP.items() if obj is harness
)
Comment on lines +1545 to +1549
print(
f"Converted {converted} raw crash file(s) to {harness.context_extension} context(s)"
)

create_fixtures_dir = globals.output_dir / "create_fixtures"
if create_fixtures_dir.exists():
shutil.rmtree(create_fixtures_dir)
Expand Down Expand Up @@ -1587,6 +1599,54 @@ def fetch_repros(client):
)


def _infer_raw_binary_harness(lineage: str) -> "HarnessCtx | None":
    """If *lineage* maps to a raw_binary_io harness, return it.
    Returns None otherwise (including for normal protobuf harnesses)."""

    def _core_name(entrypoint: str) -> str:
        # Strip the common prefix and version suffixes to get the bare
        # harness name (e.g. "sol_compat_..._v1" -> "...").
        name = entrypoint.removeprefix("sol_compat_")
        name = name.removesuffix("_v1")
        return name.removesuffix("_v2")

    for entrypoint, harness in ENTRYPOINT_HARNESS_MAP.items():
        if not harness.raw_binary_io:
            continue
        core = _core_name(entrypoint)
        if core and core in lineage:
            return harness
    return None


def _convert_raw_crashes_to_contexts(inputs_dir: Path, harness: "HarnessCtx") -> int:
    """Convert raw (non-protobuf) .fix files in *inputs_dir* into context
    protobuf files that a raw_binary_io harness can consume. Returns
    the number of files converted.

    Files that already parse as protobuf fixtures (metadata with a
    fn_entrypoint set) are left untouched. Each converted file is replaced
    in place: the raw bytes become the context message's ``data`` field,
    written with the harness's context extension, and the original file is
    deleted.
    """
    # Imported lazily -- presumably to avoid an import cycle with
    # multiprocessing_utils; TODO confirm.
    from test_suite.multiprocessing_utils import _MetadataOnlyFixture

    converted = 0
    # Materialize the rglob result first since files are deleted/created
    # inside the loop.
    for fix_file in list(inputs_dir.rglob(f"*{FIXTURE_EXTENSION}")):
        with open(fix_file, "rb") as f:
            raw = f.read()

        # Skip genuine protobuf fixtures: a parseable metadata block with a
        # fn_entrypoint means this is not raw crash data.
        try:
            meta = _MetadataOnlyFixture()
            meta.ParseFromString(raw)
            if meta.HasField("metadata") and meta.metadata.fn_entrypoint:
                continue
        except Exception:
            # Not parseable as a fixture -- treat it as raw crash bytes.
            pass

        try:
            ctx = harness.context_type()
            ctx.data = raw
            ctx_path = fix_file.with_suffix(harness.context_extension)
            with open(ctx_path, "wb") as f:
                f.write(ctx.SerializeToString(deterministic=True))
            # Remove the original only after the context file is written.
            fix_file.unlink()
            converted += 1
        except Exception as e:
            # Best-effort conversion: report and continue with other files.
            print(f" Warning: failed to convert {fix_file.name}: {e}")
    return converted


@app.command(help="Debug a single repro by hash.")
def debug_mismatch(
repro_hash: str = typer.Argument(
Expand Down Expand Up @@ -1679,6 +1739,24 @@ def debug_mismatch(
raise typer.Exit(code=1)
print(f"{result}\n")

# Convert raw crash files to context protobufs if needed.
# Targets like gossip (raw_binary_io) use raw binary inputs, not
# protobuf fixtures.
harness_ctx_for_lineage = _infer_raw_binary_harness(lineage)
if harness_ctx_for_lineage is not None:
converted = _convert_raw_crashes_to_contexts(
globals.inputs_dir, harness_ctx_for_lineage
)
if converted > 0:
default_harness_ctx = next(
name
for name, obj in HARNESS_MAP.items()
if obj is harness_ctx_for_lineage
)
Comment on lines +1747 to +1755
print(
f"Converted {converted} raw crash file(s) to {harness_ctx_for_lineage.context_extension} context(s)"
)

# Deduplicate
print("Deduplicating fixtures...")
num_duplicates = deduplicate_fixtures_by_hash(globals.inputs_dir)
Expand Down
Loading
Loading