From 8819cea6df80b19a80adc6a9564cf75b7aa91bf0 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 00:56:03 +1000 Subject: [PATCH 1/7] replace wasm-gen with wasm-encoder and add spec filtering --- Cargo.lock | 54 +- cmd/crates/soroban-spec-tools/Cargo.toml | 1 + cmd/crates/soroban-spec-tools/src/contract.rs | 247 ++++++++++ cmd/crates/soroban-spec-tools/src/filter.rs | 461 ++++++++++++++++++ cmd/crates/soroban-spec-tools/src/lib.rs | 1 + cmd/soroban-cli/Cargo.toml | 2 +- .../src/commands/contract/build.rs | 76 ++- 7 files changed, 813 insertions(+), 29 deletions(-) create mode 100644 cmd/crates/soroban-spec-tools/src/filter.rs diff --git a/Cargo.lock b/Cargo.lock index a02cf20c8f..ab93ec0e89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,12 +827,6 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" -[[package]] -name = "byteorder" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855" - [[package]] name = "byteorder" version = "1.5.0" @@ -2938,7 +2932,7 @@ version = "3.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebcc3aff044e5944a8fbaf69eb277d11986064cba30c468730e8b9909fb551c" dependencies = [ - "byteorder 1.5.0", + "byteorder", "dbus-secret-service", "log", "secret-service", @@ -2995,10 +2989,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "leb128" -version = "0.2.5" +name = "leb128fmt" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" 
[[package]] name = "ledger-apdu" @@ -3027,7 +3021,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45ba81a1f5f24396b37211478aff7fbcd605dd4544df8dbed07b9da3c2057aee" dependencies = [ - "byteorder 1.5.0", + "byteorder", "cfg-if", "hex", "hidapi", @@ -3858,7 +3852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.106", @@ -4995,9 +4989,9 @@ dependencies = [ "ulid", "url", "walkdir", - "wasm-gen", + "wasm-encoder", "wasm-opt", - "wasmparser", + "wasmparser 0.116.1", "which", "whoami", "zeroize", @@ -5019,7 +5013,7 @@ dependencies = [ "soroban-wasmi", "static_assertions", "stellar-xdr", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5065,7 +5059,7 @@ dependencies = [ "soroban-wasmi", "static_assertions", "stellar-strkey 0.0.13", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5153,7 +5147,7 @@ dependencies = [ "base64 0.22.1", "stellar-xdr", "thiserror 1.0.69", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5200,7 +5194,8 @@ dependencies = [ "stellar-xdr", "thiserror 1.0.69", "tokio", - "wasmparser", + "wasm-encoder", + "wasmparser 0.116.1", "which", ] @@ -5351,7 +5346,7 @@ version = "23.4.1" dependencies = [ "async-trait", "bollard", - "byteorder 1.5.0", + "byteorder", "ed25519-dalek", "env_logger", "hex", @@ -6529,13 +6524,13 @@ dependencies = [ ] [[package]] -name = "wasm-gen" -version = "0.1.4" +name = "wasm-encoder" +version = "0.235.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b854b1461005a7b3365742310f7faa3cac3add809d66928c64a40c7e9e842ebb" +checksum = "b3bc393c395cb621367ff02d854179882b9a351b4e0c93d1397e6090b53a5c2a" dependencies = [ - "byteorder 0.5.3", - "leb128", + "leb128fmt", + "wasmparser 0.235.0", ] [[package]] @@ -6619,6 +6614,17 @@ 
dependencies = [ "semver", ] +[[package]] +name = "wasmparser" +version = "0.235.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917" +dependencies = [ + "bitflags", + "indexmap 2.11.0", + "semver", +] + [[package]] name = "wasmparser-nostd" version = "0.100.2" diff --git a/cmd/crates/soroban-spec-tools/Cargo.toml b/cmd/crates/soroban-spec-tools/Cargo.toml index a17a5d31f1..d2b719eaec 100644 --- a/cmd/crates/soroban-spec-tools/Cargo.toml +++ b/cmd/crates/soroban-spec-tools/Cargo.toml @@ -27,6 +27,7 @@ hex = { workspace = true } wasmparser = { workspace = true } base64 = { workspace = true } thiserror = "1.0.31" +wasm-encoder = "0.235.0" [dev-dependencies] diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 0f6bef7a04..753a998bb1 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -119,6 +119,87 @@ impl Spec { ScSpecEntry::read_xdr_iter(&mut read).collect::, xdr::Error>>()?, )) } + + /// Returns a filtered version of the spec with unused types removed. + /// + /// This removes any type definitions that are not referenced (directly or + /// transitively) by any function in the contract. Functions and events are + /// always preserved. + #[must_use] + pub fn filter_unused_types(&self) -> Vec { + crate::filter::filter_unused_types(self.spec.clone()) + } + + /// Returns the filtered spec entries serialized as XDR bytes. + /// + /// This is useful for replacing the contractspecv0 custom section in a WASM + /// file with a smaller version that only contains used types. 
+    pub fn filtered_spec_xdr(&self) -> Result<Vec<u8>, Error> {
+        let filtered = self.filter_unused_types();
+        let mut buffer = Vec::new();
+        // Entries are written back-to-back into one buffer with no extra framing.
+        let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none());
+        for entry in filtered {
+            entry.write_xdr(&mut writer)?;
+        }
+        Ok(buffer)
+    }
+}
+
+/// Replaces a custom section in WASM bytes with new content.
+///
+/// This function parses the WASM to find the target custom section, then rebuilds
+/// the WASM by copying all other sections verbatim and appending the new custom
+/// section at the end.
+///
+/// Every custom section named `section_name` is dropped, so a module that carried
+/// several of them ends up with exactly one. The new section is appended even when
+/// the input had no section with that name.
+///
+/// # Arguments
+///
+/// * `wasm_bytes` - The original WASM binary
+/// * `section_name` - The name of the custom section to replace
+/// * `new_content` - The new content for the custom section
+///
+/// # Returns
+///
+/// A new WASM binary with the custom section replaced.
+///
+/// # Errors
+///
+/// Returns an error if `wasm_bytes` cannot be parsed as a WASM module.
+pub fn replace_custom_section(
+    wasm_bytes: &[u8],
+    section_name: &str,
+    new_content: &[u8],
+) -> Result<Vec<u8>, Error> {
+    use wasm_encoder::{CustomSection, Module, RawSection};
+    use wasmparser::Payload;
+
+    let mut module = Module::new();
+
+    let parser = wasmparser::Parser::new(0);
+    for payload in parser.parse_all(wasm_bytes) {
+        let payload = payload?;
+
+        match &payload {
+            // Skip the target custom section - we'll append the new one at the end
+            Payload::CustomSection(section) if section.name() == section_name => {
+                continue;
+            }
+            // For all other payloads that represent sections, copy them verbatim.
+            // Payloads for which `as_section` returns `None` are not copied.
+            _ => {
+                if let Some((id, range)) = payload.as_section() {
+                    let raw = RawSection {
+                        id,
+                        data: &wasm_bytes[range],
+                    };
+                    module.section(&raw);
+                }
+            }
+        }
+    }
+
+    // Append the new custom section
+    let custom = CustomSection {
+        name: section_name.into(),
+        data: new_content.into(),
+    };
+    module.section(&custom);
+
+    Ok(module.finish())
 }

 impl Display for Spec {
@@ -296,3 +377,169 @@ fn format_name(lib: &StringM<80>, name: &StringM<60>) -> String {
         )
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn 
test_filter_spec_on_empty_contract() { + // This test checks that filtering a contract with events but no UDT references + // keeps the events (as per design) but would filter any unused UDTs + + // Skip if the file doesn't exist (it's in a different repo) + let wasm_path = "/Users/leighmcculloch/Code/rs-soroban-sdk/tests/empty/out/test_empty.wasm"; + if !std::path::Path::new(wasm_path).exists() { + return; + } + + let wasm_bytes = std::fs::read(wasm_path).unwrap(); + let spec = Spec::new(&wasm_bytes).unwrap(); + + println!("Original spec entries: {}", spec.spec.len()); + for entry in &spec.spec { + match entry { + ScSpecEntry::FunctionV0(f) => { + println!(" Function: {}", f.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::EventV0(e) => { + println!(" Event: {}", e.name.to_utf8_string_lossy()); + } + } + } + + let filtered = spec.filter_unused_types(); + println!("\nFiltered spec entries: {}", filtered.len()); + for entry in &filtered { + match entry { + ScSpecEntry::FunctionV0(f) => { + println!(" Function: {}", f.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::EventV0(e) => { + println!(" Event: {}", e.name.to_utf8_string_lossy()); + } + } + } + + // The function should still be there + 
assert!(filtered + .iter() + .any(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + + // Events should be preserved + let event_count = filtered + .iter() + .filter(|e| matches!(e, ScSpecEntry::EventV0(_))) + .count(); + assert!(event_count > 0, "Events should be preserved"); + } + + #[test] + fn test_filter_on_custom_types_contract() { + // Test filtering on the custom_types contract wasm + let wasm_path = "/Users/leighmcculloch/Code/stellar-cli-spec-clean/target/wasm32v1-none/release/test_custom_types.wasm"; + if !std::path::Path::new(wasm_path).exists() { + eprintln!("Skipping test: wasm file not found at {}", wasm_path); + return; + } + + let wasm_bytes = std::fs::read(wasm_path).unwrap(); + let spec = Spec::new(&wasm_bytes).unwrap(); + + println!("\n=== CUSTOM TYPES CONTRACT ==="); + println!("Original spec entries: {}", spec.spec.len()); + + // Count functions and UDTs + let func_count = spec + .spec + .iter() + .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) + .count(); + let udt_count = spec.spec.len() - func_count; + + println!("Functions: {}", func_count); + println!("UDTs: {}", udt_count); + + // List all UDTs before filtering + for entry in &spec.spec { + match entry { + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()) + } + ScSpecEntry::EventV0(e) => println!(" Event: {}", e.name.to_utf8_string_lossy()), + _ => {} + } + } + + let filtered = spec.filter_unused_types(); + println!("\nFiltered spec entries: {}", filtered.len()); + + // All types in this contract are used by functions, so nothing should be filtered + // Verify key types are preserved: + // - Test (used by strukt, strukt_hel, and transitively by ComplexEnum, 
TupleStruct) + // - SimpleEnum (used by simple, and transitively by ComplexEnum, TupleStruct) + // - RoyalCard (used by card) + // - ComplexEnum (used by complex) + // - TupleStruct (used by tuple_strukt) + // - RecursiveEnum (used by recursive_enum) + + let has_test = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtStructV0(s) if s.name.to_utf8_string_lossy() == "Test") + }); + let has_simple_enum = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "SimpleEnum") + }); + let has_complex_enum = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "ComplexEnum") + }); + + assert!(has_test, "Test struct should be preserved"); + assert!(has_simple_enum, "SimpleEnum should be preserved"); + assert!(has_complex_enum, "ComplexEnum should be preserved"); + + // All functions should be preserved + let filtered_func_count = filtered + .iter() + .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) + .count(); + assert_eq!( + filtered_func_count, func_count, + "All functions should be preserved" + ); + + println!("Filter test passed: all used types and functions preserved"); + } +} diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs new file mode 100644 index 0000000000..4bdc192a0d --- /dev/null +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -0,0 +1,461 @@ +//! Filter unused types from contract spec entries. +//! +//! This module provides functionality to remove type definitions that are not +//! referenced by any function in the contract spec. This helps reduce WASM size +//! by eliminating unnecessary spec entries. + +use std::collections::HashSet; + +use stellar_xdr::curr::{ + ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, +}; + +/// Extracts UDT (User Defined Type) names referenced by a type definition. 
+///
+/// This function recursively traverses the type structure to find all
+/// references to user-defined types.
+fn get_type_refs(type_def: &ScSpecTypeDef) -> HashSet<String> {
+    let mut refs = HashSet::new();
+
+    match type_def {
+        // Primitive types have no UDT references
+        ScSpecTypeDef::Val
+        | ScSpecTypeDef::U64
+        | ScSpecTypeDef::I64
+        | ScSpecTypeDef::U128
+        | ScSpecTypeDef::I128
+        | ScSpecTypeDef::U32
+        | ScSpecTypeDef::I32
+        | ScSpecTypeDef::U256
+        | ScSpecTypeDef::I256
+        | ScSpecTypeDef::Bool
+        | ScSpecTypeDef::Symbol
+        | ScSpecTypeDef::Error
+        | ScSpecTypeDef::Bytes
+        | ScSpecTypeDef::BytesN(_)
+        | ScSpecTypeDef::Void
+        | ScSpecTypeDef::Timepoint
+        | ScSpecTypeDef::Duration
+        | ScSpecTypeDef::String
+        | ScSpecTypeDef::Address
+        | ScSpecTypeDef::MuxedAddress => {}
+
+        // UDT reference - add the type name
+        ScSpecTypeDef::Udt(udt) => {
+            refs.insert(udt.name.to_utf8_string_lossy());
+        }
+
+        // Composite types - recurse into contained types
+        ScSpecTypeDef::Vec(vec_type) => {
+            refs.extend(get_type_refs(&vec_type.element_type));
+        }
+        ScSpecTypeDef::Map(map_type) => {
+            refs.extend(get_type_refs(&map_type.key_type));
+            refs.extend(get_type_refs(&map_type.value_type));
+        }
+        ScSpecTypeDef::Option(opt_type) => {
+            refs.extend(get_type_refs(&opt_type.value_type));
+        }
+        ScSpecTypeDef::Result(result_type) => {
+            refs.extend(get_type_refs(&result_type.ok_type));
+            refs.extend(get_type_refs(&result_type.error_type));
+        }
+        ScSpecTypeDef::Tuple(tuple_type) => {
+            for value_type in tuple_type.value_types.iter() {
+                refs.extend(get_type_refs(value_type));
+            }
+        }
+    }
+
+    refs
+}
+
+/// Extracts all UDT names referenced by a spec entry.
+///
+/// Functions contribute their input and output types, structs their field
+/// types, unions the types of their tuple cases, and events their parameter
+/// types. Plain enums and error enums reference nothing.
+fn get_entry_type_refs(entry: &ScSpecEntry) -> HashSet<String> {
+    let mut refs = HashSet::new();
+
+    match entry {
+        ScSpecEntry::FunctionV0(func) => {
+            // Collect types from inputs
+            for input in func.inputs.iter() {
+                refs.extend(get_type_refs(&input.type_));
+            }
+            // Collect types from outputs
+            for output in func.outputs.iter() {
+                refs.extend(get_type_refs(output));
+            }
+        }
+        ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { fields, .. }) => {
+            for field in fields.iter() {
+                refs.extend(get_type_refs(&field.type_));
+            }
+        }
+        ScSpecEntry::UdtUnionV0(ScSpecUdtUnionV0 { cases, .. }) => {
+            for case in cases.iter() {
+                if let ScSpecUdtUnionCaseV0::TupleV0(tuple_case) = case {
+                    for type_def in tuple_case.type_.iter() {
+                        refs.extend(get_type_refs(type_def));
+                    }
+                }
+            }
+        }
+        // Enums and error enums don't reference other types
+        ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) => {}
+        // Events are kept unconditionally, so the types of their params must be
+        // kept too; otherwise a kept event would point at a removed UDT.
+        ScSpecEntry::EventV0(event) => {
+            for param in event.params.iter() {
+                refs.extend(get_type_refs(&param.type_));
+            }
+        }
+    }
+
+    refs
+}
+
+/// Gets the name of a UDT entry, or None if it's not a UDT.
+fn get_udt_name(entry: &ScSpecEntry) -> Option<String> {
+    match entry {
+        ScSpecEntry::UdtStructV0(s) => Some(s.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtUnionV0(u) => Some(u.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtEnumV0(e) => Some(e.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtErrorEnumV0(e) => Some(e.name.to_utf8_string_lossy()),
+        ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => None,
+    }
+}
+
+/// Filters out unused types from contract spec entries.
+///
+/// This function performs a reachability analysis starting from all functions
+/// and events. It keeps:
+/// - All functions (FunctionV0)
+/// - All events (EventV0)
+/// - All UDTs that are directly or transitively referenced by a function or
+///   by an event parameter
+///
+/// Types that are defined but never used by any function or event are removed.
+///
+/// # Example
+///
+/// If a contract has:
+/// - Function `foo` that takes `TypeA` as input
+/// - `TypeA` which references `TypeB` in a field
+/// - `TypeC` which is defined but never used
+///
+/// The result will include `foo`, `TypeA`, and `TypeB`, but not `TypeC`.
+pub fn filter_unused_types(entries: Vec<ScSpecEntry>) -> Vec<ScSpecEntry> {
+    // Build a map from type name to entry for lookup
+    let type_entries: std::collections::HashMap<String, &ScSpecEntry> = entries
+        .iter()
+        .filter_map(|entry| get_udt_name(entry).map(|name| (name, entry)))
+        .collect();
+
+    // Collect initial references from every entry that is kept unconditionally
+    // (functions and events); these are the roots of the reachability analysis.
+    let mut reachable_types: HashSet<String> = HashSet::new();
+    for entry in &entries {
+        if matches!(
+            entry,
+            ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_)
+        ) {
+            reachable_types.extend(get_entry_type_refs(entry));
+        }
+    }
+
+    // Fixed-point iteration: keep adding types referenced by reachable types
+    // until no new types are found. Circular references terminate because a
+    // name already in `reachable_types` is never added to `new_types`.
+    loop {
+        let mut new_types: HashSet<String> = HashSet::new();
+
+        for type_name in &reachable_types {
+            if let Some(entry) = type_entries.get(type_name) {
+                for referenced_type in get_entry_type_refs(entry) {
+                    if !reachable_types.contains(&referenced_type) {
+                        new_types.insert(referenced_type);
+                    }
+                }
+            }
+        }
+
+        if new_types.is_empty() {
+            break;
+        }
+
+        reachable_types.extend(new_types);
+    }
+
+    // Filter entries: keep functions, events, and reachable UDTs
+    entries
+        .into_iter()
+        .filter(|entry| {
+            match entry {
+                // Always keep functions
+                ScSpecEntry::FunctionV0(_) => true,
+                // Always keep events
+                ScSpecEntry::EventV0(_) => true,
+                // Keep UDTs only if they're reachable
+                _ => {
+                    if let Some(name) = get_udt_name(entry) {
+                        reachable_types.contains(&name)
+                    } else {
+                        true
+                    }
+                }
+            }
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use stellar_xdr::curr::{
+        ScSpecFunctionInputV0, ScSpecFunctionV0, ScSpecTypeUdt, ScSpecUdtEnumCaseV0,
+        ScSpecUdtEnumV0, ScSpecUdtErrorEnumCaseV0, ScSpecUdtErrorEnumV0, ScSpecUdtStructFieldV0,
+        StringM, VecM,
+    };
+
+    fn make_function(name: 
&str, input_types: Vec) -> ScSpecEntry { + let inputs: VecM = input_types + .into_iter() + .enumerate() + .map(|(i, type_)| ScSpecFunctionInputV0 { + doc: StringM::default(), + name: format!("arg{i}").try_into().unwrap(), + type_, + }) + .collect::>() + .try_into() + .unwrap(); + + ScSpecEntry::FunctionV0(ScSpecFunctionV0 { + doc: StringM::default(), + name: name.try_into().unwrap(), + inputs, + outputs: VecM::default(), + }) + } + + fn make_struct(name: &str, field_types: Vec<(&str, ScSpecTypeDef)>) -> ScSpecEntry { + let fields: VecM = field_types + .into_iter() + .map(|(field_name, type_)| ScSpecUdtStructFieldV0 { + doc: StringM::default(), + name: field_name.try_into().unwrap(), + type_, + }) + .collect::>() + .try_into() + .unwrap(); + + ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + fields, + }) + } + + fn make_enum(name: &str) -> ScSpecEntry { + ScSpecEntry::UdtEnumV0(ScSpecUdtEnumV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + cases: vec![ScSpecUdtEnumCaseV0 { + doc: StringM::default(), + name: "Variant".try_into().unwrap(), + value: 0, + }] + .try_into() + .unwrap(), + }) + } + + fn make_error_enum(name: &str) -> ScSpecEntry { + ScSpecEntry::UdtErrorEnumV0(ScSpecUdtErrorEnumV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + cases: vec![ScSpecUdtErrorEnumCaseV0 { + doc: StringM::default(), + name: "Error".try_into().unwrap(), + value: 1, + }] + .try_into() + .unwrap(), + }) + } + + fn udt(name: &str) -> ScSpecTypeDef { + ScSpecTypeDef::Udt(ScSpecTypeUdt { + name: name.try_into().unwrap(), + }) + } + + #[test] + fn test_removes_unused_type() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = 
filter_unused_types(entries); + + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_keeps_directly_referenced_type() { + let entries = vec![ + make_function("foo", vec![udt("UsedType")]), + make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + assert_eq!(filtered.len(), 2); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"UsedType".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_transitively_referenced_type() { + let entries = vec![ + make_function("foo", vec![udt("TypeA")]), + make_struct("TypeA", vec![("field", udt("TypeB"))]), + make_struct("TypeB", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"TypeA".to_string())); + assert!(names.contains(&"TypeB".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_all_functions() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_function("bar", vec![ScSpecTypeDef::Bool]), + ]; + + let filtered = filter_unused_types(entries); + + assert_eq!(filtered.len(), 2); + assert!(filtered + .iter() + .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + } + + #[test] + fn test_removes_unused_error_enum() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_error_enum("UsedError"), + make_error_enum("UnusedError"), + ]; + + let filtered = filter_unused_types(entries); + + // Only function should remain, no error enums are referenced + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], 
ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_keeps_error_enum_in_result() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Result(Box::new( + stellar_xdr::curr::ScSpecTypeResult { + ok_type: Box::new(ScSpecTypeDef::U32), + error_type: Box::new(udt("MyError")), + }, + ))], + ), + make_error_enum("MyError"), + make_error_enum("UnusedError"), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyError".to_string())); + assert!(!names.contains(&"UnusedError".to_string())); + } + + #[test] + fn test_handles_circular_references() { + // TypeA references TypeB, TypeB references TypeA + let entries = vec![ + make_function("foo", vec![udt("TypeA")]), + make_struct("TypeA", vec![("b", udt("TypeB"))]), + make_struct("TypeB", vec![("a", udt("TypeA"))]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"TypeA".to_string())); + assert!(names.contains(&"TypeB".to_string())); + } + + #[test] + fn test_handles_vec_of_udt() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Vec(Box::new( + stellar_xdr::curr::ScSpecTypeVec { + element_type: Box::new(udt("MyType")), + }, + ))], + ), + make_struct("MyType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyType".to_string())); + } + + #[test] + fn test_handles_map_with_udt() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Map(Box::new( + stellar_xdr::curr::ScSpecTypeMap { + key_type: Box::new(udt("KeyType")), + value_type: Box::new(udt("ValueType")), + }, + ))], + ), + make_struct("KeyType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("ValueType", vec![("field", ScSpecTypeDef::U32)]), + 
make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"KeyType".to_string())); + assert!(names.contains(&"ValueType".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_enum_referenced_by_function() { + let entries = vec![ + make_function("foo", vec![udt("MyEnum")]), + make_enum("MyEnum"), + make_enum("UnusedEnum"), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyEnum".to_string())); + assert!(!names.contains(&"UnusedEnum".to_string())); + } +} diff --git a/cmd/crates/soroban-spec-tools/src/lib.rs b/cmd/crates/soroban-spec-tools/src/lib.rs index e4dd8c659d..c2796e2195 100644 --- a/cmd/crates/soroban-spec-tools/src/lib.rs +++ b/cmd/crates/soroban-spec-tools/src/lib.rs @@ -16,6 +16,7 @@ use stellar_xdr::curr::{ }; pub mod contract; +pub mod filter; pub mod utils; #[derive(thiserror::Error, Debug)] diff --git a/cmd/soroban-cli/Cargo.toml b/cmd/soroban-cli/Cargo.toml index c3868c91c6..d08bfe612b 100644 --- a/cmd/soroban-cli/Cargo.toml +++ b/cmd/soroban-cli/Cargo.toml @@ -122,7 +122,7 @@ glob = "0.3.1" fqdn = "0.3.12" open = "5.3.0" url = "2.5.2" -wasm-gen = "0.1.4" +wasm-encoder = "0.235.0" zeroize = "1.8.1" keyring = { version = "3", features = ["apple-native", "windows-native", "sync-secret-service", "crypto-rust"], optional = true } whoami = "1.5.2" diff --git a/cmd/soroban-cli/src/commands/contract/build.rs b/cmd/soroban-cli/src/commands/contract/build.rs index 7cd33ab33c..aa25ddb2ce 100644 --- a/cmd/soroban-cli/src/commands/contract/build.rs +++ b/cmd/soroban-cli/src/commands/contract/build.rs @@ -162,6 +162,12 @@ pub enum Error { #[error(transparent)] Wasm(#[from] wasm::Error), + + #[error(transparent)] + SpecTools(#[from] 
soroban_spec_tools::contract::Error),
+
+    #[error(transparent)]
+    WasmParsing(#[from] wasmparser::BinaryReaderError),
 }

 const WASM_TARGET: &str = "wasm32v1-none";
@@ -256,6 +262,7 @@ impl Cmd {
                 .join(&file);

             self.inject_meta(&target_file_path)?;
+            Self::filter_spec(&target_file_path)?;

             let final_path = if let Some(out_dir) = &self.out_dir {
                 fs::create_dir_all(out_dir).map_err(Error::CreatingOutDir)?;
@@ -361,14 +368,75 @@ impl Cmd {
     }

     fn inject_meta(&self, target_file_path: &PathBuf) -> Result<(), Error> {
-        let mut wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?;
-        let xdr = self.encoded_new_meta()?;
-        wasm_gen::write_custom_section(&mut wasm_bytes, META_CUSTOM_SECTION_NAME, &xdr);
+        use wasm_encoder::{CustomSection, Module, RawSection};
+        use wasmparser::Payload;
+
+        let wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?;
+
+        // The module is rebuilt section by section: every meta custom section
+        // is folded into `existing_meta`, everything else is copied as-is.
+        let mut module = Module::new();
+        let mut existing_meta: Vec<u8> = Vec::new();
+
+        let parser = wasmparser::Parser::new(0);
+        for payload in parser.parse_all(&wasm_bytes) {
+            let payload = payload?;
+
+            match &payload {
+                // Collect existing meta to merge with new meta
+                Payload::CustomSection(section) if section.name() == META_CUSTOM_SECTION_NAME => {
+                    existing_meta.extend_from_slice(section.data());
+                }
+                // Copy all other sections verbatim
+                _ => {
+                    if let Some((id, range)) = payload.as_section() {
+                        let raw = RawSection {
+                            id,
+                            data: &wasm_bytes[range],
+                        };
+                        module.section(&raw);
+                    }
+                }
+            }
+        }
+
+        // Append new meta to existing meta, then emit one merged meta section
+        // as the last section of the rebuilt module.
+        let new_meta = self.encoded_new_meta()?;
+        existing_meta.extend(new_meta);
+
+        let meta_section = CustomSection {
+            name: META_CUSTOM_SECTION_NAME.into(),
+            data: existing_meta.into(),
+        };
+        module.section(&meta_section);
+
+        let updated_wasm = module.finish();

         // Deleting .wasm file effectively unlinking it from /release/deps/.wasm preventing from overwrite
         // See https://github.com/stellar/stellar-cli/issues/1694#issuecomment-2709342205
         fs::remove_file(target_file_path).map_err(Error::DeletingArtifact)?;
-        fs::write(target_file_path, wasm_bytes).map_err(Error::WritingWasmFile)
+        fs::write(target_file_path, updated_wasm).map_err(Error::WritingWasmFile)
+    }
+
+    /// Filters unused types from the contract spec.
+    ///
+    /// Reads the wasm at `target_file_path`, replaces its `contractspecv0`
+    /// custom section with the output of `Spec::filtered_spec_xdr`, and writes
+    /// the result back to the same path.
+    fn filter_spec(target_file_path: &PathBuf) -> Result<(), Error> {
+        use soroban_spec_tools::contract::{replace_custom_section, Spec};
+
+        let wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?;
+
+        // Parse the spec from the wasm
+        let spec = Spec::new(&wasm_bytes)?;
+
+        // Get the filtered spec as XDR bytes
+        let filtered_xdr = spec.filtered_spec_xdr()?;
+
+        // Replace the contractspecv0 section with the filtered version
+        let new_wasm = replace_custom_section(&wasm_bytes, "contractspecv0", &filtered_xdr)?;
+
+        // Write the modified wasm back. The file is removed first, as in
+        // `inject_meta`, before the new bytes are written.
+        fs::remove_file(target_file_path).map_err(Error::DeletingArtifact)?;
+        fs::write(target_file_path, new_wasm).map_err(Error::WritingWasmFile)
     }

     fn encoded_new_meta(&self) -> Result<Vec<u8>, Error> {
From ce91eb85b11627481bb57f86a60dd1719ce8c9fb Mon Sep 17 00:00:00 2001
From: Leigh <351529+leighmcculloch@users.noreply.github.com>
Date: Fri, 9 Jan 2026 01:15:56 +1000
Subject: [PATCH 2/7] Remove tests with hard-coded local paths

---
 cmd/crates/soroban-spec-tools/src/contract.rs | 166 ------------------
 1 file changed, 166 deletions(-)

diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs
index 753a998bb1..d52eba012d 100644
--- a/cmd/crates/soroban-spec-tools/src/contract.rs
+++ b/cmd/crates/soroban-spec-tools/src/contract.rs
@@ -377,169 +377,3 @@ fn format_name(lib: &StringM<80>, name: &StringM<60>) -> String {
         )
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_filter_spec_on_empty_contract() {
-        // This test checks that filtering a contract 
with events but no UDT references - // keeps the events (as per design) but would filter any unused UDTs - - // Skip if the file doesn't exist (it's in a different repo) - let wasm_path = "/Users/leighmcculloch/Code/rs-soroban-sdk/tests/empty/out/test_empty.wasm"; - if !std::path::Path::new(wasm_path).exists() { - return; - } - - let wasm_bytes = std::fs::read(wasm_path).unwrap(); - let spec = Spec::new(&wasm_bytes).unwrap(); - - println!("Original spec entries: {}", spec.spec.len()); - for entry in &spec.spec { - match entry { - ScSpecEntry::FunctionV0(f) => { - println!(" Function: {}", f.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::EventV0(e) => { - println!(" Event: {}", e.name.to_utf8_string_lossy()); - } - } - } - - let filtered = spec.filter_unused_types(); - println!("\nFiltered spec entries: {}", filtered.len()); - for entry in &filtered { - match entry { - ScSpecEntry::FunctionV0(f) => { - println!(" Function: {}", f.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::EventV0(e) => { - println!(" Event: {}", e.name.to_utf8_string_lossy()); - } - } - } - - // The function should still be there - assert!(filtered - .iter() - .any(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); - - // Events 
should be preserved - let event_count = filtered - .iter() - .filter(|e| matches!(e, ScSpecEntry::EventV0(_))) - .count(); - assert!(event_count > 0, "Events should be preserved"); - } - - #[test] - fn test_filter_on_custom_types_contract() { - // Test filtering on the custom_types contract wasm - let wasm_path = "/Users/leighmcculloch/Code/stellar-cli-spec-clean/target/wasm32v1-none/release/test_custom_types.wasm"; - if !std::path::Path::new(wasm_path).exists() { - eprintln!("Skipping test: wasm file not found at {}", wasm_path); - return; - } - - let wasm_bytes = std::fs::read(wasm_path).unwrap(); - let spec = Spec::new(&wasm_bytes).unwrap(); - - println!("\n=== CUSTOM TYPES CONTRACT ==="); - println!("Original spec entries: {}", spec.spec.len()); - - // Count functions and UDTs - let func_count = spec - .spec - .iter() - .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) - .count(); - let udt_count = spec.spec.len() - func_count; - - println!("Functions: {}", func_count); - println!("UDTs: {}", udt_count); - - // List all UDTs before filtering - for entry in &spec.spec { - match entry { - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()) - } - ScSpecEntry::EventV0(e) => println!(" Event: {}", e.name.to_utf8_string_lossy()), - _ => {} - } - } - - let filtered = spec.filter_unused_types(); - println!("\nFiltered spec entries: {}", filtered.len()); - - // All types in this contract are used by functions, so nothing should be filtered - // Verify key types are preserved: - // - Test (used by strukt, strukt_hel, and transitively by ComplexEnum, TupleStruct) - // - SimpleEnum (used by simple, and transitively by ComplexEnum, TupleStruct) - // - 
RoyalCard (used by card) - // - ComplexEnum (used by complex) - // - TupleStruct (used by tuple_strukt) - // - RecursiveEnum (used by recursive_enum) - - let has_test = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtStructV0(s) if s.name.to_utf8_string_lossy() == "Test") - }); - let has_simple_enum = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "SimpleEnum") - }); - let has_complex_enum = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "ComplexEnum") - }); - - assert!(has_test, "Test struct should be preserved"); - assert!(has_simple_enum, "SimpleEnum should be preserved"); - assert!(has_complex_enum, "ComplexEnum should be preserved"); - - // All functions should be preserved - let filtered_func_count = filtered - .iter() - .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) - .count(); - assert_eq!( - filtered_func_count, func_count, - "All functions should be preserved" - ); - - println!("Filter test passed: all used types and functions preserved"); - } -} From 39493abee2b4ab9e932e63c0c38c89d7baed768a Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:23:15 +1000 Subject: [PATCH 3/7] Fix clippy warnings for needless_continue and match_same_arms --- cmd/crates/soroban-spec-tools/src/contract.rs | 23 ++++++++----------- cmd/crates/soroban-spec-tools/src/filter.rs | 12 ++++------ 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index d52eba012d..3e2dc5be1c 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -174,20 +174,17 @@ pub fn replace_custom_section( for payload in parser.parse_all(wasm_bytes) { let payload = payload?; - match &payload { - // Skip the target custom section - we'll append 
the new one at the end - Payload::CustomSection(section) if section.name() == section_name => { - continue; - } + // Skip the target custom section - we'll append the new one at the end + let dominated = + matches!(&payload, Payload::CustomSection(section) if section.name() == section_name); + if !dominated { // For all other payloads that represent sections, copy them verbatim - _ => { - if let Some((id, range)) = payload.as_section() { - let raw = RawSection { - id, - data: &wasm_bytes[range], - }; - module.section(&raw); - } + if let Some((id, range)) = payload.as_section() { + let raw = RawSection { + id, + data: &wasm_bytes[range], + }; + module.section(&raw); } } } diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index 4bdc192a0d..db7434ad66 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -99,10 +99,8 @@ fn get_entry_type_refs(entry: &ScSpecEntry) -> HashSet { } } } - // Enums and error enums don't reference other types - ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) => {} - // Events are kept unconditionally - ScSpecEntry::EventV0(_) => {} + // Enums, error enums, and events don't reference other types + ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) | ScSpecEntry::EventV0(_) => {} } refs @@ -179,10 +177,8 @@ pub fn filter_unused_types(entries: Vec) -> Vec { .into_iter() .filter(|entry| { match entry { - // Always keep functions - ScSpecEntry::FunctionV0(_) => true, - // Always keep events - ScSpecEntry::EventV0(_) => true, + // Always keep functions and events + ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => true, // Keep UDTs only if they're reachable _ => { if let Some(name) = get_udt_name(entry) { From 6958e164da3ee257b5fbb67186dbaf66a10bb8a5 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:24:38 +1000 Subject: [PATCH 4/7] Rename variable to 
is_target_section for clarity --- cmd/crates/soroban-spec-tools/src/contract.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 3e2dc5be1c..1f44886ed6 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -175,9 +175,9 @@ pub fn replace_custom_section( let payload = payload?; // Skip the target custom section - we'll append the new one at the end - let dominated = + let is_target_section = matches!(&payload, Payload::CustomSection(section) if section.name() == section_name); - if !dominated { + if !is_target_section { // For all other payloads that represent sections, copy them verbatim if let Some((id, range)) = payload.as_section() { let raw = RawSection { From 3432f53abca6e1d61b1a206e0ced26974dbfd023 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 19:35:57 +1000 Subject: [PATCH 5/7] add marker-based spec filtering for types and events --- Cargo.lock | 1 + cmd/crates/soroban-spec-tools/Cargo.toml | 1 + cmd/crates/soroban-spec-tools/src/contract.rs | 33 ++ cmd/crates/soroban-spec-tools/src/filter.rs | 329 +++++++++++++++++- .../src/commands/contract/build.rs | 17 +- 5 files changed, 375 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab93ec0e89..6db08c7ee3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5189,6 +5189,7 @@ dependencies = [ "hex", "itertools 0.10.5", "serde_json", + "sha2 0.10.9", "soroban-spec", "stellar-strkey 0.0.15", "stellar-xdr", diff --git a/cmd/crates/soroban-spec-tools/Cargo.toml b/cmd/crates/soroban-spec-tools/Cargo.toml index d2b719eaec..e9938d7ff7 100644 --- a/cmd/crates/soroban-spec-tools/Cargo.toml +++ b/cmd/crates/soroban-spec-tools/Cargo.toml @@ -28,6 +28,7 @@ wasmparser = { workspace = true } base64 = { workspace = true } thiserror = "1.0.31" wasm-encoder = "0.235.0" +sha2 = 
{ workspace = true } [dev-dependencies] diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 1f44886ed6..54a0a1e2b3 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -143,6 +143,39 @@ impl Spec { } Ok(buffer) } + + /// Returns the filtered spec entries serialized as XDR bytes, filtering + /// based on markers in the WASM data section. + /// + /// The SDK embeds markers in the data section for each type/event that is + /// actually used in the contract. These markers survive dead code elimination, + /// so we can filter out any spec entries that don't have corresponding markers. + /// + /// Functions are always kept as they define the contract's API. + /// + /// # Arguments + /// + /// * `wasm_bytes` - The WASM binary to extract markers from + /// + /// # Returns + /// + /// XDR bytes of the filtered spec entries. + pub fn filtered_spec_xdr_with_markers(&self, wasm_bytes: &[u8]) -> Result, Error> { + use crate::filter::{extract_spec_markers, filter_by_markers}; + + // Extract markers from the WASM data section + let markers = extract_spec_markers(wasm_bytes); + + // Filter all entries (types, events) based on markers + let filtered = filter_by_markers(self.spec.clone(), &markers); + + let mut buffer = Vec::new(); + let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none()); + for entry in filtered { + entry.write_xdr(&mut writer)?; + } + Ok(buffer) + } } /// Replaces a custom section in WASM bytes with new content. 
diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index db7434ad66..d4f50ed887 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -6,10 +6,122 @@ use std::collections::HashSet; +use sha2::{Digest, Sha256}; use stellar_xdr::curr::{ - ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, + Limits, ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, + WriteXdr, }; +/// Magic bytes that identify a spec marker: "SpEc" +pub const SPEC_MARKER_MAGIC: [u8; 4] = [b'S', b'p', b'E', b'c']; + +/// Length of the hash portion (truncated SHA256 - first 8 bytes / 64 bits). +pub const SPEC_MARKER_HASH_LEN: usize = 8; + +/// Length of the marker: 4-byte prefix + 8-byte truncated SHA256 hash. +pub const SPEC_MARKER_LEN: usize = 4 + SPEC_MARKER_HASH_LEN; + +/// A spec marker hash found in the WASM data section. +/// This is an 8-byte truncated SHA256 hash of the spec entry XDR bytes. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SpecMarkerHash(pub [u8; SPEC_MARKER_HASH_LEN]); + +/// Computes the marker hash for a spec entry. +/// +/// The hash is a truncated SHA256 (first 8 bytes) of the spec entry's XDR bytes. +pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { + let xdr_bytes = entry + .to_xdr(Limits::none()) + .expect("XDR encoding should not fail"); + let mut hasher = Sha256::new(); + hasher.update(&xdr_bytes); + let hash: [u8; 32] = hasher.finalize().into(); + let mut truncated = [0u8; SPEC_MARKER_HASH_LEN]; + truncated.copy_from_slice(&hash[..SPEC_MARKER_HASH_LEN]); + SpecMarkerHash(truncated) +} + +/// Extracts spec markers from the WASM data section. +/// +/// The SDK embeds markers in the data section for each spec entry that is +/// actually used in the contract. These markers survive dead code elimination +/// only if the corresponding type/event is used. 
+/// +/// Marker format: +/// - 4 bytes: "SpEc" magic +/// - 8 bytes: truncated SHA256 hash of the spec entry XDR bytes +pub fn extract_spec_markers(wasm_bytes: &[u8]) -> HashSet { + let mut markers = HashSet::new(); + + for payload in wasmparser::Parser::new(0).parse_all(wasm_bytes) { + let Ok(payload) = payload else { continue }; + + if let wasmparser::Payload::DataSection(reader) = payload { + for data in reader.into_iter().flatten() { + extract_markers_from_data(data.data, &mut markers); + } + } + } + + markers +} + +/// Extracts spec markers from a data segment. +fn extract_markers_from_data(data: &[u8], markers: &mut HashSet) { + // Marker size is exactly 12 bytes: 4 (magic) + 8 (hash) + if data.len() < SPEC_MARKER_LEN { + return; + } + + for i in 0..=data.len() - SPEC_MARKER_LEN { + // Look for magic bytes + if data[i..].starts_with(&SPEC_MARKER_MAGIC) { + let hash_start = i + 4; + let hash_end = hash_start + SPEC_MARKER_HASH_LEN; + let mut hash = [0u8; SPEC_MARKER_HASH_LEN]; + hash.copy_from_slice(&data[hash_start..hash_end]); + markers.insert(SpecMarkerHash(hash)); + } + } +} + +/// Filters spec entries based on markers found in the WASM data section. +/// +/// This removes any spec entries (types, events) that don't have corresponding +/// markers in the data section. The SDK embeds markers for types/events that +/// are actually used, and these markers survive dead code elimination. +/// +/// Functions are always kept as they define the contract's API. +/// +/// # Arguments +/// +/// * `entries` - The spec entries to filter +/// * `markers` - Marker hashes extracted from the WASM data section +/// +/// # Returns +/// +/// Filtered entries with only used types/events remaining. 
+pub fn filter_by_markers( + entries: Vec, + markers: &HashSet, +) -> Vec { + entries + .into_iter() + .filter(|entry| { + match entry { + // Always keep functions - they're the contract's API + ScSpecEntry::FunctionV0(_) => true, + + // For all other entries (types, events), check if marker exists + _ => { + let hash = compute_marker_hash(entry); + markers.contains(&hash) + } + } + }) + .collect() +} + /// Extracts UDT (User Defined Type) names referenced by a type definition. /// /// This function recursively traverses the type structure to find all @@ -454,4 +566,219 @@ mod tests { assert!(names.contains(&"MyEnum".to_string())); assert!(!names.contains(&"UnusedEnum".to_string())); } + + // Helper to encode a marker (matches SDK's spec_marker.rs format) + // Format: "SpEc" (4 bytes) + truncated SHA256 hash (8 bytes) + fn encode_marker(entry: &ScSpecEntry) -> Vec { + let hash = compute_marker_hash(entry); + let mut buf = Vec::new(); + buf.extend_from_slice(&SPEC_MARKER_MAGIC); + buf.extend_from_slice(&hash.0); + buf + } + + use stellar_xdr::curr::{ScSpecEventDataFormat, ScSpecEventV0}; + + fn make_event(name: &str) -> ScSpecEntry { + ScSpecEntry::EventV0(ScSpecEventV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + prefix_topics: VecM::default(), + params: VecM::default(), + data_format: ScSpecEventDataFormat::SingleValue, + }) + } + + #[test] + fn test_compute_marker_hash() { + let entry = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); + let hash = compute_marker_hash(&entry); + + // Hash should be 8 bytes + assert_eq!(hash.0.len(), SPEC_MARKER_HASH_LEN); + + // Same entry produces same hash + let hash2 = compute_marker_hash(&entry); + assert_eq!(hash.0, hash2.0); + + // Different entry produces different hash + let entry2 = make_struct("DifferentStruct", vec![("field", ScSpecTypeDef::U32)]); + let hash3 = compute_marker_hash(&entry2); + assert_ne!(hash.0, hash3.0); + } + + #[test] + fn 
test_encode_marker_format() { + let entry = make_event("Transfer"); + let marker = encode_marker(&entry); + + // Marker should be 12 bytes: 4 (magic) + 8 (hash) + assert_eq!(marker.len(), SPEC_MARKER_LEN); + + // First 4 bytes should be magic + assert_eq!(&marker[..4], &SPEC_MARKER_MAGIC); + } + + #[test] + fn test_extract_markers_from_data() { + let entry1 = make_event("Transfer"); + let entry2 = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); + + let marker1 = encode_marker(&entry1); + let marker2 = encode_marker(&entry2); + + // Concatenate markers with some padding + let mut data = Vec::new(); + data.extend_from_slice(&[0u8; 16]); // Some leading bytes + data.extend_from_slice(&marker1); + data.extend_from_slice(&[0u8; 8]); // Some padding + data.extend_from_slice(&marker2); + data.extend_from_slice(&[0u8; 16]); // Some trailing bytes + + let mut markers = HashSet::new(); + extract_markers_from_data(&data, &mut markers); + + // Both markers should be found + assert!(markers.contains(&compute_marker_hash(&entry1))); + assert!(markers.contains(&compute_marker_hash(&entry2))); + } + + #[test] + fn test_filter_by_markers_keeps_used_events() { + let transfer_event = make_event("Transfer"); + let mint_event = make_event("Mint"); + + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + transfer_event.clone(), + mint_event.clone(), + make_event("Unused"), + ]; + + let mut markers = HashSet::new(); + markers.insert(compute_marker_hash(&transfer_event)); + markers.insert(compute_marker_hash(&mint_event)); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function + 2 used events + assert_eq!(filtered.len(), 3); + + let event_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::EventV0(event) = e { + Some(event.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + + assert!(event_names.contains(&"Transfer".to_string())); + 
assert!(event_names.contains(&"Mint".to_string())); + assert!(!event_names.contains(&"Unused".to_string())); + } + + #[test] + fn test_filter_by_markers_removes_all_events_if_no_markers() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_event("Transfer"), + make_event("Mint"), + ]; + + let markers = HashSet::new(); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function, 0 events + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_filter_by_markers_removes_all_if_no_markers() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]), + make_enum("MyEnum"), + make_event("Unused"), + ]; + + let markers = HashSet::new(); // No markers + + let filtered = filter_by_markers(entries, &markers); + + // Should have: only functions (always kept), no types or events + assert_eq!(filtered.len(), 1); + assert!(filtered + .iter() + .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + } + + #[test] + fn test_filter_by_markers_keeps_types_with_markers() { + let used_struct = make_struct("UsedStruct", vec![("field", ScSpecTypeDef::U32)]); + let used_enum = make_enum("UsedEnum"); + let used_event = make_event("UsedEvent"); + + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + used_struct.clone(), + make_struct("UnusedStruct", vec![("field", ScSpecTypeDef::U32)]), + used_enum.clone(), + make_enum("UnusedEnum"), + used_event.clone(), + make_event("UnusedEvent"), + ]; + + let mut markers = HashSet::new(); + markers.insert(compute_marker_hash(&used_struct)); + markers.insert(compute_marker_hash(&used_enum)); + markers.insert(compute_marker_hash(&used_event)); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function + 1 struct + 1 enum + 1 event + assert_eq!(filtered.len(), 4); + + // Check specific entries + let struct_names: 
Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::UdtStructV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(struct_names, vec!["UsedStruct"]); + + let enum_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::UdtEnumV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(enum_names, vec!["UsedEnum"]); + + let event_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::EventV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(event_names, vec!["UsedEvent"]); + } } diff --git a/cmd/soroban-cli/src/commands/contract/build.rs b/cmd/soroban-cli/src/commands/contract/build.rs index aa25ddb2ce..f72a40c129 100644 --- a/cmd/soroban-cli/src/commands/contract/build.rs +++ b/cmd/soroban-cli/src/commands/contract/build.rs @@ -416,10 +416,16 @@ impl Cmd { fs::write(target_file_path, updated_wasm).map_err(Error::WritingWasmFile) } - /// Filters unused types from the contract spec. + /// Filters unused types and events from the contract spec. /// - /// This removes type definitions that are not referenced by any function, - /// reducing the size of the WASM binary. + /// This removes: + /// - Type definitions that are not referenced by any function + /// - Events that don't have corresponding markers in the WASM data section + /// (events that are defined but never published) + /// + /// The SDK embeds markers in the data section for types/events that are + /// actually used. These markers survive dead code elimination, so we can + /// detect which spec entries are truly needed. 
fn filter_spec(target_file_path: &PathBuf) -> Result<(), Error> { use soroban_spec_tools::contract::{replace_custom_section, Spec}; @@ -428,8 +434,9 @@ impl Cmd { // Parse the spec from the wasm let spec = Spec::new(&wasm_bytes)?; - // Get the filtered spec as XDR bytes - let filtered_xdr = spec.filtered_spec_xdr()?; + // Get the filtered spec as XDR bytes, filtering both types and events + // based on markers in the WASM data section + let filtered_xdr = spec.filtered_spec_xdr_with_markers(&wasm_bytes)?; // Replace the contractspecv0 section with the filtered version let new_wasm = replace_custom_section(&wasm_bytes, "contractspecv0", &filtered_xdr)?; From c27baf89834794196864ee246e80b467d87fa167 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 22:39:34 +1000 Subject: [PATCH 6/7] fix: address clippy lints in filter module --- cmd/crates/soroban-spec-tools/src/filter.rs | 29 ++++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index d4f50ed887..83747fa870 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -12,7 +12,7 @@ use stellar_xdr::curr::{ WriteXdr, }; -/// Magic bytes that identify a spec marker: "SpEc" +/// Magic bytes that identify a spec marker: `SpEc` pub const SPEC_MARKER_MAGIC: [u8; 4] = [b'S', b'p', b'E', b'c']; /// Length of the hash portion (truncated SHA256 - first 8 bytes / 64 bits). @@ -29,6 +29,11 @@ pub struct SpecMarkerHash(pub [u8; SPEC_MARKER_HASH_LEN]); /// Computes the marker hash for a spec entry. /// /// The hash is a truncated SHA256 (first 8 bytes) of the spec entry's XDR bytes. +/// +/// # Panics +/// +/// Panics if the spec entry cannot be encoded to XDR, which should never happen +/// for valid `ScSpecEntry` values. 
pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { let xdr_bytes = entry .to_xdr(Limits::none()) @@ -48,7 +53,7 @@ pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { /// only if the corresponding type/event is used. /// /// Marker format: -/// - 4 bytes: "SpEc" magic +/// - 4 bytes: `SpEc` magic /// - 8 bytes: truncated SHA256 hash of the spec entry XDR bytes pub fn extract_spec_markers(wasm_bytes: &[u8]) -> HashSet { let mut markers = HashSet::new(); @@ -101,6 +106,7 @@ fn extract_markers_from_data(data: &[u8], markers: &mut HashSet) /// # Returns /// /// Filtered entries with only used types/events remaining. +#[allow(clippy::implicit_hasher)] pub fn filter_by_markers( entries: Vec, markers: &HashSet, @@ -108,16 +114,13 @@ pub fn filter_by_markers( entries .into_iter() .filter(|entry| { - match entry { - // Always keep functions - they're the contract's API - ScSpecEntry::FunctionV0(_) => true, - - // For all other entries (types, events), check if marker exists - _ => { - let hash = compute_marker_hash(entry); - markers.contains(&hash) - } + // Always keep functions - they're the contract's API + if matches!(entry, ScSpecEntry::FunctionV0(_)) { + return true; } + // For all other entries (types, events), check if marker exists + let hash = compute_marker_hash(entry); + markers.contains(&hash) }) .collect() } @@ -233,8 +236,8 @@ fn get_udt_name(entry: &ScSpecEntry) -> Option { /// /// This function performs a reachability analysis starting from all functions. /// It keeps: -/// - All functions (FunctionV0) -/// - All events (EventV0) +/// - All functions (`FunctionV0`) +/// - All events (`EventV0`) /// - All UDTs that are directly or transitively referenced by functions /// /// Types that are defined but never used by any function are removed. 
From c55e029dbd125229a3a657fd08a4fb12b0559790 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Sat, 10 Jan 2026 01:29:16 +1000 Subject: [PATCH 7/7] fix: rename test variables to avoid similar_names lint --- cmd/crates/soroban-spec-tools/src/filter.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index 83747fa870..d4edbb5734 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -628,23 +628,23 @@ mod tests { let entry1 = make_event("Transfer"); let entry2 = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); - let marker1 = encode_marker(&entry1); - let marker2 = encode_marker(&entry2); + let encoded1 = encode_marker(&entry1); + let encoded2 = encode_marker(&entry2); // Concatenate markers with some padding let mut data = Vec::new(); data.extend_from_slice(&[0u8; 16]); // Some leading bytes - data.extend_from_slice(&marker1); + data.extend_from_slice(&encoded1); data.extend_from_slice(&[0u8; 8]); // Some padding - data.extend_from_slice(&marker2); + data.extend_from_slice(&encoded2); data.extend_from_slice(&[0u8; 16]); // Some trailing bytes - let mut markers = HashSet::new(); - extract_markers_from_data(&data, &mut markers); + let mut found = HashSet::new(); + extract_markers_from_data(&data, &mut found); // Both markers should be found - assert!(markers.contains(&compute_marker_hash(&entry1))); - assert!(markers.contains(&compute_marker_hash(&entry2))); + assert!(found.contains(&compute_marker_hash(&entry1))); + assert!(found.contains(&compute_marker_hash(&entry2))); } #[test]