Skip to content

Commit 29d3ec9

Browse files
committed
Implement collections and more generic protocol framework
1 parent b7f39cd commit 29d3ec9

29 files changed

+3830
-315
lines changed

app/models/ethscription_transaction.rb

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def self.transfer_multiple_ethscriptions(
107107
def function_selector
108108
function_signature = case ethscription_operation
109109
when 'create'
110-
'createEthscription((bytes32,bytes32,address,bytes,string,string,string,bool,(string,string,string,uint256,uint256,uint256)))'
110+
'createEthscription((bytes32,bytes32,address,bytes,string,string,string,bool,(string,string,bytes)))'
111111
when 'transfer'
112112
if transfer_ids && transfer_ids.any?
113113
'transferMultipleEthscriptions(bytes32[],address)'
@@ -223,7 +223,9 @@ def build_create_calldata
223223
mime_subtype = mimetype&.split('/')&.last
224224
raw_content = data_uri.decoded_data.b
225225
esip6 = DataUri.esip6?(content_uri) || false
226-
token_params = TokenParamsExtractor.extract(content_uri)
226+
227+
# Extract protocol params - returns [protocol, operation, encoded_data]
228+
protocol, operation, encoded_data = ProtocolExtractor.for_calldata(content_uri)
227229

228230
# Hash the content for protocol uniqueness
229231
content_uri_hash_hex = Digest::SHA256.hexdigest(content_uri)
@@ -233,6 +235,13 @@ def build_create_calldata
233235
tx_hash_bin = hex_to_bin(eth_transaction.transaction_hash)
234236
owner_bin = address_to_bin(initial_owner)
235237

238+
# Build protocol params tuple
239+
protocol_params = [
240+
protocol, # string protocol
241+
operation, # string operation
242+
encoded_data # bytes data
243+
]
244+
236245
# Encode parameters
237246
params = [
238247
tx_hash_bin, # bytes32 transactionHash
@@ -243,11 +252,11 @@ def build_create_calldata
243252
media_type.to_s.b, # string
244253
mime_subtype.to_s.b, # string
245254
esip6, # bool esip6
246-
token_params # TokenParams tuple
255+
protocol_params # ProtocolParams tuple
247256
]
248257

249258
encoded = Eth::Abi.encode(
250-
['(bytes32,bytes32,address,bytes,string,string,string,bool,(string,string,string,uint256,uint256,uint256))'],
259+
['(bytes32,bytes32,address,bytes,string,string,string,bool,(string,string,bytes))'],
251260
[params]
252261
)
253262

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
# Generic protocol extractor for JSON-based ethscription protocols.
#
# Takes a content URI of the form `data:,{...json...}`, pulls out the
# protocol ("p") and operation ("op") fields, and ABI-encodes every remaining
# top-level field (ordered by key) into a single byte string.
#
# Any input that fails validation yields DEFAULT_PARAMS (three empty binary
# strings) instead of raising.
class GenericProtocolExtractor
  # Security limits
  MAX_DEPTH = 3                # Maximum JSON nesting depth
  MAX_STRING_LENGTH = 1000     # Maximum length for any string value or key
  MAX_ARRAY_LENGTH = 100       # Maximum array length
  MAX_OBJECT_KEYS = 20         # Maximum keys in an object
  MAX_JSON_BYTES = 10_000      # Maximum byte size of the raw JSON payload
  MAX_IDENTIFIER_LENGTH = 50   # Maximum length of the protocol / operation names
  UINT256_MAX = 2**256 - 1     # Maximum uint256 value

  # Standard protocol fields
  PROTOCOL_FIELD = 'p'
  OPERATION_FIELD = 'op'
  RESERVED_FIELDS = [PROTOCOL_FIELD, OPERATION_FIELD].freeze

  # Accepted shapes for identifiers and string-typed values
  IDENTIFIER_PATTERN = /\A[a-z0-9\-_]+\z/      # lowercase alphanumeric with dash/underscore
  HEX_PATTERN = /\A[0-9a-fA-F]+\z/             # hex digits only (after the 0x prefix)
  UINT_STRING_PATTERN = /\A(0|[1-9][0-9]*)\z/  # canonical non-negative decimal integer

  # Default return for invalid input
  DEFAULT_PARAMS = [''.b, ''.b, ''.b].freeze # [protocol, operation, abi_encoded_data]

  class ExtractionError < StandardError; end

  # Convenience wrapper: builds a throwaway instance and runs #extract.
  #
  # @param content_uri [Object] candidate data URI (non-strings are rejected)
  # @return [Array<String>] [protocol, operation, abi_encoded_data] as binary strings
  def self.extract(content_uri)
    new.extract(content_uri)
  end

  # Helper method for legacy token protocol (maintains compatibility).
  # Delegates to the strict regex-based TokenParamsExtractor.
  def self.extract_token_params(content_uri)
    TokenParamsExtractor.extract(content_uri)
  end

  # Extracts protocol, operation and ABI-encoded parameters from a content URI.
  #
  # @param content_uri [Object] candidate data URI; must be a String starting
  #   with `data:,{` to be considered at all
  # @return [Array<String>] [protocol, operation, abi_encoded_data] as binary
  #   strings, or DEFAULT_PARAMS when the input is rejected for any reason
  def extract(content_uri)
    return DEFAULT_PARAMS unless valid_data_uri?(content_uri)

    # Everything after the 6-character 'data:,' prefix is the JSON document.
    data = parse_json_safely(content_uri[6..])

    protocol = data[PROTOCOL_FIELD]
    operation = data[OPERATION_FIELD]
    return DEFAULT_PARAMS unless valid_protocol_fields?(protocol, operation)

    # Everything except the reserved fields is treated as a parameter.
    params = data.reject { |key, _| RESERVED_FIELDS.include?(key) }
    encoded_data = encode_parameters(params)

    [protocol.b, operation.b, encoded_data.b]
  rescue JSON::ParserError, ExtractionError => e
    Rails.logger.debug "Protocol extraction failed: #{e.message}"
    DEFAULT_PARAMS
  end

  private

  # True when +uri+ is a String that begins with a bare `data:,` URI whose
  # payload opens a JSON object.
  def valid_data_uri?(uri)
    uri.is_a?(String) && uri.start_with?('data:,{')
  end

  # True when both fields are strings of 1..MAX_IDENTIFIER_LENGTH characters
  # matching IDENTIFIER_PATTERN.
  def valid_protocol_fields?(protocol, operation)
    [protocol, operation].all? do |field|
      field.is_a?(String) &&
        field.length.between?(1, MAX_IDENTIFIER_LENGTH) &&
        field.match?(IDENTIFIER_PATTERN)
    end
  end

  # Parses +json_str+ and enforces the size, nesting and structure limits.
  #
  # @return [Hash] the parsed root object
  # @raise [ExtractionError] when the payload is too large, the root is not an
  #   object, or the structure violates a limit
  # @raise [JSON::ParserError] when the text cannot be parsed with
  #   `max_nesting: MAX_DEPTH`
  def parse_json_safely(json_str)
    raise ExtractionError, "JSON too large" if json_str.bytesize > MAX_JSON_BYTES

    data = JSON.parse(json_str, max_nesting: MAX_DEPTH)
    raise ExtractionError, "Root must be object" unless data.is_a?(Hash)

    validate_structure(data, 0)
    data
  end

  # Recursively checks +value+ against the security limits.
  #
  # @param value [Object] a parsed JSON value
  # @param depth [Integer] nesting level of +value+ (root is 0)
  # @raise [ExtractionError] on any violation
  def validate_structure(value, depth)
    raise ExtractionError, "Max depth exceeded" if depth > MAX_DEPTH

    case value
    when Hash
      raise ExtractionError, "Too many object keys" if value.size > MAX_OBJECT_KEYS

      value.each do |key, nested|
        raise ExtractionError, "Invalid key type" unless key.is_a?(String)
        raise ExtractionError, "Key too long" if key.length > MAX_STRING_LENGTH

        validate_structure(nested, depth + 1)
      end
    when Array
      raise ExtractionError, "Array too long" if value.size > MAX_ARRAY_LENGTH

      value.each { |item| validate_structure(item, depth + 1) }
    when String
      raise ExtractionError, "String too long" if value.length > MAX_STRING_LENGTH
    when Integer
      # Check uint256 bounds
      raise ExtractionError, "Number out of bounds" if value.negative? || value > UINT256_MAX
    when Float
      # Infinity/NaN must be rejected before #to_i, which raises
      # FloatDomainError for them; that error is not rescued by #extract.
      raise ExtractionError, "Non-finite numbers not supported" unless value.finite?
      raise ExtractionError, "Decimal numbers not supported" unless value == value.to_i

      # Whole-number floats only have their magnitude bounds-checked here; the
      # parsed data is not modified, and infer_type_and_value has no Float
      # branch, so such a value is still rejected when it is encoded.
      validate_structure(value.to_i, depth)
    when TrueClass, FalseClass
      # Booleans allowed
    when NilClass
      raise ExtractionError, "Null values not supported"
    else
      raise ExtractionError, "Unsupported type: #{value.class}"
    end
  end

  # ABI-encodes +params+ in ascending key order, inferring one ABI type per value.
  #
  # @param params [Hash{String=>Object}] non-reserved top-level fields
  # @return [String] empty binary string when +params+ is empty, otherwise the
  #   result of Eth::Abi.encode
  # @raise [ExtractionError] when type inference or encoding fails for any reason
  def encode_parameters(params)
    return ''.b if params.empty?

    # Sort keys for deterministic encoding
    types, values = params.keys.sort.map { |key| infer_type_and_value(params[key]) }.transpose

    Eth::Abi.encode(types, values)
  rescue StandardError => e
    Rails.logger.error "ABI encoding failed: #{e.message}"
    raise ExtractionError, "Failed to encode parameters"
  end

  # Maps a parsed JSON value to an [abi_type, value_to_encode] pair.
  #
  # @raise [ExtractionError] for nulls, objects, and any other unsupported value
  def infer_type_and_value(value)
    case value
    when Integer
      ['uint256', value]
    when String
      infer_string_type(value)
    when TrueClass, FalseClass
      ['bool', value]
    when NilClass
      raise ExtractionError, "Null values not supported"
    when Array
      infer_array_type(value)
    when Hash
      # Reject nested objects - use basic types and arrays only
      raise ExtractionError, "Nested objects not supported"
    else
      raise ExtractionError, "Cannot infer type for #{value.class}"
    end
  end

  # Strings are promoted to bool, address/bytes32 or uint256 when they look
  # like one; anything else stays a plain ABI string.
  def infer_string_type(value)
    return ['bool', true] if value == 'true'
    return ['bool', false] if value == 'false'
    return infer_hex_type(value) if value.start_with?('0x')
    return ['string', value] unless value.match?(UINT_STRING_PATTERN)

    number = value.to_i
    raise ExtractionError, "Number too large for uint256" if number > UINT256_MAX

    ['uint256', number]
  end

  # Handles strings starting with "0x".
  #
  # The emptiness and odd-length checks run before the hex-digit check, so an
  # odd-length "0x..." string is rejected even if it contains non-hex characters.
  def infer_hex_type(value)
    hex_part = value[2..]

    raise ExtractionError, "Empty hex string" if hex_part.empty?
    raise ExtractionError, "Invalid hex string: odd number of characters" if hex_part.length.odd?
    # Contains non-hex characters - treat as regular string
    return ['string', value] unless hex_part.match?(HEX_PATTERN)

    byte_length = hex_part.length / 2
    raise ExtractionError, "Hex string too long for bytes32" if byte_length > 32

    # Normalize to lowercase
    hex_data = hex_part.downcase

    case byte_length
    when 20
      # Address - kept as a 0x-prefixed hex string for the encoder
      ['address', "0x#{hex_data}"]
    when 32
      # bytes32 - handed to the encoder as a raw binary string
      ['bytes32', [hex_data].pack('H*')]
    else
      # TODO: support the other fixed-length bytes types (bytes1..bytes31)
      raise ExtractionError, "Not supported"
    end
  end

  # Arrays take the type of their first element; every element must infer to
  # the same base type. Empty arrays default to uint256[].
  def infer_array_type(value)
    return ['uint256[]', []] if value.empty?

    # `sub('[]', '')` strips one array suffix, so a nested array collapses to
    # its element's base type while the value itself stays nested.
    # NOTE(review): presumably Eth::Abi.encode then rejects that mismatch - confirm.
    first_type, = infer_type_and_value(value.first)
    base_type = first_type.sub('[]', '')

    encoded_items = value.map do |item|
      item_type, item_value = infer_type_and_value(item)
      raise ExtractionError, "Mixed types in array" if item_type.sub('[]', '') != base_type

      item_value
    end

    ["#{base_type}[]", encoded_items]
  end
end

0 commit comments

Comments
 (0)