# Generic protocol extractor for JSON-based ethscription protocols.
# Extracts protocol and operation, then ABI-encodes remaining parameters.
#
# Input is a content URI of the form `data:,{...}` whose payload is a JSON
# object. The `p` and `op` keys name the protocol and the operation; every
# other key is treated as a parameter. Parameters are ordered by key, mapped
# to an ABI type inferred from their JSON value, and handed to Eth::Abi.encode.
#
# Any input that fails validation yields DEFAULT_PARAMS instead of raising.
class GenericProtocolExtractor
  # Security limits
  MAX_DEPTH = 3 # Maximum JSON nesting depth
  MAX_STRING_LENGTH = 1000 # Maximum length for any string value
  MAX_ARRAY_LENGTH = 100 # Maximum array length
  MAX_OBJECT_KEYS = 20 # Maximum keys in an object
  MAX_JSON_BYTES = 10_000 # Maximum size of the JSON payload in bytes
  UINT256_MAX = 2**256 - 1 # Maximum uint256 value

  # Standard protocol fields
  PROTOCOL_FIELD = 'p'
  OPERATION_FIELD = 'op'
  RESERVED_FIELDS = [PROTOCOL_FIELD, OPERATION_FIELD].freeze

  # Prefix stripped from the content URI before JSON parsing
  DATA_URI_PREFIX = 'data:,'

  # Protocol / operation names: lowercase alphanumeric with dash/underscore
  IDENTIFIER_PATTERN = /\A[a-z0-9\-_]+\z/
  IDENTIFIER_LENGTH = (1..50).freeze

  # Hex digits only (the part after "0x")
  HEX_PATTERN = /\A[0-9a-fA-F]+\z/

  # Non-negative decimal integer without leading zeros
  DECIMAL_PATTERN = /\A(0|[1-9][0-9]*)\z/

  # Default return for invalid input
  DEFAULT_PARAMS = [''.b, ''.b, ''.b].freeze # [protocol, operation, abi_encoded_data]

  class ExtractionError < StandardError; end

  # Convenience wrapper around #extract on a fresh instance.
  #
  # @param content_uri [Object] candidate data URI
  # @return [Array<String>] [protocol, operation, abi_encoded_data]
  def self.extract(content_uri)
    new.extract(content_uri)
  end

  # Helper method for legacy token protocol (maintains compatibility).
  # Delegates to the strict regex-based TokenParamsExtractor.
  #
  # Note: this is a public class method; a bare `private` does not apply to
  # `def self.` methods, so it is kept next to .extract for clarity.
  def self.extract_token_params(content_uri)
    TokenParamsExtractor.extract(content_uri)
  end

  # Extracts protocol, operation and ABI-encoded parameters from a data URI.
  #
  # @param content_uri [Object] expected to be a String starting with `data:,{`
  # @return [Array<String>] three binary strings
  #   [protocol, operation, abi_encoded_data]; DEFAULT_PARAMS when the URI,
  #   the JSON, the protocol fields or any parameter is invalid
  def extract(content_uri)
    return DEFAULT_PARAMS unless valid_data_uri?(content_uri)

    # Parse the JSON payload (everything after 'data:,') with security checks
    data = parse_json_safely(content_uri[DATA_URI_PREFIX.length..])

    protocol = data[PROTOCOL_FIELD]
    operation = data[OPERATION_FIELD]
    return DEFAULT_PARAMS unless valid_protocol_fields?(protocol, operation)

    # Everything except the reserved fields is a parameter to encode
    params = data.reject { |key, _| RESERVED_FIELDS.include?(key) }

    [protocol.b, operation.b, encode_parameters(params).b]
  rescue JSON::ParserError, ExtractionError => e
    Rails.logger.debug "Protocol extraction failed: #{e.message}"
    DEFAULT_PARAMS
  end

  private

  # True when +uri+ is a String whose payload starts like a JSON object.
  def valid_data_uri?(uri)
    uri.is_a?(String) && uri.start_with?('data:,{')
  end

  # True when both fields are 1..50 character strings matching
  # IDENTIFIER_PATTERN.
  def valid_protocol_fields?(protocol, operation)
    [protocol, operation].all? do |field|
      field.is_a?(String) &&
        IDENTIFIER_LENGTH.cover?(field.length) &&
        field.match?(IDENTIFIER_PATTERN)
    end
  end

  # Parses +json_str+ and validates the resulting structure.
  #
  # @param json_str [String] raw JSON text
  # @return [Hash] the parsed root object
  # @raise [ExtractionError] if the payload is too large, the root is not an
  #   object, or validate_structure rejects any nested value
  # @raise [JSON::ParserError] if JSON.parse rejects the text (max_nesting is
  #   set to MAX_DEPTH)
  def parse_json_safely(json_str)
    raise ExtractionError, "JSON too large" if json_str.bytesize > MAX_JSON_BYTES

    data = JSON.parse(json_str, max_nesting: MAX_DEPTH)
    raise ExtractionError, "Root must be object" unless data.is_a?(Hash)

    validate_structure(data, 0)
    data
  end

  # Recursively enforces the security limits on a parsed JSON value.
  #
  # @param value [Object] parsed JSON value
  # @param depth [Integer] nesting level of +value+ (root is 0)
  # @raise [ExtractionError] on any limit violation or unsupported value
  def validate_structure(value, depth)
    raise ExtractionError, "Max depth exceeded" if depth > MAX_DEPTH

    case value
    when Hash
      raise ExtractionError, "Too many object keys" if value.size > MAX_OBJECT_KEYS

      value.each do |key, child|
        raise ExtractionError, "Invalid key type" unless key.is_a?(String)
        raise ExtractionError, "Key too long" if key.length > MAX_STRING_LENGTH

        validate_structure(child, depth + 1)
      end
    when Array
      raise ExtractionError, "Array too long" if value.size > MAX_ARRAY_LENGTH

      value.each { |child| validate_structure(child, depth + 1) }
    when String
      raise ExtractionError, "String too long" if value.length > MAX_STRING_LENGTH
    when Integer
      # Check uint256 bounds
      raise ExtractionError, "Number out of bounds" unless value.between?(0, UINT256_MAX)
    when Float
      # Float#to_i raises FloatDomainError for Infinity/NaN, which #extract
      # does not rescue, so non-finite values must be rejected first.
      raise ExtractionError, "Number out of bounds" unless value.finite?
      raise ExtractionError, "Decimal numbers not supported" unless value == value.to_i

      # NOTE(review): the parsed data is not rewritten here, and
      # infer_type_and_value has no Float branch, so a whole-number float
      # (e.g. 1.0) used as a parameter is still rejected at encoding time.
      validate_structure(value.to_i, depth)
    when TrueClass, FalseClass
      nil # Booleans allowed
    when NilClass
      raise ExtractionError, "Null values not supported"
    else
      raise ExtractionError, "Unsupported type: #{value.class}"
    end
  end

  # ABI-encodes +params+ as a tuple, ordered by key for deterministic output.
  #
  # @param params [Hash{String => Object}] parameters without reserved fields
  # @return [String] empty binary string when there are no parameters,
  #   otherwise the result of Eth::Abi.encode
  # @raise [ExtractionError] when a type cannot be inferred (original message
  #   preserved) or when the encoder itself fails
  def encode_parameters(params)
    return ''.b if params.empty?

    types, values = params.keys.sort.map { |key| infer_type_and_value(params[key]) }.transpose

    Eth::Abi.encode(types, values)
  rescue ExtractionError
    # Validation failures are expected; keep their specific message and let
    # #extract log them instead of reporting them as encoder errors.
    raise
  rescue StandardError => e
    Rails.logger.error "ABI encoding failed: #{e.message}"
    raise ExtractionError, "Failed to encode parameters"
  end

  # Maps a parsed JSON value to an ABI type name and the value to encode.
  #
  # @param value [Object] parsed JSON value
  # @return [Array(String, Object)] [abi_type, value_for_encoder]
  # @raise [ExtractionError] for nulls, objects, and any other unsupported value
  def infer_type_and_value(value)
    case value
    when Integer then ['uint256', value]
    when String then infer_string_type(value)
    when TrueClass, FalseClass then ['bool', value]
    when NilClass then raise ExtractionError, "Null values not supported"
    when Array then infer_array_type(value)
    when Hash then raise ExtractionError, "Nested objects not supported"
    else raise ExtractionError, "Cannot infer type for #{value.class}"
    end
  end

  # Infers the type of a string: 'true'/'false' become bool, '0x...' goes
  # through infer_hex_type, decimal integer strings become uint256, anything
  # else stays a string.
  def infer_string_type(value)
    return ['bool', true] if value == 'true'
    return ['bool', false] if value == 'false'
    return infer_hex_type(value) if value.start_with?('0x')
    return ['string', value] unless value.match?(DECIMAL_PATTERN)

    number = value.to_i
    raise ExtractionError, "Number too large for uint256" if number > UINT256_MAX

    ['uint256', number]
  end

  # Infers the type of a string starting with '0x'.
  #
  # The emptiness and odd-length checks run before the hex-digit check, so an
  # odd-length value is rejected even when it contains non-hex characters;
  # only even-length non-hex values fall back to 'string'.
  def infer_hex_type(value)
    hex_part = value[2..]

    raise ExtractionError, "Empty hex string" if hex_part.empty?
    raise ExtractionError, "Invalid hex string: odd number of characters" if hex_part.length.odd?
    return ['string', value] unless hex_part.match?(HEX_PATTERN)

    byte_length = hex_part.length / 2
    raise ExtractionError, "Hex string too long for bytes32" if byte_length > 32

    # Normalize to lowercase
    hex_data = hex_part.downcase

    case byte_length
    when 20
      # Address - keep as hex string, Eth::Abi will handle conversion
      ['address', "0x#{hex_data}"]
    when 32
      # Common for hashes, IDs - Eth::Abi expects bytes32 as a binary string
      ['bytes32', [hex_data].pack('H*')]
    else
      # TODO: support other fixed-length bytes (bytes1-bytes31), e.g.
      # ["bytes#{byte_length}", [hex_data].pack('H*')]
      raise ExtractionError, "Not supported"
    end
  end

  # Infers the type of a JSON array from its elements, which must all map to
  # the same ABI type. An empty array defaults to uint256[].
  #
  # Nested arrays are rejected up front: stripping '[]' from an element type
  # would otherwise label an array of arrays as a flat array and leave the
  # failure to the ABI encoder.
  def infer_array_type(items)
    return ['uint256[]', []] if items.empty?
    raise ExtractionError, "Nested arrays not supported" if items.any?(Array)

    pairs = items.map { |item| infer_type_and_value(item) }
    base_type = pairs.first.first
    raise ExtractionError, "Mixed types in array" unless pairs.all? { |type, _| type == base_type }

    ["#{base_type}[]", pairs.map(&:last)]
  end
end
0 commit comments