Skip to content

Commit 3314f16

Browse files
committed
Retry validation in more cases
1 parent b7f39cd commit 3314f16

File tree

4 files changed

+82
-110
lines changed

4 files changed

+82
-110
lines changed

app/jobs/validation_job.rb

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,25 @@
11
class ValidationJob < ApplicationJob
22
queue_as :validation
33

4-
# Import TransientValidationError from BlockValidator
5-
TransientValidationError = BlockValidator::TransientValidationError
6-
7-
# Only retry transient errors, not all StandardError
8-
retry_on TransientValidationError,
4+
# Retry all errors - any exception means we couldn't validate, not that validation failed
5+
# StandardError catches all normal exceptions (network, RPC, API, etc.)
6+
retry_on StandardError,
97
wait: ENV.fetch('VALIDATION_RETRY_WAIT_SECONDS', 5).to_i.seconds,
10-
attempts: ENV.fetch('VALIDATION_TRANSIENT_RETRIES', 5).to_i
8+
attempts: ENV.fetch('VALIDATION_TRANSIENT_RETRIES', 1000).to_i
119

1210
def perform(l1_block_number, l2_block_hashes)
1311
start_time = Time.current
1412

1513
# ValidationResult.validate_and_save will:
1614
# 1. Create ValidationResult with success: true (job succeeds)
1715
# 2. Create ValidationResult with success: false (job succeeds - real validation failure found)
18-
# 3. Raise TransientValidationError (job retries via retry_on, then fails if exhausted)
16+
# 3. Raise an exception when validation could not be completed (job retries via retry_on StandardError)
1917
ValidationResult.validate_and_save(l1_block_number, l2_block_hashes)
2018

2119
elapsed_time = Time.current - start_time
2220
Rails.logger.info "ValidationJob: Block #{l1_block_number} validation completed in #{elapsed_time.round(3)}s"
23-
24-
# Job completes successfully for cases 1 & 2
25-
# TransientValidationError will be handled by retry_on automatically
21+
rescue => e
22+
Rails.logger.error "ValidationJob failed for L1 #{l1_block_number}: #{e.class}: #{e.message}"
23+
raise
2624
end
2725
end

app/models/validation_result.rb

Lines changed: 31 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -62,70 +62,42 @@ def self.recent_failures(limit: 10)
6262
def self.validate_and_save(l1_block_number, l2_block_hashes)
6363
Rails.logger.info "ValidationResult: Validating L1 block #{l1_block_number}"
6464

65-
begin
66-
# Create validator and validate (validator fetches its own API data)
67-
validator = BlockValidator.new
68-
start_time = Time.current
69-
block_result = validator.validate_l1_block(l1_block_number, l2_block_hashes)
65+
# Create validator and validate (validator fetches its own API data)
66+
validator = BlockValidator.new
67+
start_time = Time.current
68+
block_result = validator.validate_l1_block(l1_block_number, l2_block_hashes)
69+
70+
# Find or initialize - idempotent for re-runs
71+
validation_result = find_or_initialize_by(l1_block: l1_block_number)
72+
73+
validation_result.assign_attributes(
74+
success: block_result.success,
75+
error_details: block_result.errors,
76+
validation_stats: {
77+
# Basic stats
78+
success: block_result.success,
79+
l1_block: l1_block_number,
80+
l2_blocks: l2_block_hashes,
7081

71-
# Find or initialize - idempotent for re-runs
72-
validation_result = find_or_initialize_by(l1_block: l1_block_number)
82+
# Detailed comparison data
83+
validation_details: block_result.stats,
7384

74-
validation_result.assign_attributes(
75-
success: block_result.success,
76-
error_details: block_result.errors,
77-
validation_stats: {
78-
# Basic stats
79-
success: block_result.success,
80-
l1_block: l1_block_number,
81-
l2_blocks: l2_block_hashes,
82-
83-
# Detailed comparison data
84-
validation_details: block_result.stats,
85-
86-
# Store the raw data for debugging
87-
raw_api_data: block_result.respond_to?(:api_data) ? block_result.api_data : nil,
88-
raw_l2_events: block_result.respond_to?(:l2_events) ? block_result.l2_events : nil,
89-
90-
# Timing info
91-
validation_duration_ms: ((Time.current - start_time) * 1000).round(2)
92-
},
93-
validated_at: Time.current
94-
)
85+
# Store the raw data for debugging
86+
raw_api_data: block_result.respond_to?(:api_data) ? block_result.api_data : nil,
87+
raw_l2_events: block_result.respond_to?(:l2_events) ? block_result.l2_events : nil,
9588

96-
validation_result.save!
97-
98-
# Log the result
99-
validation_result.log_summary
100-
101-
validation_result
102-
rescue BlockValidator::TransientValidationError => e
103-
# Don't persist transient errors - let ValidationJob handle retries
104-
Rails.logger.debug "ValidationResult: Transient error for block #{l1_block_number}: #{e.message}"
105-
raise e
106-
rescue => e
107-
Rails.logger.error "ValidationResult: Exception validating block #{l1_block_number}: #{e.message}"
108-
109-
# Only persist non-transient validation errors - idempotent for re-runs
110-
validation_result = find_or_initialize_by(l1_block: l1_block_number)
111-
112-
validation_result.assign_attributes(
113-
success: false,
114-
error_details: [e.message],
115-
validation_stats: {
116-
exception: true,
117-
exception_class: e.class.name,
118-
exception_message: e.message,
119-
exception_backtrace: e.backtrace&.first(10) # Store first 10 lines of backtrace
120-
},
121-
validated_at: Time.current
122-
)
89+
# Timing info
90+
validation_duration_ms: ((Time.current - start_time) * 1000).round(2)
91+
},
92+
validated_at: Time.current
93+
)
12394

124-
validation_result.save!
95+
validation_result.save!
12596

126-
validation_result.log_summary
127-
raise e
128-
end
97+
# Log the result
98+
validation_result.log_summary
99+
100+
validation_result
129101
end
130102

131103
# Instance methods

lib/block_validator.rb

Lines changed: 30 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ class BlockValidator
22
attr_reader :errors, :stats
33

44
# Exception for transient errors that should trigger retries
5+
# The class is informational only - ValidationJob retries on any StandardError, so every exception is treated as transient
56
class TransientValidationError < StandardError; end
67

78
def initialize
@@ -117,15 +118,10 @@ def load_genesis_transaction_hashes
117118

118119
def fetch_expected_data(l1_block_number)
119120
EthscriptionsApiClient.fetch_block_data(l1_block_number)
120-
rescue EthscriptionsApiClient::ApiUnavailableError => e
121-
# API unavailable after exhausting all retries - this is an infrastructure issue
122-
Rails.logger.warn "API unavailable for block #{l1_block_number}: #{e.message}"
123-
raise TransientValidationError, e.message
124121
rescue => e
125-
# Other unexpected errors - log and continue with empty data
126-
message = "Unexpected error fetching API data: #{e.message}"
127-
@errors << message
128-
{creations: [], transfers: []}
122+
# Treat any API client failure as transient so an infrastructure error is never recorded as a validation failure
123+
Rails.logger.warn "Transient API error for block #{l1_block_number}: #{e.class}: #{e.message}"
124+
raise TransientValidationError, e.message
129125
end
130126

131127
def aggregate_l2_events(block_hashes)
@@ -137,24 +133,20 @@ def aggregate_l2_events(block_hashes)
137133
begin
138134
receipts = EthRpcClient.l2.call('eth_getBlockReceipts', [block_hash])
139135
if receipts.nil?
136+
# Treat missing receipts as transient infrastructure issue
140137
error_msg = "No receipts returned for L2 block #{block_hash}"
141-
@errors << error_msg
142-
# Treat missing receipts as potentially transient
138+
Rails.logger.warn "Transient L2 error: #{error_msg}"
143139
raise TransientValidationError, error_msg
144140
end
145141

146142
data = EventDecoder.decode_block_receipts(receipts)
147143
all_creations.concat(data[:creations])
148144
all_transfers.concat(data[:transfers]) # Ethscriptions protocol transfers
149145
rescue => e
146+
# Treat any L2 RPC failure as transient so an infrastructure error is never recorded as a validation failure
150147
error_msg = "Failed to get receipts for block #{block_hash}: #{e.message}"
151-
@errors << error_msg
152-
# Classify L2 receipt fetch errors - network issues are transient
153-
if transient_error?(e)
154-
raise TransientValidationError, error_msg
155-
else
156-
raise
157-
end
148+
Rails.logger.warn "Transient L2 error: #{error_msg}"
149+
raise TransientValidationError, error_msg
158150
end
159151
end
160152

@@ -348,14 +340,15 @@ def verify_ethscription_storage(creation, l1_block_num, block_tag)
348340
begin
349341
stored = StorageReader.get_ethscription_with_content(tx_hash, block_tag: block_tag)
350342
rescue => e
351-
@errors << "Ethscription #{tx_hash} not found in contract storage: #{e.message}"
352-
@storage_checks_performed.increment
353-
return
343+
# RPC/network error - treat as transient inability to validate
344+
Rails.logger.warn "Transient storage error for #{tx_hash}: #{e.message}"
345+
raise TransientValidationError, "Storage read failed for #{tx_hash}: #{e.message}"
354346
end
355347

356348
@storage_checks_performed.increment
357349

358350
if stored.nil?
351+
# Ethscription genuinely doesn't exist in contract - this is a validation failure
359352
@errors << "Ethscription #{tx_hash} not found in contract storage"
360353
return
361354
end
@@ -509,18 +502,32 @@ def verify_transfer_ownership(transfers, block_tag)
509502
# Verify each token's final owner
510503
final_owners.each do |token_id, expected_owner|
511504
# First check if the ethscription exists in storage
512-
ethscription = StorageReader.get_ethscription(token_id, block_tag: block_tag)
505+
begin
506+
ethscription = StorageReader.get_ethscription(token_id, block_tag: block_tag)
507+
rescue => e
508+
# RPC/network error - treat as transient inability to validate
509+
Rails.logger.warn "Transient storage error for #{token_id}: #{e.message}"
510+
raise TransientValidationError, "Storage read failed for #{token_id}: #{e.message}"
511+
end
513512

514513
if ethscription.nil?
515-
# Token doesn't exist yet - treat as fatal divergence
514+
# Token genuinely doesn't exist - this is a validation failure
516515
@errors << "Token #{token_id} not found in storage"
517516
next
518517
end
519518

520-
actual_owner = StorageReader.get_owner(token_id, block_tag: block_tag)
519+
begin
520+
actual_owner = StorageReader.get_owner(token_id, block_tag: block_tag)
521+
rescue => e
522+
# RPC/network error - treat as transient inability to validate
523+
Rails.logger.warn "Transient owner read error for #{token_id}: #{e.message}"
524+
raise TransientValidationError, "Owner read failed for #{token_id}: #{e.message}"
525+
end
526+
521527
@storage_checks_performed.increment
522528

523529
if actual_owner.nil?
530+
# Owner doesn't exist (shouldn't happen if ethscription exists) - validation failure
524531
@errors << "Could not verify owner of token #{token_id}"
525532
next
526533
end
@@ -570,20 +577,6 @@ def safe_content_preview(content, length: 50)
570577
preview + (content.length > length ? "..." : "")
571578
end
572579

573-
# Classify L2 RPC errors as transient (infrastructure) vs permanent (logic)
574-
def transient_error?(error)
575-
case error
576-
# L2 RPC network errors
577-
when SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET,
578-
Net::OpenTimeout, Net::ReadTimeout
579-
true
580-
# L2 RPC client errors that might be transient
581-
when EthRpcClient::HttpError, EthRpcClient::ApiError
582-
true
583-
else
584-
false
585-
end
586-
end
587580

588581
# Sanitize data structures for JSON serialization
589582
def sanitize_for_json(data)
@@ -604,4 +597,3 @@ def sanitize_for_json(data)
604597
end
605598
end
606599
end
607-

lib/storage_reader.rb

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,11 @@ def get_ethscription_with_content(tx_hash, block_tag: 'latest')
101101

102102
# Make the eth_call
103103
result = eth_call('0x' + calldata.unpack1('H*'), block_tag)
104-
raise StandardError, "Ethscription not found: #{tx_hash}" if result.nil? || result == '0x' || result == '0x0'
104+
# When contract returns 0x/0x0, the ethscription doesn't exist (not an error, just not found)
105+
return nil if result == '0x' || result == '0x0'
106+
107+
# If result is nil, that's an RPC/network error
108+
raise StandardError, "RPC call failed for ethscription #{tx_hash}" if result.nil?
105109

106110
# Decode the tuple: (Ethscription, bytes)
107111
types = ['((bytes32,bytes32,string,string,string,bool),address,address,address,uint256,uint256,uint64,uint64,bytes32)', 'bytes']
@@ -152,7 +156,10 @@ def get_ethscription(tx_hash, block_tag: 'latest')
152156

153157
# Make the eth_call
154158
result = eth_call('0x' + calldata.unpack1('H*'), block_tag)
155-
return nil if result.nil? || result == '0x' || result == '0x0'
159+
# Deterministic not-found from contract returns 0x/0x0
160+
return nil if result == '0x' || result == '0x0'
161+
# Nil indicates an RPC/network failure
162+
raise StandardError, "RPC call failed for ethscription #{tx_hash}" if result.nil?
156163

157164
# Decode using Eth::Abi
158165
# Updated types for nested struct: ContentInfo is a tuple within the main tuple
@@ -225,7 +232,10 @@ def get_owner(token_id, block_tag: 'latest')
225232

226233
# Make the eth_call
227234
result = eth_call('0x' + calldata.unpack1('H*'), block_tag)
228-
return nil if result.nil? || result == '0x'
235+
# Some nodes return 0x when the call yields no data
236+
return nil if result == '0x'
237+
# Nil indicates an RPC/network failure
238+
raise StandardError, "RPC call failed for ownerOf #{token_id}" if result.nil?
229239

230240
# Decode the result - ownerOf returns a single address
231241
decoded = Eth::Abi.decode(['address'], result)

0 commit comments

Comments
 (0)