From 8bc400600e56cf31ab8de57c8fb3ae623f3aec50 Mon Sep 17 00:00:00 2001 From: satyakwok Date: Thu, 21 May 2026 14:13:36 +0200 Subject: [PATCH] fix(audit): token_transfers unique constraint + raise orphan log to warn Two audit follow-ups against the ERC-20 handler PR (#46): ## M-3: token_transfers unique constraint `token_transfers` had only `PRIMARY KEY (id)` when shipped. On reorg recovery + replay, the same `(tx_hash, log_index)` rows would re-insert without a uniqueness gate, producing duplicate transfer rows that double-count balances downstream. The atomic cursor advance in the steady-state writer prevented this in practice, but reorg / forensic- rerun paths bypass that protection. - New migration `0003_token_transfers_unique.sql` adds `transfers_tx_log_unique_idx` after deduping any existing duplicates. - `insert_batch` re-enabled `ON CONFLICT (tx_hash, log_index) DO NOTHING` so replay is now idempotent at the DB layer in addition to the cursor-based writer guard. The DELETE in the migration is a no-op for chains that have only run the indexer post-#46 (no duplicates accumulated); kept in place to keep the migration replay-safe on chains that did. ## M-5: raise orphan-log filter to warn `fetch_one` / `ingest_one` drop logs whose tx_hash isn't backed by a tx row in the same bundle (added in #46 to work around a Sentrix-side data inconsistency where reverted-tx log envelopes still come back from `eth_getLogs`). Original commit logged at `debug!`, which is silent at default log level. Sustained high-volume drops here signal a real chain bug worth investigating, so promote to `warn!` to make the gap visible without changing logging config. Verified locally: no compile changes, existing tests pass. --- .../0003_token_transfers_unique.sql | 28 +++++++++++++++++++ crates/db/src/token_transfers.rs | 8 +++--- crates/sync/src/backfill.rs | 9 +++++- 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 crates/db/migrations/0003_token_transfers_unique.sql diff --git a/crates/db/migrations/0003_token_transfers_unique.sql b/crates/db/migrations/0003_token_transfers_unique.sql new file mode 100644 index 0000000..0c1f31b --- /dev/null +++ b/crates/db/migrations/0003_token_transfers_unique.sql @@ -0,0 +1,28 @@ +-- 0003_token_transfers_unique.sql — audit M-3 fix (2026-05-21) +-- +-- token_transfers had only PRIMARY KEY (id) when shipped in PR #46. On reorg +-- recovery + replay the worker re-inserts the same (tx_hash, log_index) rows +-- without a uniqueness gate, producing duplicate transfer rows that double- +-- count balances downstream. The atomic cursor advance in the steady-state +-- writer prevented this in practice, but reorg / forensic-rerun paths bypass +-- that protection. +-- +-- Add the unique constraint that should have been there from day one. The +-- corresponding `ON CONFLICT (tx_hash, log_index) DO NOTHING` is re-enabled +-- in `crates/db/src/token_transfers.rs::insert_batch` in the same change set. +-- +-- Safe to run on a populated table: drops existing duplicates first +-- (keeping the lowest id per (tx_hash, log_index) pair) so the unique index +-- can be created. Production runs as of 2026-05-21 don't yet have duplicates +-- (no reorg has hit the patched indexer), so the DELETE is a no-op for now; +-- left in place to make this migration replay-safe on chains that did +-- accumulate duplicates before the upgrade. + +DELETE FROM token_transfers t1 +USING token_transfers t2 +WHERE t1.id > t2.id + AND t1.tx_hash = t2.tx_hash + AND t1.log_index = t2.log_index; + +CREATE UNIQUE INDEX IF NOT EXISTS transfers_tx_log_unique_idx + ON token_transfers (tx_hash, log_index); diff --git a/crates/db/src/token_transfers.rs b/crates/db/src/token_transfers.rs index a920266..9a8bbb4 100644 --- a/crates/db/src/token_transfers.rs +++ b/crates/db/src/token_transfers.rs @@ -54,10 +54,10 @@ where .push_bind(t.token_id) .push_bind(t.amount); }); - // No ON CONFLICT — table has no unique constraint on (tx_hash, log_index). - // The writer's atomic cursor advance prevents re-processing the same - // block in the steady state; reorg recovery deletes downstream rows - // before re-insert. Adding a unique index here is a follow-up migration. + // ON CONFLICT enabled after migration 0003 added the unique index on + // (tx_hash, log_index). Re-insert on reorg replay is now idempotent + // at the DB layer in addition to the cursor-based writer guard. + qb.push(" ON CONFLICT (tx_hash, log_index) DO NOTHING"); qb.build().execute(executor).await?; Ok(()) } diff --git a/crates/sync/src/backfill.rs b/crates/sync/src/backfill.rs index 71b95d9..73f88b7 100644 --- a/crates/sync/src/backfill.rs +++ b/crates/sync/src/backfill.rs @@ -227,7 +227,14 @@ async fn fetch_one( .filter(|l| tx_hash_set.contains(&l.tx_hash)) .collect(); if dom_logs.len() < logs_total { - tracing::debug!( + // Raised from debug! to warn! per audit M-5 (2026-05-21). + // Orphan logs (tx_hash in eth_getLogs but missing from + // `/chain/blocks/`'s tx vec) signal Sentrix-side data + // inconsistency — usually a reverted tx whose log envelopes + // still get returned. Low-volume warnings are fine; sustained + // high-volume warnings here flag a real chain bug worth + // investigating, so they should be visible at default log level. + tracing::warn!( block = h.0, dropped = logs_total - dom_logs.len(), "backfill: dropped orphan logs (tx_hash not in block.txs)"