Skip to content

Commit db85afd

Browse files
committed
batch inserts to public.blocks
1 parent e0717f2 commit db85afd

File tree

9 files changed

+257
-129
lines changed

9 files changed

+257
-129
lines changed

statediff/indexer/batch_tx.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// VulcanizeDB
2+
// Copyright © 2021 Vulcanize
3+
4+
// This program is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Affero General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
9+
// This program is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Affero General Public License for more details.
13+
14+
// You should have received a copy of the GNU Affero General Public License
15+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package indexer
18+
19+
import (
20+
blockstore "github.com/ipfs/go-ipfs-blockstore"
21+
dshelp "github.com/ipfs/go-ipfs-ds-help"
22+
node "github.com/ipfs/go-ipld-format"
23+
"github.com/jmoiron/sqlx"
24+
"github.com/lib/pq"
25+
26+
"github.com/ethereum/go-ethereum/statediff/indexer/ipfs/ipld"
27+
"github.com/ethereum/go-ethereum/statediff/indexer/models"
28+
)
29+
30+
// ipldBatchInsertPgStr is the statement used to insert a whole batch of IPLD blocks into public.blocks at once.
// $1 is bound to an array of keys (cast to TEXT[]) and $2 to an array of data values (cast to BYTEA[]);
// both arrays are expanded with unnest, and rows whose key already exists are skipped (ON CONFLICT (key) DO NOTHING).
const ipldBatchInsertPgStr string = `INSERT INTO public.blocks (key, data) VALUES (unnest($1::TEXT[]), unnest($2::BYTEA[])) ON CONFLICT (key) DO NOTHING`
31+
32+
// BlockTx wraps a Postgres tx with the state necessary for building the Postgres tx concurrently during trie difference iteration
//
// Fields:
//   - dbtx: the underlying sqlx transaction; flush executes the batch insert on it.
//   - BlockNumber, headerID: identifiers carried alongside the transaction (not read in this file).
//   - Close: callback that receives this BlockTx and an error and returns an error;
//     presumably it finalizes (commits or rolls back) the transaction — confirm against the code that constructs the BlockTx.
//   - quit: closing this channel makes the cache goroutine return.
//   - iplds: channel over which the cacheDirect, cacheIPLD and cacheRaw methods hand IPLD models to the cache goroutine.
//   - ipldCache: the pending batch of keys and values; appended to by cache and reset by flush after a successful insert.
33+
type BlockTx struct {
34+
dbtx *sqlx.Tx
35+
BlockNumber uint64
36+
headerID int64
37+
Close func(blockTx *BlockTx, err error) error
38+
39+
quit chan struct{}
40+
iplds chan models.IPLDModel
41+
ipldCache models.IPLDBatch
42+
}
43+
44+
func (tx *BlockTx) flush() error {
45+
_, err := tx.dbtx.Exec(ipldBatchInsertPgStr, pq.Array(tx.ipldCache.Keys), pq.Array(tx.ipldCache.Values))
46+
if err != nil {
47+
return err
48+
}
49+
tx.ipldCache = models.IPLDBatch{}
50+
return nil
51+
}
52+
53+
// run in background goroutine to synchronize concurrent appends to the ipldCache
54+
func (tx *BlockTx) cache() {
55+
for {
56+
select {
57+
case i := <-tx.iplds:
58+
tx.ipldCache.Keys = append(tx.ipldCache.Keys, i.Key)
59+
tx.ipldCache.Values = append(tx.ipldCache.Values, i.Data)
60+
case <-tx.quit:
61+
return
62+
}
63+
}
64+
}
65+
66+
func (tx *BlockTx) cacheDirect(key string, value []byte) {
67+
tx.iplds <- models.IPLDModel{
68+
Key: key,
69+
Data: value,
70+
}
71+
}
72+
73+
func (tx *BlockTx) cacheIPLD(i node.Node) {
74+
tx.iplds <- models.IPLDModel{
75+
Key: blockstore.BlockPrefix.String() + dshelp.MultihashToDsKey(i.Cid().Hash()).String(),
76+
Data: i.RawData(),
77+
}
78+
}
79+
80+
func (tx *BlockTx) cacheRaw(codec, mh uint64, raw []byte) (string, string, error) {
81+
c, err := ipld.RawdataToCid(codec, raw, mh)
82+
if err != nil {
83+
return "", "", err
84+
}
85+
prefixedKey := blockstore.BlockPrefix.String() + dshelp.MultihashToDsKey(c.Hash()).String()
86+
tx.iplds <- models.IPLDModel{
87+
Key: prefixedKey,
88+
Data: raw,
89+
}
90+
return c.String(), prefixedKey, err
91+
}

statediff/indexer/indexer.go

Lines changed: 51 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525

2626
"github.com/ipfs/go-cid"
2727
node "github.com/ipfs/go-ipld-format"
28-
"github.com/jmoiron/sqlx"
2928
"github.com/multiformats/go-multihash"
3029

3130
"github.com/ethereum/go-ethereum/common"
@@ -65,7 +64,6 @@ type Indexer interface {
6564
type StateDiffIndexer struct {
6665
chainConfig *params.ChainConfig
6766
dbWriter *PostgresCIDWriter
68-
init bool
6967
}
7068

7169
// NewStateDiffIndexer creates a pointer to a new PayloadConverter which satisfies the PayloadConverter interface
@@ -80,13 +78,6 @@ func NewStateDiffIndexer(chainConfig *params.ChainConfig, db *postgres.DB) (*Sta
8078
}, nil
8179
}
8280

83-
type BlockTx struct {
84-
dbtx *sqlx.Tx
85-
BlockNumber uint64
86-
headerID int64
87-
Close func(err error) error
88-
}
89-
9081
// ReportDBMetrics is a reporting function to run as goroutine
9182
func (sdi *StateDiffIndexer) ReportDBMetrics(delay time.Duration, quit <-chan bool) {
9283
if !metrics.Enabled {
@@ -127,7 +118,10 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
127118
}
128119

129120
if len(txNodes) != len(rctNodes) || len(rctNodes) != len(rctLeafNodeCIDs) {
130-
return nil, fmt.Errorf("expected number of transactions (%d), receipts (%d), and receipt trie leaf nodes (%d)to be equal", len(txNodes), len(rctNodes), len(rctLeafNodeCIDs))
121+
return nil, fmt.Errorf("expected number of transactions (%d), receipts (%d), and receipt trie leaf nodes (%d) to be equal", len(txNodes), len(rctNodes), len(rctLeafNodeCIDs))
122+
}
123+
if len(txTrieNodes) != len(rctTrieNodes) {
124+
return nil, fmt.Errorf("expected number of tx trie (%d) and rct trie (%d) nodes to be equal", len(txTrieNodes), len(rctTrieNodes))
131125
}
132126

133127
// Calculate reward
@@ -139,6 +133,7 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
139133
reward = CalcEthBlockReward(block.Header(), block.Uncles(), block.Transactions(), receipts)
140134
}
141135
t = time.Now()
136+
142137
// Begin new db tx for everything
143138
tx, err := sdi.dbWriter.db.Beginx()
144139
if err != nil {
@@ -153,9 +148,14 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
153148
}
154149
}()
155150
blockTx := &BlockTx{
156-
dbtx: tx,
151+
iplds: make(chan models.IPLDModel),
152+
quit: make(chan struct{}),
153+
ipldCache: models.IPLDBatch{},
154+
dbtx: tx,
157155
// handle transaction commit or rollback for any return case
158-
Close: func(err error) error {
156+
Close: func(self *BlockTx, err error) error {
157+
close(self.quit)
158+
close(self.iplds)
159159
if p := recover(); p != nil {
160160
shared.Rollback(tx)
161161
panic(p)
@@ -166,6 +166,12 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
166166
indexerMetrics.tStateStoreCodeProcessing.Update(tDiff)
167167
traceMsg += fmt.Sprintf("state, storage, and code storage processing time: %s\r\n", tDiff.String())
168168
t = time.Now()
169+
if err := self.flush(); err != nil {
170+
shared.Rollback(tx)
171+
traceMsg += fmt.Sprintf(" TOTAL PROCESSING DURATION: %s\r\n", time.Since(start).String())
172+
log.Debug(traceMsg)
173+
return err
174+
}
169175
err = tx.Commit()
170176
tDiff = time.Since(t)
171177
indexerMetrics.tPostgresCommit.Update(tDiff)
@@ -176,6 +182,8 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
176182
return err
177183
},
178184
}
185+
go blockTx.cache()
186+
179187
tDiff := time.Since(t)
180188
indexerMetrics.tFreePostgres.Update(tDiff)
181189

@@ -184,7 +192,7 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
184192

185193
// Publish and index header, collect headerID
186194
var headerID int64
187-
headerID, err = sdi.processHeader(tx, block.Header(), headerNode, reward, totalDifficulty)
195+
headerID, err = sdi.processHeader(blockTx, block.Header(), headerNode, reward, totalDifficulty)
188196
if err != nil {
189197
return nil, err
190198
}
@@ -193,7 +201,7 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
193201
traceMsg += fmt.Sprintf("header processing time: %s\r\n", tDiff.String())
194202
t = time.Now()
195203
// Publish and index uncles
196-
err = sdi.processUncles(tx, headerID, height, uncleNodes)
204+
err = sdi.processUncles(blockTx, headerID, height, uncleNodes)
197205
if err != nil {
198206
return nil, err
199207
}
@@ -202,7 +210,7 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
202210
traceMsg += fmt.Sprintf("uncle processing time: %s\r\n", tDiff.String())
203211
t = time.Now()
204212
// Publish and index receipts and txs
205-
err = sdi.processReceiptsAndTxs(tx, processArgs{
213+
err = sdi.processReceiptsAndTxs(blockTx, processArgs{
206214
headerID: headerID,
207215
blockNumber: block.Number(),
208216
receipts: receipts,
@@ -230,11 +238,8 @@ func (sdi *StateDiffIndexer) PushBlock(block *types.Block, receipts types.Receip
230238

231239
// processHeader publishes and indexes a header IPLD in Postgres
232240
// it returns the headerID
233-
func (sdi *StateDiffIndexer) processHeader(tx *sqlx.Tx, header *types.Header, headerNode node.Node, reward, td *big.Int) (int64, error) {
234-
// publish header
235-
if err := shared.PublishIPLD(tx, headerNode); err != nil {
236-
return 0, fmt.Errorf("error publishing header IPLD: %v", err)
237-
}
241+
func (sdi *StateDiffIndexer) processHeader(tx *BlockTx, header *types.Header, headerNode node.Node, reward, td *big.Int) (int64, error) {
242+
tx.cacheIPLD(headerNode)
238243

239244
var baseFee *int64
240245
if header.BaseFee != nil {
@@ -243,7 +248,7 @@ func (sdi *StateDiffIndexer) processHeader(tx *sqlx.Tx, header *types.Header, he
243248
}
244249

245250
// index header
246-
return sdi.dbWriter.upsertHeaderCID(tx, models.HeaderModel{
251+
return sdi.dbWriter.upsertHeaderCID(tx.dbtx, models.HeaderModel{
247252
CID: headerNode.Cid().String(),
248253
MhKey: shared.MultihashKeyFromCID(headerNode.Cid()),
249254
ParentHash: header.ParentHash.String(),
@@ -262,12 +267,10 @@ func (sdi *StateDiffIndexer) processHeader(tx *sqlx.Tx, header *types.Header, he
262267
}
263268

264269
// processUncles publishes and indexes uncle IPLDs in Postgres
265-
func (sdi *StateDiffIndexer) processUncles(tx *sqlx.Tx, headerID int64, blockNumber uint64, uncleNodes []*ipld.EthHeader) error {
270+
func (sdi *StateDiffIndexer) processUncles(tx *BlockTx, headerID int64, blockNumber uint64, uncleNodes []*ipld.EthHeader) error {
266271
// publish and index uncles
267272
for _, uncleNode := range uncleNodes {
268-
if err := shared.PublishIPLD(tx, uncleNode); err != nil {
269-
return fmt.Errorf("error publishing uncle IPLD: %v", err)
270-
}
273+
tx.cacheIPLD(uncleNode)
271274
var uncleReward *big.Int
272275
// in PoA networks uncle reward is 0
273276
if sdi.chainConfig.Clique != nil {
@@ -282,7 +285,7 @@ func (sdi *StateDiffIndexer) processUncles(tx *sqlx.Tx, headerID int64, blockNum
282285
BlockHash: uncleNode.Hash().String(),
283286
Reward: uncleReward.String(),
284287
}
285-
if err := sdi.dbWriter.upsertUncleCID(tx, uncle, headerID); err != nil {
288+
if err := sdi.dbWriter.upsertUncleCID(tx.dbtx, uncle, headerID); err != nil {
286289
return err
287290
}
288291
}
@@ -305,28 +308,15 @@ type processArgs struct {
305308
}
306309

307310
// processReceiptsAndTxs publishes and indexes receipt and transaction IPLDs in Postgres
308-
func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs) error {
311+
func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *BlockTx, args processArgs) error {
309312
// Process receipts and txs
310313
signer := types.MakeSigner(sdi.chainConfig, args.blockNumber)
311314
for i, receipt := range args.receipts {
312-
// tx that corresponds with this receipt
313-
trx := args.txs[i]
314-
from, err := types.Sender(signer, trx)
315-
if err != nil {
316-
return fmt.Errorf("error deriving tx sender: %v", err)
317-
}
318-
319-
for _, trie := range args.logTrieNodes[i] {
320-
if err = shared.PublishIPLD(tx, trie); err != nil {
321-
return fmt.Errorf("error publishing log trie node IPLD: %w", err)
322-
}
315+
for _, logTrieNode := range args.logTrieNodes[i] {
316+
tx.cacheIPLD(logTrieNode)
323317
}
324-
325-
// publish the txs and receipts
326318
txNode := args.txNodes[i]
327-
if err := shared.PublishIPLD(tx, txNode); err != nil {
328-
return fmt.Errorf("error publishing tx IPLD: %v", err)
329-
}
319+
tx.cacheIPLD(txNode)
330320

331321
// Indexing
332322
// extract topic and contract data from the receipt for indexing
@@ -344,7 +334,6 @@ func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs
344334

345335
mappedContracts[l.Address.String()] = true
346336
logDataSet[idx] = &models.LogsModel{
347-
ID: 0,
348337
Address: l.Address.String(),
349338
Index: int64(l.Index),
350339
Data: l.Data,
@@ -368,6 +357,12 @@ func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs
368357
contractHash = crypto.Keccak256Hash(common.HexToAddress(contract).Bytes()).String()
369358
}
370359
// index tx first so that the receipt can reference it by FK
360+
trx := args.txs[i]
361+
// derive sender for the tx that corresponds with this receipt
362+
from, err := types.Sender(signer, trx)
363+
if err != nil {
364+
return fmt.Errorf("error deriving tx sender: %v", err)
365+
}
371366
txModel := models.TxModel{
372367
Dst: shared.HandleZeroAddrPointer(trx.To()),
373368
Src: shared.HandleZeroAddr(from),
@@ -381,7 +376,7 @@ func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs
381376
if txType != types.LegacyTxType {
382377
txModel.Type = &txType
383378
}
384-
txID, err := sdi.dbWriter.upsertTransactionCID(tx, txModel, args.headerID)
379+
txID, err := sdi.dbWriter.upsertTransactionCID(tx.dbtx, txModel, args.headerID)
385380
if err != nil {
386381
return err
387382
}
@@ -397,7 +392,7 @@ func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs
397392
Address: accessListElement.Address.Hex(),
398393
StorageKeys: storageKeys,
399394
}
400-
if err := sdi.dbWriter.upsertAccessListElement(tx, accessListElementModel, txID); err != nil {
395+
if err := sdi.dbWriter.upsertAccessListElement(tx.dbtx, accessListElementModel, txID); err != nil {
401396
return err
402397
}
403398
}
@@ -420,27 +415,20 @@ func (sdi *StateDiffIndexer) processReceiptsAndTxs(tx *sqlx.Tx, args processArgs
420415
rctModel.PostState = common.Bytes2Hex(receipt.PostState)
421416
}
422417

423-
receiptID, err := sdi.dbWriter.upsertReceiptCID(tx, rctModel, txID)
418+
receiptID, err := sdi.dbWriter.upsertReceiptCID(tx.dbtx, rctModel, txID)
424419
if err != nil {
425420
return err
426421
}
427422

428-
if err = sdi.dbWriter.upsertLogCID(tx, logDataSet, receiptID); err != nil {
423+
if err = sdi.dbWriter.upsertLogCID(tx.dbtx, logDataSet, receiptID); err != nil {
429424
return err
430425
}
431426
}
432427

433428
// publish trie nodes, these aren't indexed directly
434-
for _, n := range args.txTrieNodes {
435-
if err := shared.PublishIPLD(tx, n); err != nil {
436-
return fmt.Errorf("error publishing tx trie node IPLD: %w", err)
437-
}
438-
}
439-
440-
for _, n := range args.rctTrieNodes {
441-
if err := shared.PublishIPLD(tx, n); err != nil {
442-
return fmt.Errorf("error publishing rct trie node IPLD: %w", err)
443-
}
429+
for i, n := range args.txTrieNodes {
430+
tx.cacheIPLD(n)
431+
tx.cacheIPLD(args.rctTrieNodes[i])
444432
}
445433

446434
return nil
@@ -462,9 +450,9 @@ func (sdi *StateDiffIndexer) PushStateNode(tx *BlockTx, stateNode sdtypes.StateN
462450
_, err := sdi.dbWriter.upsertStateCID(tx.dbtx, stateModel, tx.headerID)
463451
return err
464452
}
465-
stateCIDStr, stateMhKey, err := shared.PublishRaw(tx.dbtx, ipld.MEthStateTrie, multihash.KECCAK_256, stateNode.NodeValue)
453+
stateCIDStr, stateMhKey, err := tx.cacheRaw(ipld.MEthStateTrie, multihash.KECCAK_256, stateNode.NodeValue)
466454
if err != nil {
467-
return fmt.Errorf("error publishing state node IPLD: %v", err)
455+
return fmt.Errorf("error generating and cacheing state node IPLD: %v", err)
468456
}
469457
stateModel := models.StateNodeModel{
470458
Path: stateNode.Path,
@@ -518,9 +506,9 @@ func (sdi *StateDiffIndexer) PushStateNode(tx *BlockTx, stateNode sdtypes.StateN
518506
}
519507
continue
520508
}
521-
storageCIDStr, storageMhKey, err := shared.PublishRaw(tx.dbtx, ipld.MEthStorageTrie, multihash.KECCAK_256, storageNode.NodeValue)
509+
storageCIDStr, storageMhKey, err := tx.cacheRaw(ipld.MEthStorageTrie, multihash.KECCAK_256, storageNode.NodeValue)
522510
if err != nil {
523-
return fmt.Errorf("error publishing storage node IPLD: %v", err)
511+
return fmt.Errorf("error generating and cacheing storage node IPLD: %v", err)
524512
}
525513
storageModel := models.StorageNodeModel{
526514
Path: storageNode.Path,
@@ -544,8 +532,6 @@ func (sdi *StateDiffIndexer) PushCodeAndCodeHash(tx *BlockTx, codeAndCodeHash sd
544532
if err != nil {
545533
return fmt.Errorf("error deriving multihash key from codehash: %v", err)
546534
}
547-
if err := shared.PublishDirect(tx.dbtx, mhKey, codeAndCodeHash.Code); err != nil {
548-
return fmt.Errorf("error publishing code IPLD: %v", err)
549-
}
535+
tx.cacheDirect(mhKey, codeAndCodeHash.Code)
550536
return nil
551537
}

0 commit comments

Comments
 (0)