Skip to content

Commit 0beab4a

Browse files
authored
Optimize ClickHouse table schemas and indexing (#100)
### TL;DR

Optimized ClickHouse table structures for improved query performance and data organization.

### What changed?

- Changed the `ORDER BY` keys to include the block number, because most queries order by time (equivalent to ordering by block number) or fetch a specific block's data by number, not by hash
- Introduced a `function_selector` column in the transactions table
- Indexed every log topic so that logs can be queried by any of them

### How to test?

1. Apply the SQL changes to a test ClickHouse instance
2. Verify that the tables are created successfully
3. Insert sample data into each table
4. Run queries using the new indexes and ordering to ensure improved performance
5. Compare query execution times with the previous table structure

### Why make this change?

These changes aim to:

1. Enhance query performance by using more appropriate index types (e.g., `bloom_filter` for hash columns)
2. Improve data organization and retrieval efficiency with updated `ORDER BY` clauses
3. Enable faster filtering on commonly used columns with additional indexes
4. Support function-based queries in the transactions table with the new `function_selector` column

These optimizations will lead to faster data access and improved overall system performance, especially for large-scale blockchain data analysis.
2 parents cc15cb4 + 6c6514f commit 0beab4a

File tree

7 files changed

+38
-11
lines changed

7 files changed

+38
-11
lines changed

internal/common/transaction.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type Transaction struct {
1818
Gas uint64 `json:"gas"`
1919
GasPrice *big.Int `json:"gas_price"`
2020
Data string `json:"data"`
21+
FunctionSelector string `json:"function_selector"`
2122
MaxFeePerGas *big.Int `json:"max_fee_per_gas"`
2223
MaxPriorityFeePerGas *big.Int `json:"max_priority_fee_per_gas"`
2324
TransactionType uint8 `json:"transaction_type"`

internal/rpc/serializer.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ func serializeTransaction(chainId *big.Int, rawTx interface{}, blockTimestamp ui
159159
Gas: hexToUint64(tx["gas"]),
160160
GasPrice: hexToBigInt(tx["gasPrice"]),
161161
Data: interfaceToString(tx["input"]),
162+
FunctionSelector: extractFunctionSelector(interfaceToString(tx["input"])),
162163
MaxFeePerGas: hexToBigInt(tx["maxFeePerGas"]),
163164
MaxPriorityFeePerGas: hexToBigInt(tx["maxPriorityFeePerGas"]),
164165
TransactionType: uint8(hexToUint64(tx["type"])),
@@ -169,6 +170,16 @@ func serializeTransaction(chainId *big.Int, rawTx interface{}, blockTimestamp ui
169170
}
170171
}
171172

173+
// extractFunctionSelector returns the function selector of a transaction
// input, i.e. the leading 4 bytes of the calldata, in the same textual form
// as the input itself: the first 10 characters of s.
//
// The 10 characters are presumably a "0x" prefix followed by 8 hex digits
// (two per selector byte); the prefix is not checked here, so the leading 10
// characters are returned as-is.
//
// It returns "" when s is shorter than 10 characters and therefore cannot
// hold a complete selector (for example an empty input).
func extractFunctionSelector(s string) string {
	// 2 prefix characters + 2 hex digits for each of the 4 selector bytes.
	const selectorLen = 2 + 2*4
	if len(s) < selectorLen {
		return ""
	}
	return s[:selectorLen]
}
182+
172183
func serializeLogs(chainId *big.Int, rawLogs []map[string]interface{}, block common.Block) []common.Log {
173184
serializedLogs := make([]common.Log, len(rawLogs))
174185
for i, rawLog := range rawLogs {

internal/storage/clickhouse.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package storage
33
import (
44
"context"
55
"crypto/tls"
6+
"database/sql"
67
"encoding/json"
78
"fmt"
89
"math/big"
@@ -110,7 +111,7 @@ func (c *ClickHouseConnector) insertTransactions(txs *[]common.Transaction) erro
110111
query := `
111112
INSERT INTO ` + c.cfg.Database + `.transactions (
112113
chain_id, hash, nonce, block_hash, block_number, block_timestamp, transaction_index,
113-
from_address, to_address, value, gas, gas_price, data, max_fee_per_gas, max_priority_fee_per_gas,
114+
from_address, to_address, value, gas, gas_price, data, function_selector, max_fee_per_gas, max_priority_fee_per_gas,
114115
transaction_type, r, s, v, access_list
115116
)
116117
`
@@ -133,6 +134,7 @@ func (c *ClickHouseConnector) insertTransactions(txs *[]common.Transaction) erro
133134
tx.Gas,
134135
tx.GasPrice,
135136
tx.Data,
137+
tx.FunctionSelector,
136138
tx.MaxFeePerGas,
137139
tx.MaxPriorityFeePerGas,
138140
tx.TransactionType,
@@ -490,28 +492,36 @@ func scanLog(rows driver.Rows) (common.Log, error) {
490492
}
491493

492494
func (c *ClickHouseConnector) GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) {
493-
query := fmt.Sprintf("SELECT max(number) FROM %s.blocks WHERE is_deleted = 0", c.cfg.Database)
495+
query := fmt.Sprintf("SELECT number FROM %s.blocks WHERE is_deleted = 0", c.cfg.Database)
494496
if chainId.Sign() > 0 {
495497
query += fmt.Sprintf(" AND chain_id = %s", chainId.String())
496498
}
499+
query += " ORDER BY number DESC LIMIT 1"
497500
err = c.conn.QueryRow(context.Background(), query).Scan(&maxBlockNumber)
498501
if err != nil {
502+
if err == sql.ErrNoRows {
503+
return big.NewInt(0), nil
504+
}
499505
return nil, err
500506
}
501507
zLog.Debug().Msgf("Max block number in main storage is: %s", maxBlockNumber.String())
502508
return maxBlockNumber, nil
503509
}
504510

505511
func (c *ClickHouseConnector) GetLastStagedBlockNumber(chainId *big.Int, rangeEnd *big.Int) (maxBlockNumber *big.Int, err error) {
506-
query := fmt.Sprintf("SELECT max(block_number) FROM %s.block_data WHERE is_deleted = 0", c.cfg.Database)
512+
query := fmt.Sprintf("SELECT block_number FROM %s.block_data WHERE is_deleted = 0", c.cfg.Database)
507513
if chainId.Sign() > 0 {
508514
query += fmt.Sprintf(" AND chain_id = %s", chainId.String())
509515
}
510516
if rangeEnd.Sign() > 0 {
511517
query += fmt.Sprintf(" AND block_number <= %s", rangeEnd.String())
512518
}
519+
query += " ORDER BY block_number DESC LIMIT 1"
513520
err = c.conn.QueryRow(context.Background(), query).Scan(&maxBlockNumber)
514521
if err != nil {
522+
if err == sql.ErrNoRows {
523+
return big.NewInt(0), nil
524+
}
515525
return nil, err
516526
}
517527
return maxBlockNumber, nil

internal/tools/clickhouse_create_blocks_table.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ CREATE TABLE blocks (
2424
`insert_timestamp` DateTime DEFAULT now(),
2525
`is_deleted` UInt8 DEFAULT 0,
2626
INDEX idx_timestamp timestamp TYPE minmax GRANULARITY 1,
27-
INDEX idx_number number TYPE minmax GRANULARITY 1,
27+
INDEX idx_hash hash TYPE bloom_filter GRANULARITY 1,
2828
) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted)
29-
ORDER BY (chain_id, hash)
29+
ORDER BY (chain_id, number)
3030
SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;

internal/tools/clickhouse_create_logs_table.sql

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ CREATE TABLE logs (
1515
`insert_timestamp` DateTime DEFAULT now(),
1616
`is_deleted` UInt8 DEFAULT 0,
1717
INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1,
18-
INDEX idx_block_number block_number TYPE minmax GRANULARITY 1,
18+
INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 1,
1919
INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 1,
2020
INDEX idx_address address TYPE bloom_filter GRANULARITY 1,
2121
INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 1,
22+
INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 1,
23+
INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 1,
24+
INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 1,
2225
) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted)
23-
ORDER BY (chain_id, transaction_hash, log_index, block_hash)
26+
ORDER BY (chain_id, block_number, transaction_hash, log_index)
2427
SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;

internal/tools/clickhouse_create_traces_table.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ CREATE TABLE traces (
2323
`is_deleted` UInt8 DEFAULT 0,
2424
`insert_timestamp` DateTime DEFAULT now(),
2525
INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1,
26-
INDEX idx_block_number block_number TYPE minmax GRANULARITY 1,
2726
INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 1,
27+
INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 1,
2828
INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1,
2929
INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1,
3030
INDEX idx_type type TYPE bloom_filter GRANULARITY 1,
3131
) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted)
32-
ORDER BY (chain_id, transaction_hash, trace_address, block_hash)
32+
ORDER BY (chain_id, block_number, transaction_hash, trace_address)
3333
SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;

internal/tools/clickhouse_create_transactions_table.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ CREATE TABLE transactions (
1212
`gas` UInt64,
1313
`gas_price` UInt256,
1414
`data` String,
15+
`function_selector` FixedString(10),
1516
`max_fee_per_gas` UInt128,
1617
`max_priority_fee_per_gas` UInt128,
1718
`transaction_type` UInt8,
@@ -22,9 +23,10 @@ CREATE TABLE transactions (
2223
`is_deleted` UInt8 DEFAULT 0,
2324
`insert_timestamp` DateTime DEFAULT now(),
2425
INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1,
25-
INDEX idx_block_number block_number TYPE minmax GRANULARITY 1,
2626
INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 1,
27+
INDEX idx_hash hash TYPE bloom_filter GRANULARITY 1,
2728
INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 1,
2829
INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 1,
30+
INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 1,
2931
) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted)
30-
ORDER BY (chain_id, hash) SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;
32+
ORDER BY (chain_id, block_number, hash) SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;

0 commit comments

Comments
 (0)