Skip to content

Commit 558a187

Browse files
authored
Add mode to write to CSV files in statediff file writer (#249)
* Change file writing mode to csv files * Implement writer interface for file indexer * Implement option for csv or sql in file mode * Close files in CSV writer * Add tests for CSV file mode * Implement CSV file for watched addresses * Separate test configs for CSV and SQL * Refactor common code for file indexer tests
1 parent bf4bba2 commit 558a187

22 files changed

+2723
-1377
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -53,32 +53,9 @@ jobs:
5353
- name: Checkout code
5454
uses: actions/checkout@v2
5555

56-
- uses: actions/checkout@v3
57-
with:
58-
ref: ${{ env.stack-orchestrator-ref }}
59-
path: "./stack-orchestrator/"
60-
repository: vulcanize/stack-orchestrator
61-
fetch-depth: 0
62-
63-
- uses: actions/checkout@v3
64-
with:
65-
ref: ${{ env.ipld-eth-db-ref }}
66-
repository: vulcanize/ipld-eth-db
67-
path: "./ipld-eth-db/"
68-
fetch-depth: 0
69-
70-
- name: Create config file
71-
run: |
72-
echo vulcanize_ipld_eth_db=$GITHUB_WORKSPACE/ipld-eth-db/ > $GITHUB_WORKSPACE/config.sh
73-
echo db_write=true >> $GITHUB_WORKSPACE/config.sh
74-
cat $GITHUB_WORKSPACE/config.sh
75-
7656
- name: Run docker compose
7757
run: |
78-
docker-compose \
79-
-f "$GITHUB_WORKSPACE/stack-orchestrator/docker/local/docker-compose-db-sharding.yml" \
80-
--env-file $GITHUB_WORKSPACE/config.sh \
81-
up -d --build
58+
docker-compose up -d
8259
8360
- name: Give the migration a few seconds
8461
run: sleep 30;

cmd/geth/config.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,15 @@ func makeFullNode(ctx *cli.Context) (*node.Node, ethapi.Backend) {
211211
}
212212
switch dbType {
213213
case shared.FILE:
214+
fileModeStr := ctx.GlobalString(utils.StateDiffFileMode.Name)
215+
fileMode, err := file.ResolveFileMode(fileModeStr)
216+
if err != nil {
217+
utils.Fatalf("%v", err)
218+
}
219+
214220
indexerConfig = file.Config{
221+
Mode: fileMode,
222+
OutputDir: ctx.GlobalString(utils.StateDiffFileCsvDir.Name),
215223
FilePath: ctx.GlobalString(utils.StateDiffFilePath.Name),
216224
WatchedAddressesFilePath: ctx.GlobalString(utils.StateDiffWatchedAddressesFilePath.Name),
217225
}

cmd/geth/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ var (
171171
utils.StateDiffDBClientNameFlag,
172172
utils.StateDiffWritingFlag,
173173
utils.StateDiffWorkersFlag,
174+
utils.StateDiffFileMode,
175+
utils.StateDiffFileCsvDir,
174176
utils.StateDiffFilePath,
175177
utils.StateDiffKnownGapsFilePath,
176178
utils.StateDiffWaitForSync,

cmd/geth/usage.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ var AppHelpFlagGroups = []flags.FlagGroup{
244244
utils.StateDiffDBClientNameFlag,
245245
utils.StateDiffWritingFlag,
246246
utils.StateDiffWorkersFlag,
247+
utils.StateDiffFileMode,
248+
utils.StateDiffFileCsvDir,
247249
utils.StateDiffFilePath,
248250
utils.StateDiffKnownGapsFilePath,
249251
utils.StateDiffWaitForSync,

cmd/utils/flags.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -902,9 +902,18 @@ var (
902902
Name: "statediff.db.nodeid",
903903
Usage: "Node ID to use when writing state diffs to database",
904904
}
905+
StateDiffFileMode = cli.StringFlag{
906+
Name: "statediff.file.mode",
907+
Usage: "Statediff file writing mode (current options: csv, sql)",
908+
Value: "csv",
909+
}
910+
StateDiffFileCsvDir = cli.StringFlag{
911+
Name: "statediff.file.csvdir",
912+
Usage: "Full path of output directory to write statediff data out to when operating in csv file mode",
913+
}
905914
StateDiffFilePath = cli.StringFlag{
906915
Name: "statediff.file.path",
907-
Usage: "Full path (including filename) to write statediff data out to when operating in file mode",
916+
Usage: "Full path (including filename) to write statediff data out to when operating in sql file mode",
908917
}
909918
StateDiffKnownGapsFilePath = cli.StringFlag{
910919
Name: "statediff.knowngapsfile.path",

docker-compose.yml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,27 @@
11
version: "3.2"
22

33
services:
4-
ipld-eth-db:
4+
migrations:
55
restart: on-failure
66
depends_on:
7-
- access-node
8-
image: vulcanize/ipld-eth-db:v4.1.1-alpha
7+
- ipld-eth-db
8+
image: vulcanize/ipld-eth-db:v4.1.4-alpha
99
environment:
1010
DATABASE_USER: "vdbm"
11-
DATABASE_NAME: "vulcanize_testing_v4"
11+
DATABASE_NAME: "vulcanize_testing"
1212
DATABASE_PASSWORD: "password"
13-
DATABASE_HOSTNAME: "access-node"
13+
DATABASE_HOSTNAME: "ipld-eth-db"
1414
DATABASE_PORT: 5432
15+
16+
ipld-eth-db:
17+
image: timescale/timescaledb:latest-pg14
18+
restart: always
19+
command: ["postgres", "-c", "log_statement=all"]
20+
environment:
21+
POSTGRES_USER: "vdbm"
22+
POSTGRES_DB: "vulcanize_testing"
23+
POSTGRES_PASSWORD: "password"
24+
ports:
25+
- "127.0.0.1:8077:5432"
26+
volumes:
27+
- ./statediff/indexer/database/file:/file_indexer

statediff/indexer/database/file/config.go

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,38 @@
1717
package file
1818

1919
import (
20+
"fmt"
21+
"strings"
22+
2023
"github.com/ethereum/go-ethereum/statediff/indexer/node"
2124
"github.com/ethereum/go-ethereum/statediff/indexer/shared"
2225
)
2326

24-
// Config holds params for writing sql statements out to a file
27+
// FileMode explicitly types the mode of file writer we are using.
type FileMode string

// File writer modes. ResolveFileMode returns Unknown, together with an error,
// when its input matches none of the supported modes.
const (
	CSV     FileMode = "CSV"
	SQL     FileMode = "SQL"
	Unknown FileMode = "Unknown"
)

// ResolveFileMode resolves a FileMode from a provided string.
//
// Matching is case-insensitive and ignores leading/trailing whitespace, so
// "csv", "CSV" and " csv\n" all resolve to CSV. Any other input yields Unknown
// and a non-nil error that quotes the original, untrimmed input.
func ResolveFileMode(str string) (FileMode, error) {
	switch strings.ToLower(strings.TrimSpace(str)) {
	case "csv":
		return CSV, nil
	case "sql":
		return SQL, nil
	default:
		// %q keeps empty and whitespace-only values visible in the message.
		return Unknown, fmt.Errorf("unrecognized file type string: %q", str)
	}
}
47+
48+
// Config holds params for writing out CSV or SQL files
2549
type Config struct {
50+
Mode FileMode
51+
OutputDir string
2652
FilePath string
2753
WatchedAddressesFilePath string
2854
NodeInfo node.Info
@@ -33,15 +59,26 @@ func (c Config) Type() shared.DBType {
3359
return shared.FILE
3460
}
3561

36-
// TestConfig config for unit tests
37-
var TestConfig = Config{
62+
var nodeInfo = node.Info{
63+
GenesisBlock: "0xd4e56740f876aef8c010b86a40d5f56745a118d0906a34e69aec8c0db1cb8fa3",
64+
NetworkID: "1",
65+
ChainID: 1,
66+
ID: "mockNodeID",
67+
ClientName: "go-ethereum",
68+
}
69+
70+
// CSVTestConfig config for unit tests
71+
var CSVTestConfig = Config{
72+
Mode: CSV,
73+
OutputDir: "./statediffing_test",
74+
WatchedAddressesFilePath: "./statediffing_watched_addresses_test_file.csv",
75+
NodeInfo: nodeInfo,
76+
}
77+
78+
// SQLTestConfig config for unit tests
79+
var SQLTestConfig = Config{
80+
Mode: SQL,
3881
FilePath: "./statediffing_test_file.sql",
3982
WatchedAddressesFilePath: "./statediffing_watched_addresses_test_file.sql",
40-
NodeInfo: node.Info{
41-
GenesisBlock: "0xd4e56740f876aef8c010b86a40d5f56745a118d0906a34e69aec8c0db1cb8fa3",
42-
NetworkID: "1",
43-
ChainID: 1,
44-
ID: "mockNodeID",
45-
ClientName: "go-ethereum",
46-
},
83+
NodeInfo: nodeInfo,
4784
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// VulcanizeDB
2+
// Copyright © 2022 Vulcanize
3+
4+
// This program is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Affero General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
9+
// This program is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Affero General Public License for more details.
13+
14+
// You should have received a copy of the GNU Affero General Public License
15+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package file_test
18+
19+
import (
20+
"context"
21+
"errors"
22+
"fmt"
23+
"os"
24+
"path/filepath"
25+
"strings"
26+
"testing"
27+
28+
"github.com/jmoiron/sqlx"
29+
"github.com/multiformats/go-multihash"
30+
"github.com/stretchr/testify/require"
31+
32+
"github.com/ethereum/go-ethereum/statediff/indexer/database/file"
33+
"github.com/ethereum/go-ethereum/statediff/indexer/database/file/types"
34+
"github.com/ethereum/go-ethereum/statediff/indexer/database/sql/postgres"
35+
"github.com/ethereum/go-ethereum/statediff/indexer/interfaces"
36+
"github.com/ethereum/go-ethereum/statediff/indexer/ipld"
37+
)
38+
39+
const (
	// dbDirectory is the directory prefix joined onto the test output paths to
	// build the file paths named in the COPY statements.
	// NOTE(review): presumably the path at which the output files are visible
	// to the database server — confirm against the docker-compose volume mount.
	dbDirectory = "/file_indexer"

	// pgCopyStatement is the COPY template; its two verbs are the target table
	// name and the path of the CSV file to load.
	pgCopyStatement = `COPY %s FROM '%s' CSV`
)
41+
42+
func setupCSVLegacy(t *testing.T) {
43+
mockLegacyBlock = legacyData.MockBlock
44+
legacyHeaderCID, _ = ipld.RawdataToCid(ipld.MEthHeader, legacyData.MockHeaderRlp, multihash.KECCAK_256)
45+
file.CSVTestConfig.OutputDir = "./statediffing_legacy_test"
46+
47+
if _, err := os.Stat(file.CSVTestConfig.OutputDir); !errors.Is(err, os.ErrNotExist) {
48+
err := os.RemoveAll(file.CSVTestConfig.OutputDir)
49+
require.NoError(t, err)
50+
}
51+
52+
ind, err := file.NewStateDiffIndexer(context.Background(), legacyData.Config, file.CSVTestConfig)
53+
require.NoError(t, err)
54+
var tx interfaces.Batch
55+
tx, err = ind.PushBlock(
56+
mockLegacyBlock,
57+
legacyData.MockReceipts,
58+
legacyData.MockBlock.Difficulty())
59+
require.NoError(t, err)
60+
61+
defer func() {
62+
if err := tx.Submit(err); err != nil {
63+
t.Fatal(err)
64+
}
65+
if err := ind.Close(); err != nil {
66+
t.Fatal(err)
67+
}
68+
}()
69+
70+
for _, node := range legacyData.StateDiffs {
71+
err = ind.PushStateNode(tx, node, legacyData.MockBlock.Hash().String())
72+
require.NoError(t, err)
73+
}
74+
75+
require.Equal(t, legacyData.BlockNumber.String(), tx.(*file.BatchTx).BlockNumber)
76+
77+
connStr := postgres.DefaultConfig.DbConnectionString()
78+
sqlxdb, err = sqlx.Connect("postgres", connStr)
79+
if err != nil {
80+
t.Fatalf("failed to connect to db with connection string: %s err: %v", connStr, err)
81+
}
82+
}
83+
84+
func dumpCSVFileData(t *testing.T) {
85+
outputDir := filepath.Join(dbDirectory, file.CSVTestConfig.OutputDir)
86+
87+
for _, tbl := range file.Tables {
88+
var stmt string
89+
varcharColumns := tbl.VarcharColumns()
90+
if len(varcharColumns) > 0 {
91+
stmt = fmt.Sprintf(
92+
pgCopyStatement+" FORCE NOT NULL %s",
93+
tbl.Name,
94+
file.TableFilePath(outputDir, tbl.Name),
95+
strings.Join(varcharColumns, ", "),
96+
)
97+
} else {
98+
stmt = fmt.Sprintf(pgCopyStatement, tbl.Name, file.TableFilePath(outputDir, tbl.Name))
99+
}
100+
101+
_, err = sqlxdb.Exec(stmt)
102+
require.NoError(t, err)
103+
}
104+
}
105+
106+
func dumpWatchedAddressesCSVFileData(t *testing.T) {
107+
outputFilePath := filepath.Join(dbDirectory, file.CSVTestConfig.WatchedAddressesFilePath)
108+
stmt := fmt.Sprintf(pgCopyStatement, types.TableWatchedAddresses.Name, outputFilePath)
109+
110+
_, err = sqlxdb.Exec(stmt)
111+
require.NoError(t, err)
112+
}
113+
114+
func tearDownCSV(t *testing.T) {
115+
file.TearDownDB(t, sqlxdb)
116+
117+
err := os.RemoveAll(file.CSVTestConfig.OutputDir)
118+
require.NoError(t, err)
119+
120+
if err := os.Remove(file.CSVTestConfig.WatchedAddressesFilePath); !errors.Is(err, os.ErrNotExist) {
121+
require.NoError(t, err)
122+
}
123+
124+
err = sqlxdb.Close()
125+
require.NoError(t, err)
126+
}
127+
128+
func TestCSVFileIndexerLegacy(t *testing.T) {
129+
t.Run("Publish and index header IPLDs", func(t *testing.T) {
130+
setupCSVLegacy(t)
131+
dumpCSVFileData(t)
132+
defer tearDownCSV(t)
133+
testLegacyPublishAndIndexHeaderIPLDs(t)
134+
})
135+
}

0 commit comments

Comments
 (0)