Skip to content

Commit 056bcbc

Browse files
authored
Merge pull request #42 from PostHog/feature/ducklake-integration-tests
Run integration tests on DuckLake by default
2 parents 22d2290 + d394bd4 commit 056bcbc

File tree

4 files changed

+212
-29
lines changed

4 files changed

+212
-29
lines changed

tests/integration/docker-compose.yml

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
version: '3.8'
2-
31
services:
2+
# PostgreSQL for comparison testing (existing)
43
postgres:
54
image: postgres:16-alpine
65
container_name: duckgres-test-postgres
@@ -20,3 +19,55 @@ services:
2019
retries: 10
2120
tmpfs:
2221
- /var/lib/postgresql/data
22+
23+
# PostgreSQL for DuckLake metadata storage
24+
ducklake-metadata:
25+
image: postgres:16-alpine
26+
container_name: duckgres-test-ducklake-metadata
27+
environment:
28+
POSTGRES_USER: ducklake
29+
POSTGRES_PASSWORD: ducklake
30+
POSTGRES_DB: ducklake
31+
ports:
32+
- "35433:5432"
33+
healthcheck:
34+
test: ["CMD-SHELL", "pg_isready -U ducklake -d ducklake"]
35+
interval: 2s
36+
timeout: 5s
37+
retries: 10
38+
tmpfs:
39+
- /var/lib/postgresql/data
40+
41+
# MinIO for DuckLake object storage
42+
minio:
43+
image: minio/minio:latest
44+
container_name: duckgres-test-minio
45+
command: server /data --console-address ":9001"
46+
environment:
47+
MINIO_ROOT_USER: minioadmin
48+
MINIO_ROOT_PASSWORD: minioadmin
49+
ports:
50+
- "39000:9000" # S3 API
51+
- "39001:9001" # Web console
52+
healthcheck:
53+
test: ["CMD", "mc", "ready", "local"]
54+
interval: 2s
55+
timeout: 5s
56+
retries: 10
57+
tmpfs:
58+
- /data
59+
60+
# Creates the ducklake bucket on startup
61+
minio-init:
62+
image: minio/mc:latest
63+
container_name: duckgres-test-minio-init
64+
depends_on:
65+
minio:
66+
condition: service_healthy
67+
entrypoint: >
68+
/bin/sh -c "
69+
mc alias set minio http://minio:9000 minioadmin minioadmin;
70+
mc mb minio/ducklake --ignore-existing;
71+
mc anonymous set download minio/ducklake;
72+
echo 'Bucket ducklake created successfully';
73+
"

tests/integration/harness.go

Lines changed: 117 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ type TestHarness struct {
2525
tmpDir string
2626
pgPort int
2727
dgPort int
28+
useDuckLake bool
2829
mu sync.Mutex
2930
}
3031

@@ -36,21 +37,33 @@ type HarnessConfig struct {
3637
SkipPostgres bool
3738
// Verbose enables verbose logging
3839
Verbose bool
40+
// UseDuckLake enables DuckLake mode (requires ducklake-metadata and minio)
41+
UseDuckLake bool
42+
// DuckLakeMetadataPort is the port for the DuckLake metadata PostgreSQL (default: 35433)
43+
DuckLakeMetadataPort int
44+
// MinIOPort is the port for MinIO S3 API (default: 39000)
45+
MinIOPort int
3946
}
4047

4148
// DefaultConfig returns the default harness configuration
4249
func DefaultConfig() HarnessConfig {
50+
// Default to DuckLake mode unless DUCKGRES_TEST_NO_DUCKLAKE is set to a non-empty value
51+
useDuckLake := os.Getenv("DUCKGRES_TEST_NO_DUCKLAKE") == ""
4352
return HarnessConfig{
44-
PostgresPort: 35432,
45-
SkipPostgres: false,
46-
Verbose: os.Getenv("DUCKGRES_TEST_VERBOSE") != "",
53+
PostgresPort: 35432,
54+
SkipPostgres: false,
55+
Verbose: os.Getenv("DUCKGRES_TEST_VERBOSE") != "",
56+
UseDuckLake: useDuckLake,
57+
DuckLakeMetadataPort: 35433,
58+
MinIOPort: 39000,
4759
}
4860
}
4961

5062
// NewTestHarness creates a new test harness
5163
func NewTestHarness(cfg HarnessConfig) (*TestHarness, error) {
5264
h := &TestHarness{
53-
pgPort: cfg.PostgresPort,
65+
pgPort: cfg.PostgresPort,
66+
useDuckLake: cfg.UseDuckLake,
5467
}
5568

5669
// Create temp directory for Duckgres
@@ -61,7 +74,7 @@ func NewTestHarness(cfg HarnessConfig) (*TestHarness, error) {
6174
h.tmpDir = tmpDir
6275

6376
// Start Duckgres server
64-
if err := h.startDuckgres(); err != nil {
77+
if err := h.startDuckgres(cfg); err != nil {
6578
os.RemoveAll(tmpDir)
6679
return nil, fmt.Errorf("failed to start Duckgres: %w", err)
6780
}
@@ -90,7 +103,7 @@ func NewTestHarness(cfg HarnessConfig) (*TestHarness, error) {
90103
}
91104

92105
// startDuckgres starts the Duckgres server
93-
func (h *TestHarness) startDuckgres() error {
106+
func (h *TestHarness) startDuckgres(harnessCfg HarnessConfig) error {
94107
port := findAvailablePort()
95108
h.dgPort = port
96109

@@ -111,6 +124,22 @@ func (h *TestHarness) startDuckgres() error {
111124
Users: map[string]string{
112125
"testuser": "testpass",
113126
},
127+
Extensions: []string{"ducklake"},
128+
}
129+
130+
// Configure DuckLake if enabled
131+
if harnessCfg.UseDuckLake {
132+
cfg.DuckLake = server.DuckLakeConfig{
133+
MetadataStore: fmt.Sprintf("postgres:host=127.0.0.1 port=%d user=ducklake password=ducklake dbname=ducklake", harnessCfg.DuckLakeMetadataPort),
134+
ObjectStore: "s3://ducklake/data/",
135+
S3Provider: "config",
136+
S3Endpoint: fmt.Sprintf("127.0.0.1:%d", harnessCfg.MinIOPort),
137+
S3AccessKey: "minioadmin",
138+
S3SecretKey: "minioadmin",
139+
S3Region: "us-east-1",
140+
S3UseSSL: false,
141+
S3URLStyle: "path",
142+
}
114143
}
115144

116145
srv, err := server.New(cfg)
@@ -185,7 +214,17 @@ func (h *TestHarness) connectDuckgres() error {
185214
}
186215

187216
// loadFixtures loads the test schema and data into Duckgres
217+
// In DuckLake mode, tables are automatically created in ducklake.main
218+
// because the server runs "USE ducklake" to set the default catalog
188219
func (h *TestHarness) loadFixtures() error {
220+
// In DuckLake mode, drop existing tables first since metadata persists
221+
if h.useDuckLake {
222+
if err := h.cleanupDuckLakeTables(); err != nil {
223+
// Log but don't fail - tables might not exist
224+
fmt.Printf("Warning: cleanup failed (may be OK): %v\n", err)
225+
}
226+
}
227+
189228
// Read and execute schema
190229
schemaPath := filepath.Join(getTestDir(), "fixtures", "schema.sql")
191230
schemaSQL, err := os.ReadFile(schemaPath)
@@ -226,6 +265,45 @@ func (h *TestHarness) loadFixtures() error {
226265
return nil
227266
}
228267

268+
// cleanupDuckLakeTables drops the fixture views, tables, and test schema in DuckLake before loading fixtures; it is best-effort: drop errors are ignored and it always returns nil
269+
func (h *TestHarness) cleanupDuckLakeTables() error {
270+
// Drop views first (they depend on tables)
271+
views := []string{"order_details", "user_stats", "active_users"}
272+
for _, v := range views {
273+
h.DuckgresDB.Exec(fmt.Sprintf("DROP VIEW IF EXISTS %s", v))
274+
}
275+
276+
// Drop tables in reverse dependency order
277+
tables := []string{
278+
"test_schema.schema_test",
279+
"array_test",
280+
"documents",
281+
"metrics",
282+
"empty_table",
283+
"nullable_test",
284+
"json_data",
285+
"events",
286+
"sales",
287+
"categories",
288+
"order_items",
289+
"orders",
290+
"products",
291+
"users",
292+
"types_test",
293+
}
294+
295+
for _, t := range tables {
296+
if _, err := h.DuckgresDB.Exec(fmt.Sprintf("DROP TABLE IF EXISTS %s", t)); err != nil {
297+
// Ignore errors - table might not exist or schema might not exist
298+
}
299+
}
300+
301+
// Drop test schema
302+
h.DuckgresDB.Exec("DROP SCHEMA IF EXISTS test_schema")
303+
304+
return nil
305+
}
306+
229307
// Close shuts down the test harness
230308
func (h *TestHarness) Close() error {
231309
h.mu.Lock()
@@ -297,6 +375,39 @@ func IsPostgresRunning(port int) bool {
297375
return true
298376
}
299377

378+
// IsDuckLakeInfraRunning checks if the DuckLake infrastructure (metadata postgres + minio) is running
379+
func IsDuckLakeInfraRunning(metadataPort, minioPort int) bool {
380+
// Check DuckLake metadata PostgreSQL
381+
metaConn, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", metadataPort), time.Second)
382+
if err != nil {
383+
return false
384+
}
385+
metaConn.Close()
386+
387+
// Check MinIO
388+
minioConn, err := net.DialTimeout("tcp", fmt.Sprintf("127.0.0.1:%d", minioPort), time.Second)
389+
if err != nil {
390+
return false
391+
}
392+
minioConn.Close()
393+
394+
return true
395+
}
396+
397+
// WaitForDuckLakeInfra waits for DuckLake infrastructure to be ready
398+
func WaitForDuckLakeInfra(metadataPort, minioPort int, timeout time.Duration) error {
399+
deadline := time.Now().Add(timeout)
400+
for time.Now().Before(deadline) {
401+
if IsDuckLakeInfraRunning(metadataPort, minioPort) {
402+
// Give MinIO a bit more time to initialize the bucket
403+
time.Sleep(500 * time.Millisecond)
404+
return nil
405+
}
406+
time.Sleep(500 * time.Millisecond)
407+
}
408+
return fmt.Errorf("timeout waiting for DuckLake infrastructure (metadata:%d, minio:%d)", metadataPort, minioPort)
409+
}
410+
300411
// Helper functions
301412

302413
func findAvailablePort() int {

tests/integration/setup_test.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"os"
77
"testing"
8+
"time"
89
)
910

1011
var (
@@ -16,8 +17,10 @@ var (
1617

1718
// TestMain sets up and tears down the test environment
1819
func TestMain(m *testing.M) {
19-
// Check if PostgreSQL is running
20-
pgPort := 35432
20+
cfg := DefaultConfig()
21+
22+
// Check if PostgreSQL (for comparison) is running
23+
pgPort := cfg.PostgresPort
2124
if !IsPostgresRunning(pgPort) {
2225
fmt.Println("PostgreSQL container not running. Starting it...")
2326
if err := StartPostgresContainer(); err != nil {
@@ -27,8 +30,20 @@ func TestMain(m *testing.M) {
2730
}
2831
}
2932

30-
// Create test harness
31-
cfg := DefaultConfig()
33+
// Check and wait for DuckLake infrastructure if DuckLake mode is enabled
34+
if cfg.UseDuckLake {
35+
if !IsDuckLakeInfraRunning(cfg.DuckLakeMetadataPort, cfg.MinIOPort) {
36+
fmt.Println("DuckLake infrastructure not running. Waiting for it...")
37+
if err := WaitForDuckLakeInfra(cfg.DuckLakeMetadataPort, cfg.MinIOPort, 30*time.Second); err != nil {
38+
fmt.Printf("DuckLake infrastructure not available: %v\n", err)
39+
fmt.Println("Falling back to vanilla DuckDB mode (set DUCKGRES_TEST_NO_DUCKLAKE=1 to suppress this)")
40+
cfg.UseDuckLake = false
41+
} else {
42+
fmt.Println("DuckLake infrastructure is ready")
43+
}
44+
}
45+
}
46+
3247
cfg.SkipPostgres = skipPostgresCompare
3348

3449
var err error

transpiler/transform/ddl.go

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -263,36 +263,42 @@ func (t *DDLTransform) isUnsupportedColumnConstraint(c *pg_query.Constraint) boo
263263
return false
264264
}
265265

266-
// isUnsupportedDefault checks if a DEFAULT expression is unsupported (e.g., now(), current_timestamp)
266+
// isUnsupportedDefault checks if a DEFAULT expression is unsupported by DuckLake.
267+
// DuckLake only supports simple numeric and string literals as defaults.
268+
// Returns true if the default should be stripped.
267269
func (t *DDLTransform) isUnsupportedDefault(expr *pg_query.Node) bool {
268270
if expr == nil {
269271
return false
270272
}
271273

272-
// Check for function calls to now() or current_timestamp
274+
// Check for function calls (e.g., now(), current_timestamp)
273275
if funcCall := expr.GetFuncCall(); funcCall != nil {
274-
if len(funcCall.Funcname) == 1 {
275-
if name := funcCall.Funcname[0].GetString_(); name != nil {
276-
funcName := strings.ToLower(name.Sval)
277-
if funcName == "now" || funcName == "current_timestamp" {
278-
return true
279-
}
280-
}
281-
}
276+
return true // All function calls are unsupported
282277
}
283278

284279
// Check for SQLValueFunction (CURRENT_TIMESTAMP, CURRENT_DATE, etc.)
285280
if svf := expr.GetSqlvalueFunction(); svf != nil {
286-
switch svf.Op {
287-
case pg_query.SQLValueFunctionOp_SVFOP_CURRENT_TIMESTAMP,
288-
pg_query.SQLValueFunctionOp_SVFOP_CURRENT_TIMESTAMP_N,
289-
pg_query.SQLValueFunctionOp_SVFOP_LOCALTIMESTAMP,
290-
pg_query.SQLValueFunctionOp_SVFOP_LOCALTIMESTAMP_N:
291-
return true
281+
return true // All SQL value functions are unsupported
282+
}
283+
284+
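// Check for type casts: the default is judged by the expression being cast (boolean constants such as DEFAULT true/false are rejected by the A_Const check below)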
285+
// DuckLake only supports numeric and string literals
286+
if typeCast := expr.GetTypeCast(); typeCast != nil {
287+
return t.isUnsupportedDefault(typeCast.Arg)
288+
}
289+
290+
// Check for A_Const nodes - only allow Integer, Float, and String literals
291+
if aconst := expr.GetAConst(); aconst != nil {
292+
switch aconst.Val.(type) {
293+
case *pg_query.A_Const_Ival, *pg_query.A_Const_Fval, *pg_query.A_Const_Sval:
294+
return false // These are supported
295+
default:
296+
return true // Booleans, NULLs, etc. are not supported
292297
}
293298
}
294299

295-
return false
300+
// Column references, expressions, etc. are not supported
301+
return true
296302
}
297303

298304
// isConstraintCommand checks if an ALTER TABLE command is adding a constraint

0 commit comments

Comments
 (0)