Skip to content

Commit 90a322e

Browse files
authored
Merge pull request #4 from PostHog/feature/extensions
Add configurable DuckDB extension loading
2 parents 855b234 + 7c1b0c4 commit 90a322e

File tree

5 files changed

+246
-6
lines changed

5 files changed

+246
-6
lines changed

duckgres.example.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,32 @@ users:
1919
alice: "alice123"
2020
bob: "bob123"
2121

22+
# DuckDB extensions to load on database initialization
23+
# Extensions are installed (downloaded if needed) and loaded automatically
24+
# Common extensions: httpfs, parquet, json, sqlite, postgres, mysql, excel
25+
# Default: ducklake (used when there is no config file, or when this list is omitted or empty)
26+
extensions:
27+
- ducklake
28+
# - httpfs
29+
# - parquet
30+
31+
# DuckLake configuration (optional)
32+
# When configured, DuckLake catalog is automatically attached on connection
33+
# See: https://ducklake.select/docs/stable/duckdb/usage/connecting
34+
ducklake:
35+
# Metadata store connection string (required to enable DuckLake)
36+
# Examples:
37+
# - "postgres:dbname=ducklake"
38+
# - "postgres:host=localhost dbname=ducklake user=postgres password=secret"
39+
# - "sqlite:ducklake.db"
40+
# metadata_store: "postgres:dbname=ducklake"
41+
42+
# Data path for table data files (optional)
43+
# Examples:
44+
# - "s3://my-bucket/ducklake-data/"
45+
# - "/local/path/to/data/"
46+
# data_path: "s3://my-bucket/ducklake-data/"
47+
2248
# Rate limiting configuration (optional - these are the defaults)
2349
rate_limit:
2450
# Max failed auth attempts before banning an IP

main.go

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ import (
1616

1717
// FileConfig represents the YAML configuration file structure
1818
type FileConfig struct {
19-
Host string `yaml:"host"`
20-
Port int `yaml:"port"`
21-
DataDir string `yaml:"data_dir"`
22-
TLS TLSConfig `yaml:"tls"`
23-
Users map[string]string `yaml:"users"`
24-
RateLimit RateLimitFileConfig `yaml:"rate_limit"`
19+
Host string `yaml:"host"`
20+
Port int `yaml:"port"`
21+
DataDir string `yaml:"data_dir"`
22+
TLS TLSConfig `yaml:"tls"`
23+
Users map[string]string `yaml:"users"`
24+
RateLimit RateLimitFileConfig `yaml:"rate_limit"`
25+
Extensions []string `yaml:"extensions"`
26+
DuckLake DuckLakeFileConfig `yaml:"ducklake"`
2527
}
2628

2729
type TLSConfig struct {
@@ -36,6 +38,11 @@ type RateLimitFileConfig struct {
3638
MaxConnectionsPerIP int `yaml:"max_connections_per_ip"`
3739
}
3840

41+
// DuckLakeFileConfig is the "ducklake" section of the YAML configuration file.
// Both fields are plain strings; main copies each one into the server
// configuration only when it is non-empty, so omitted keys leave the
// corresponding server setting untouched.
type DuckLakeFileConfig struct {
42+
MetadataStore string `yaml:"metadata_store"` // e.g., "postgres:dbname=ducklake"
43+
DataPath string `yaml:"data_path"` // e.g., "s3://my-bucket/data/"
44+
}
45+
3946
// loadConfigFile loads configuration from a YAML file
4047
func loadConfigFile(path string) (*FileConfig, error) {
4148
data, err := os.ReadFile(path)
@@ -109,6 +116,7 @@ func main() {
109116
Users: map[string]string{
110117
"postgres": "postgres",
111118
},
119+
Extensions: []string{"ducklake"},
112120
}
113121

114122
// Load config file if specified
@@ -160,6 +168,19 @@ func main() {
160168
log.Printf("Warning: invalid ban_duration duration: %v", err)
161169
}
162170
}
171+
172+
// Apply extensions config
173+
if len(fileCfg.Extensions) > 0 {
174+
cfg.Extensions = fileCfg.Extensions
175+
}
176+
177+
// Apply DuckLake config
178+
if fileCfg.DuckLake.MetadataStore != "" {
179+
cfg.DuckLake.MetadataStore = fileCfg.DuckLake.MetadataStore
180+
}
181+
if fileCfg.DuckLake.DataPath != "" {
182+
cfg.DuckLake.DataPath = fileCfg.DuckLake.DataPath
183+
}
163184
}
164185

165186
// Apply environment variables (override config file)
@@ -180,6 +201,12 @@ func main() {
180201
if v := os.Getenv("DUCKGRES_KEY"); v != "" {
181202
cfg.TLSKeyFile = v
182203
}
204+
if v := os.Getenv("DUCKGRES_DUCKLAKE_METADATA_STORE"); v != "" {
205+
cfg.DuckLake.MetadataStore = v
206+
}
207+
if v := os.Getenv("DUCKGRES_DUCKLAKE_DATA_PATH"); v != "" {
208+
cfg.DuckLake.DataPath = v
209+
}
183210

184211
// Apply CLI flags (highest priority)
185212
if *host != "" {

scripts/test_ducklake.sh

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/bin/bash
# Test script for DuckLake catalog configuration.
#
# Writes a temporary config that enables DuckLake with a SQLite metadata
# store, starts ./duckgres with it, and checks that a basic query succeeds
# over psql. Exits non-zero if the server dies or the query fails.

set -e

cd "$(dirname "$0")/.."

CONFIG=/tmp/ducklake-test-config.yaml
PORT=35436
SERVER_PID=""

# Stop the background server (if it was started) and remove the temporary
# config on every exit path, including early exits triggered by `set -e`.
cleanup() {
  if [ -n "$SERVER_PID" ]; then
    kill "$SERVER_PID" 2>/dev/null || true
  fi
  rm -f "$CONFIG"
}
trap cleanup EXIT

# Create a test config with DuckLake using SQLite metadata store
cat > "$CONFIG" <<EOF
host: "127.0.0.1"
port: $PORT
data_dir: "./data"
tls:
  cert: "./certs/server.crt"
  key: "./certs/server.key"
users:
  testuser: "testpass"
extensions:
  - ducklake
ducklake:
  metadata_store: "sqlite:./data/ducklake_meta.db"
  data_path: "./data/ducklake_data/"
EOF

# Clean up any existing data
rm -rf ./data/testuser.db ./data/ducklake_meta.db ./data/ducklake_data/

# Create data path directory
mkdir -p ./data/ducklake_data

# Kill any existing instances. The port is only present in the config file,
# not on the server's command line, so also match on the config path.
pkill -f "duckgres.*$PORT" 2>/dev/null || true
pkill -f "duckgres.*$CONFIG" 2>/dev/null || true
sleep 1

echo "=== Starting server with DuckLake configured ==="
./duckgres --config "$CONFIG" &
SERVER_PID=$!
sleep 3

# Fail fast with a clear message if the server exited during startup.
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "✗ Server failed to start"
  exit 1
fi

echo ""
echo "=== Testing DuckLake catalog is attached ==="
# Try to use the DuckLake catalog - if it's attached, this should work.
# `|| true` keeps `set -e` from aborting here when psql fails, so the failure
# branch below can report the captured output.
RESULT=$(PGPASSWORD=testpass psql "host=127.0.0.1 port=$PORT user=testuser dbname=test sslmode=require" -t -c "SELECT 'ducklake attached' as status;" 2>&1) || true
echo "Result: $RESULT"

# The real test is that the server logs show "Attached DuckLake catalog"
# We can verify by checking that basic queries still work
if echo "$RESULT" | grep -q "ducklake attached"; then
  echo "✓ Server is running with DuckLake configured!"
else
  echo "✗ Connection failed"
  exit 1
fi

echo ""
echo "=== DuckLake configuration test passed! ==="

scripts/test_extensions.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash
# Test script for extension loading.
#
# Writes a temporary config that lists the json extension, starts ./duckgres
# with it, and checks over psql that json_extract works. Exits non-zero if
# the server dies or the query does not return the expected value.

set -e

cd "$(dirname "$0")/.."

CONFIG=/tmp/ext-test-config.yaml
PORT=35433
SERVER_PID=""

# Stop the background server (if it was started) and remove the temporary
# config on every exit path, including early exits triggered by `set -e`.
cleanup() {
  if [ -n "$SERVER_PID" ]; then
    kill "$SERVER_PID" 2>/dev/null || true
  fi
  rm -f "$CONFIG"
}
trap cleanup EXIT

# Create a test config with json extension
cat > "$CONFIG" <<EOF
host: "127.0.0.1"
port: $PORT
data_dir: "./data"
tls:
  cert: "./certs/server.crt"
  key: "./certs/server.key"
users:
  testuser: "testpass"
extensions:
  - json
EOF

# Clean up any existing data for testuser
rm -rf ./data/testuser.db

# Kill any existing instances. The port is only present in the config file,
# not on the server's command line, so also match on the config path.
pkill -f "duckgres.*$PORT" 2>/dev/null || true
pkill -f "duckgres.*$CONFIG" 2>/dev/null || true
sleep 1

echo "=== Starting server with json extension configured ==="
./duckgres --config "$CONFIG" &
SERVER_PID=$!
sleep 3

# Fail fast with a clear message if the server exited during startup.
if ! kill -0 "$SERVER_PID" 2>/dev/null; then
  echo "✗ Server failed to start"
  exit 1
fi

echo ""
echo "=== Testing JSON extension functionality ==="
# The json extension adds functions like json_extract, json_valid, etc.
# `|| true` keeps `set -e` from aborting here when psql fails, so the failure
# branch below can report the captured output.
RESULT=$(PGPASSWORD=testpass psql "host=127.0.0.1 port=$PORT user=testuser dbname=test sslmode=require" -t -c "SELECT json_extract('{\"name\": \"Alice\"}', '\$.name') as result;" 2>&1) || true
echo "Result: $RESULT"

if echo "$RESULT" | grep -q "Alice"; then
  echo "✓ JSON extension loaded and working!"
else
  echo "✗ JSON extension test failed"
  exit 1
fi

echo ""
echo "=== Extension loading test passed! ==="

server/server.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@ type Config struct {
2323

2424
// Rate limiting configuration
2525
RateLimit RateLimitConfig
26+
27+
// Extensions to load on database initialization
28+
Extensions []string
29+
30+
// DuckLake configuration
31+
DuckLake DuckLakeConfig
32+
}
33+
34+
// DuckLakeConfig configures DuckLake metadata store and data path.
// An empty MetadataStore means DuckLake is not configured and no catalog is
// attached. DataPath is optional and is only added to the ATTACH statement
// when it is non-empty.
35+
type DuckLakeConfig struct {
36+
MetadataStore string // e.g., "postgres:dbname=ducklake" or "sqlite:ducklake.db"
37+
DataPath string // e.g., "s3://my-bucket/data/" or "/local/path"
2638
}
2739

2840
type Server struct {
@@ -144,6 +156,18 @@ func (s *Server) getOrCreateDB(username string) (*sql.DB, error) {
144156
return nil, fmt.Errorf("failed to ping duckdb: %w", err)
145157
}
146158

159+
// Load configured extensions
160+
if err := s.loadExtensions(db); err != nil {
161+
log.Printf("Warning: failed to load some extensions for user %q: %v", username, err)
162+
// Continue anyway - database will still work without the extensions
163+
}
164+
165+
// Attach DuckLake catalog if configured
166+
if err := s.attachDuckLake(db); err != nil {
167+
log.Printf("Warning: failed to attach DuckLake for user %q: %v", username, err)
168+
// Continue anyway - database will still work without DuckLake
169+
}
170+
147171
// Initialize pg_catalog schema for PostgreSQL compatibility
148172
if err := initPgCatalog(db); err != nil {
149173
log.Printf("Warning: failed to initialize pg_catalog for user %q: %v", username, err)
@@ -155,6 +179,55 @@ func (s *Server) getOrCreateDB(username string) (*sql.DB, error) {
155179
return db, nil
156180
}
157181

182+
// loadExtensions installs and loads configured DuckDB extensions
183+
func (s *Server) loadExtensions(db *sql.DB) error {
184+
if len(s.cfg.Extensions) == 0 {
185+
return nil
186+
}
187+
188+
var lastErr error
189+
for _, ext := range s.cfg.Extensions {
190+
// First install the extension (downloads if needed)
191+
if _, err := db.Exec("INSTALL " + ext); err != nil {
192+
log.Printf("Warning: failed to install extension %q: %v", ext, err)
193+
lastErr = err
194+
continue
195+
}
196+
197+
// Then load it into the current session
198+
if _, err := db.Exec("LOAD " + ext); err != nil {
199+
log.Printf("Warning: failed to load extension %q: %v", ext, err)
200+
lastErr = err
201+
continue
202+
}
203+
204+
log.Printf("Loaded extension: %s", ext)
205+
}
206+
207+
return lastErr
208+
}
209+
210+
// attachDuckLake attaches a DuckLake catalog if configured
211+
func (s *Server) attachDuckLake(db *sql.DB) error {
212+
if s.cfg.DuckLake.MetadataStore == "" {
213+
return nil // DuckLake not configured
214+
}
215+
216+
// Build the ATTACH statement
217+
// Format: ATTACH 'ducklake:metadata_store' (DATA_PATH 'data_path')
218+
attachStmt := fmt.Sprintf("ATTACH 'ducklake:%s'", s.cfg.DuckLake.MetadataStore)
219+
if s.cfg.DuckLake.DataPath != "" {
220+
attachStmt += fmt.Sprintf(" (DATA_PATH '%s')", s.cfg.DuckLake.DataPath)
221+
}
222+
223+
if _, err := db.Exec(attachStmt); err != nil {
224+
return fmt.Errorf("failed to attach DuckLake: %w", err)
225+
}
226+
227+
log.Printf("Attached DuckLake catalog: %s", s.cfg.DuckLake.MetadataStore)
228+
return nil
229+
}
230+
158231
func (s *Server) handleConnection(conn net.Conn) {
159232
remoteAddr := conn.RemoteAddr()
160233

0 commit comments

Comments
 (0)