Skip to content

Commit 40634d8

Browse files
EDsCODEclaude
andcommitted
Fix DuckLake concurrent attachment and DDL rewriting for ETL tools
Two fixes for DuckLake compatibility: 1. Fix DDL rewriting for ETL tool queries (e.g., queries with /*comment*/ prefixes): - rewriteForDuckLake now strips leading SQL comments before detecting CREATE TABLE - Added createTableDetectRegex to handle multiple spaces (e.g., "CREATE TABLE") - PRIMARY KEY and other constraints are now properly stripped from these queries 2. Fix DuckLake attachment race condition: - Multiple connections attaching simultaneously caused "__ducklake_metadata_ducklake already exists" - Moved mutex to attachDuckLake to serialize both secret creation and catalog attachment - Added check for existing DuckLake catalog before attempting attachment 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent ceed599 commit 40634d8

File tree

3 files changed

+46
-9
lines changed

3 files changed

+46
-9
lines changed

server/catalog.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ var (
396396
tableCheckRegex = regexp.MustCompile(`(?i),?\s*CHECK\s*\((?:[^()]*|\([^()]*\))*\)`)
397397
// CONSTRAINT name prefix (for named constraints)
398398
constraintNameRegex = regexp.MustCompile(`(?i),?\s*CONSTRAINT\s+\w+\s+(?:PRIMARY\s+KEY|UNIQUE|FOREIGN\s+KEY|CHECK)\s*\([^)]+\)(?:\s+REFERENCES\s+\w+(?:\.\w+)?\s*\([^)]+\))?(?:\s+ON\s+(?:DELETE|UPDATE)\s+(?:CASCADE|SET\s+NULL|SET\s+DEFAULT|RESTRICT|NO\s+ACTION))*`)
399+
// Detect CREATE TABLE statements (handles multiple spaces like "CREATE TABLE")
400+
createTableDetectRegex = regexp.MustCompile(`(?i)^CREATE\s+(?:TEMPORARY\s+|TEMP\s+|UNLOGGED\s+)?TABLE\b`)
399401
)
400402

401403
// PostgreSQL-specific SET parameters that DuckDB doesn't support.
@@ -603,11 +605,22 @@ func rewritePgCatalogQuery(query string) string {
603605
func rewriteForDuckLake(query string) string {
604606
upperQuery := strings.ToUpper(strings.TrimSpace(query))
605607

608+
// Strip leading SQL comments (e.g., /*Fivetran*/) for detection
609+
// This handles tools that prefix queries with comments
610+
commentStripped := upperQuery
611+
for strings.HasPrefix(commentStripped, "/*") {
612+
endIdx := strings.Index(commentStripped, "*/")
613+
if endIdx == -1 {
614+
break
615+
}
616+
commentStripped = strings.TrimSpace(commentStripped[endIdx+2:])
617+
}
618+
606619
// Only rewrite CREATE TABLE statements
607-
if !strings.HasPrefix(upperQuery, "CREATE TABLE") &&
608-
!strings.HasPrefix(upperQuery, "CREATE TEMPORARY TABLE") &&
609-
!strings.HasPrefix(upperQuery, "CREATE TEMP TABLE") &&
610-
!strings.HasPrefix(upperQuery, "CREATE UNLOGGED TABLE") {
620+
// Use regex to handle multiple spaces (e.g., "CREATE TABLE")
621+
isCreateTable := createTableDetectRegex.MatchString(commentStripped)
622+
623+
if !isCreateTable {
611624
return query
612625
}
613626

server/ducklake_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,17 @@ func TestRewriteForDuckLake(t *testing.T) {
150150
created_at TIMESTAMP
151151
)`,
152152
},
153+
// ETL tool queries with comment prefix and double spaces
154+
{
155+
name: "comment prefix with PRIMARY KEY",
156+
input: `/*ETL*/CREATE TABLE "schema"."table" ( "id" CHARACTER VARYING(256) , "created_at" TIMESTAMP WITH TIME ZONE , PRIMARY KEY ("id") )`,
157+
expected: `/*ETL*/CREATE TABLE "schema"."table" ( "id" CHARACTER VARYING(256) , "created_at" TIMESTAMP WITH TIME ZONE )`,
158+
},
159+
{
160+
name: "multiple comment prefixes",
161+
input: `/* comment 1 */ /* comment 2 */ CREATE TABLE test (id INTEGER PRIMARY KEY)`,
162+
expected: `/* comment 1 */ /* comment 2 */ CREATE TABLE test (id INTEGER)`,
163+
},
153164
}
154165

155166
for _, tt := range tests {

server/server.go

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,23 @@ func (s *Server) attachDuckLake(db *sql.DB) error {
308308
return nil // DuckLake not configured
309309
}
310310

311+
// Serialize DuckLake attachment to avoid race conditions
312+
// Multiple connections trying to attach simultaneously can cause
313+
// "database with name '__ducklake_metadata_ducklake' already exists" errors
314+
s.duckLakeMu.Lock()
315+
defer s.duckLakeMu.Unlock()
316+
317+
// Check if DuckLake catalog is already attached
318+
var count int
319+
err := db.QueryRow("SELECT COUNT(*) FROM duckdb_databases() WHERE database_name = 'ducklake'").Scan(&count)
320+
if err == nil && count > 0 {
321+
// Already attached, just set as default
322+
if _, err := db.Exec("USE ducklake"); err != nil {
323+
return fmt.Errorf("failed to set DuckLake as default catalog: %w", err)
324+
}
325+
return nil
326+
}
327+
311328
// Create S3 secret if using object store
312329
// - With explicit credentials (S3AccessKey set) or custom endpoint
313330
// - With credential_chain provider (for AWS S3)
@@ -359,13 +376,9 @@ func (s *Server) attachDuckLake(db *sql.DB) error {
359376
// - "config": explicit credentials (for MinIO or when you have access keys)
360377
// - "credential_chain": AWS SDK credential chain (env vars, config files, instance metadata, etc.)
361378
//
379+
// Note: Caller must hold duckLakeMu to avoid race conditions.
362380
// See: https://duckdb.org/docs/stable/core_extensions/httpfs/s3api
363381
func (s *Server) createS3Secret(db *sql.DB) error {
364-
// Serialize secret creation to avoid DuckDB write-write conflicts
365-
// when multiple connections try to create the same secret simultaneously
366-
s.duckLakeMu.Lock()
367-
defer s.duckLakeMu.Unlock()
368-
369382
// Check if secret already exists to avoid unnecessary creation
370383
var count int
371384
err := db.QueryRow("SELECT COUNT(*) FROM duckdb_secrets() WHERE name = 'ducklake_s3'").Scan(&count)

0 commit comments

Comments
 (0)