Skip to content

Commit 104d5b2

Browse files
authored
Merge pull request #31 from PostHog/feat/ducklake-compatibility
Add DuckLake compatibility layer for PostgreSQL DDL
2 parents 0571f45 + 237f977 commit 104d5b2

File tree

4 files changed

+411
-1
lines changed

4 files changed

+411
-1
lines changed

server/catalog.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,34 @@ var (
368368
// Two patterns: one for version() with existing AS alias, one for version() without
369369
versionFuncWithAliasRegex = regexp.MustCompile(`(?i)\bversion\s*\(\s*\)(\s+AS\s+)`)
370370
versionFuncRegex = regexp.MustCompile(`(?i)\bversion\s*\(\s*\)`)
371+
372+
// DuckLake compatibility: Strip unsupported constraints from CREATE TABLE
373+
// PRIMARY KEY constraint (inline or table-level)
374+
primaryKeyInlineRegex = regexp.MustCompile(`(?i)\s+PRIMARY\s+KEY`)
375+
// UNIQUE constraint (inline)
376+
uniqueInlineRegex = regexp.MustCompile(`(?i)\s+UNIQUE`)
377+
// REFERENCES / FOREIGN KEY (inline) with optional ON DELETE/UPDATE clauses
378+
referencesRegex = regexp.MustCompile(`(?i)\s+REFERENCES\s+\w+(?:\.\w+)?(?:\s*\([^)]+\))?(?:\s+ON\s+(?:DELETE|UPDATE)\s+(?:CASCADE|SET\s+NULL|SET\s+DEFAULT|RESTRICT|NO\s+ACTION))*`)
379+
// CHECK constraint (inline) - handles one level of nested parentheses
380+
checkConstraintRegex = regexp.MustCompile(`(?i)\s+CHECK\s*\((?:[^()]*|\([^()]*\))*\)`)
381+
// SERIAL types -> INTEGER types
382+
serialRegex = regexp.MustCompile(`(?i)\bSERIAL\b`)
383+
bigserialRegex = regexp.MustCompile(`(?i)\bBIGSERIAL\b`)
384+
smallserialRegex = regexp.MustCompile(`(?i)\bSMALLSERIAL\b`)
385+
// DEFAULT now()/current_timestamp (not supported in DuckLake)
386+
defaultNowRegex = regexp.MustCompile(`(?i)\s+DEFAULT\s+(?:now\s*\(\s*\)|current_timestamp|CURRENT_TIMESTAMP)`)
387+
// GENERATED columns
388+
generatedRegex = regexp.MustCompile(`(?i)\s+GENERATED\s+(?:ALWAYS|BY\s+DEFAULT)\s+AS\s+(?:IDENTITY(?:\s*\([^)]*\))?|[^,)]+)`)
389+
// Table-level PRIMARY KEY constraint: PRIMARY KEY (col1, col2)
390+
tablePrimaryKeyRegex = regexp.MustCompile(`(?i),?\s*PRIMARY\s+KEY\s*\([^)]+\)`)
391+
// Table-level UNIQUE constraint: UNIQUE (col1, col2)
392+
tableUniqueRegex = regexp.MustCompile(`(?i),?\s*UNIQUE\s*\([^)]+\)`)
393+
// Table-level FOREIGN KEY constraint
394+
tableForeignKeyRegex = regexp.MustCompile(`(?i),?\s*FOREIGN\s+KEY\s*\([^)]+\)\s*REFERENCES\s+\w+(?:\.\w+)?\s*\([^)]+\)(?:\s+ON\s+(?:DELETE|UPDATE)\s+(?:CASCADE|SET\s+NULL|SET\s+DEFAULT|RESTRICT|NO\s+ACTION))*`)
395+
// Table-level CHECK constraint - handles one level of nested parentheses
396+
tableCheckRegex = regexp.MustCompile(`(?i),?\s*CHECK\s*\((?:[^()]*|\([^()]*\))*\)`)
397+
// CONSTRAINT name prefix (for named constraints)
398+
constraintNameRegex = regexp.MustCompile(`(?i),?\s*CONSTRAINT\s+\w+\s+(?:PRIMARY\s+KEY|UNIQUE|FOREIGN\s+KEY|CHECK)\s*\([^)]+\)(?:\s+REFERENCES\s+\w+(?:\.\w+)?\s*\([^)]+\))?(?:\s+ON\s+(?:DELETE|UPDATE)\s+(?:CASCADE|SET\s+NULL|SET\s+DEFAULT|RESTRICT|NO\s+ACTION))*`)
371399
)
372400

373401
// PostgreSQL-specific SET parameters that DuckDB doesn't support.
@@ -568,3 +596,73 @@ func rewritePgCatalogQuery(query string) string {
568596
return query
569597
}
570598

599+
// rewriteForDuckLake rewrites PostgreSQL DDL to be compatible with DuckLake limitations.
600+
// DuckLake does not support: PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK constraints,
601+
// SERIAL types, DEFAULT now(), GENERATED columns, or indexes.
602+
// This function strips these unsupported features so DDL can execute.
603+
func rewriteForDuckLake(query string) string {
604+
upperQuery := strings.ToUpper(strings.TrimSpace(query))
605+
606+
// Only rewrite CREATE TABLE statements
607+
if !strings.HasPrefix(upperQuery, "CREATE TABLE") &&
608+
!strings.HasPrefix(upperQuery, "CREATE TEMPORARY TABLE") &&
609+
!strings.HasPrefix(upperQuery, "CREATE TEMP TABLE") &&
610+
!strings.HasPrefix(upperQuery, "CREATE UNLOGGED TABLE") {
611+
return query
612+
}
613+
614+
// Strip named constraints first (CONSTRAINT name PRIMARY KEY/UNIQUE/FOREIGN KEY/CHECK)
615+
query = constraintNameRegex.ReplaceAllString(query, "")
616+
617+
// Strip table-level constraints
618+
query = tablePrimaryKeyRegex.ReplaceAllString(query, "")
619+
query = tableUniqueRegex.ReplaceAllString(query, "")
620+
query = tableForeignKeyRegex.ReplaceAllString(query, "")
621+
query = tableCheckRegex.ReplaceAllString(query, "")
622+
623+
// Strip inline constraints
624+
query = primaryKeyInlineRegex.ReplaceAllString(query, "")
625+
query = uniqueInlineRegex.ReplaceAllString(query, "")
626+
query = referencesRegex.ReplaceAllString(query, "")
627+
query = checkConstraintRegex.ReplaceAllString(query, "")
628+
629+
// Convert SERIAL types to INTEGER types
630+
query = smallserialRegex.ReplaceAllString(query, "SMALLINT")
631+
query = bigserialRegex.ReplaceAllString(query, "BIGINT")
632+
query = serialRegex.ReplaceAllString(query, "INTEGER")
633+
634+
// Strip DEFAULT now()/current_timestamp (DuckLake only allows literal defaults)
635+
query = defaultNowRegex.ReplaceAllString(query, "")
636+
637+
// Strip GENERATED columns
638+
query = generatedRegex.ReplaceAllString(query, "")
639+
640+
return query
641+
}
642+
643+
// isNoOpCommand reports whether cmdType names a command that should be
// acknowledged to the client without being executed. These are PostgreSQL
// features that DuckLake doesn't support. The comparison is exact and
// case-sensitive; any command type not listed below yields false.
func isNoOpCommand(cmdType string) bool {
	switch cmdType {
	// Index management.
	case "CREATE INDEX", "DROP INDEX", "REINDEX":
		return true
	// Table maintenance.
	case "CLUSTER", "VACUUM", "ANALYZE":
		return true
	// Privileges and object comments.
	case "GRANT", "REVOKE", "COMMENT":
		return true
	// REFRESH MATERIALIZED VIEW.
	case "REFRESH":
		return true
	// Constraints are not supported in DuckLake.
	case "ALTER TABLE ADD CONSTRAINT":
		return true
	}
	return false
}
657+
658+
// getNoOpCommandTag returns the command tag to send in CommandComplete for a
// no-op command. Internal command types that are not themselves valid
// PostgreSQL tags are mapped to the tag PostgreSQL reports for that statement;
// every other command type is returned unchanged.
func getNoOpCommandTag(cmdType string) string {
	switch cmdType {
	case "ALTER TABLE ADD CONSTRAINT":
		// PostgreSQL returns "ALTER TABLE" for constraint operations.
		return "ALTER TABLE"
	case "REFRESH":
		// The internal "REFRESH" type stands for REFRESH MATERIALIZED VIEW,
		// and PostgreSQL reports the full statement name as the tag.
		return "REFRESH MATERIALIZED VIEW"
	default:
		return cmdType
	}
}
668+

server/conn.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ func (c *clientConn) handleQuery(body []byte) error {
292292
// Rewrite pg_catalog function calls for compatibility
293293
query = rewritePgCatalogQuery(query)
294294

295+
// Rewrite CREATE TABLE for DuckLake compatibility (strip unsupported constraints)
296+
query = rewriteForDuckLake(query)
297+
295298
// Determine command type for proper response
296299
upperQuery := strings.ToUpper(query)
297300
cmdType := c.getCommandType(upperQuery)
@@ -301,6 +304,15 @@ func (c *clientConn) handleQuery(body []byte) error {
301304
return c.handleCopy(query, upperQuery)
302305
}
303306

307+
// Handle no-op commands (CREATE INDEX, VACUUM, etc.) - DuckLake doesn't support these
308+
if isNoOpCommand(cmdType) {
309+
log.Printf("[%s] No-op command (DuckLake limitation): %s", c.username, query)
310+
writeCommandComplete(c.writer, getNoOpCommandTag(cmdType))
311+
writeReadyForQuery(c.writer, c.txStatus)
312+
c.writer.Flush()
313+
return nil
314+
}
315+
304316
// For non-SELECT queries, use Exec
305317
if cmdType != "SELECT" {
306318
// Handle nested BEGIN: PostgreSQL issues a warning but continues,
@@ -505,6 +517,12 @@ func (c *clientConn) getCommandType(upperQuery string) string {
505517
return "DROP SCHEMA"
506518
case strings.HasPrefix(upperQuery, "DROP"):
507519
return "DROP"
520+
case strings.Contains(upperQuery, "ADD CONSTRAINT") ||
521+
strings.Contains(upperQuery, "ADD PRIMARY KEY") ||
522+
strings.Contains(upperQuery, "ADD UNIQUE") ||
523+
strings.Contains(upperQuery, "ADD FOREIGN KEY") ||
524+
strings.Contains(upperQuery, "ADD CHECK"):
525+
return "ALTER TABLE ADD CONSTRAINT"
508526
case strings.HasPrefix(upperQuery, "ALTER"):
509527
return "ALTER TABLE"
510528
case strings.HasPrefix(upperQuery, "TRUNCATE"):
@@ -1027,6 +1045,9 @@ func (c *clientConn) handleParse(body []byte) {
10271045
// Rewrite pg_catalog function calls for compatibility (same as simple query protocol)
10281046
rewrittenQuery := rewritePgCatalogQuery(query)
10291047

1048+
// Rewrite CREATE TABLE for DuckLake compatibility (strip unsupported constraints)
1049+
rewrittenQuery = rewriteForDuckLake(rewrittenQuery)
1050+
10301051
// Convert PostgreSQL $1, $2 placeholders to ? for database/sql
10311052
convertedQuery, numParams := convertPlaceholders(rewrittenQuery)
10321053

@@ -1321,6 +1342,13 @@ func (c *clientConn) handleExecute(body []byte) {
13211342
return
13221343
}
13231344

1345+
// Handle no-op commands (CREATE INDEX, VACUUM, etc.) - DuckLake doesn't support these
1346+
if isNoOpCommand(cmdType) {
1347+
log.Printf("[%s] No-op command (DuckLake limitation): %s", c.username, p.stmt.query)
1348+
writeCommandComplete(c.writer, getNoOpCommandTag(cmdType))
1349+
return
1350+
}
1351+
13241352
if !returnsResults {
13251353
// Handle nested BEGIN: PostgreSQL issues a warning but continues,
13261354
// while DuckDB throws an error. Match PostgreSQL behavior.

0 commit comments

Comments
 (0)