Skip to content

Commit 8de3845

Browse files
authored
Merge pull request #50 from PostHog/feature/information-schema-transpiler
Add information_schema transpiler transform for PostgreSQL compatibility
2 parents 82efecd + b95bd6a commit 8de3845

File tree

6 files changed

+550
-48
lines changed

6 files changed

+550
-48
lines changed

server/catalog.go

Lines changed: 169 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package server
22

33
import (
44
"database/sql"
5+
"fmt"
56
)
67

78
// initPgCatalog creates PostgreSQL compatibility functions and views in DuckDB
@@ -243,11 +244,11 @@ func initPgCatalog(db *sql.DB) error {
243244
// pg_table_is_visible - checks if table is in search path
244245
`CREATE OR REPLACE MACRO pg_table_is_visible(oid) AS true`,
245246
// has_schema_privilege - check schema access
246-
`CREATE OR REPLACE MACRO has_schema_privilege(schema, priv) AS true`,
247-
`CREATE OR REPLACE MACRO has_schema_privilege(u, schema, priv) AS true`,
247+
// Note: DuckDB doesn't support macro overloading well, so we only define 2-arg versions
248+
// The transpiler should handle 3-arg calls by dropping the user argument
249+
`CREATE OR REPLACE MACRO has_schema_privilege(schema_name, priv) AS true`,
248250
// has_table_privilege - check table access
249251
`CREATE OR REPLACE MACRO has_table_privilege(table_name, priv) AS true`,
250-
`CREATE OR REPLACE MACRO has_table_privilege(u, table_name, priv) AS true`,
251252
// pg_encoding_to_char - convert encoding ID to name
252253
`CREATE OR REPLACE MACRO pg_encoding_to_char(enc) AS 'UTF8'`,
253254
// format_type - format a type OID as string
@@ -317,10 +318,21 @@ func initPgCatalog(db *sql.DB) error {
317318

318319
// initInformationSchema creates the column metadata table and information_schema wrapper views.
319320
// This enables accurate type information (VARCHAR lengths, NUMERIC precision) in information_schema.
320-
func initInformationSchema(db *sql.DB) error {
321+
// When duckLakeMode is true, the views read from ducklake.information_schema instead of the
322+
// local information_schema, since DuckLake is set as the default catalog.
323+
func initInformationSchema(db *sql.DB, duckLakeMode bool) error {
324+
// Determine the source information_schema based on mode
325+
// In DuckLake mode, we need to query ducklake.information_schema to see DuckLake tables
326+
// In non-DuckLake mode, we query the local information_schema
327+
infoSchemaPrefix := "information_schema"
328+
if duckLakeMode {
329+
infoSchemaPrefix = "ducklake.information_schema"
330+
}
331+
321332
// Create metadata table to store column type information that DuckDB doesn't preserve
333+
// The table is created in the main schema of the current database
322334
metadataTableSQL := `
323-
CREATE TABLE IF NOT EXISTS __duckgres_column_metadata (
335+
CREATE TABLE IF NOT EXISTS main.__duckgres_column_metadata (
324336
table_schema VARCHAR NOT NULL,
325337
table_name VARCHAR NOT NULL,
326338
column_name VARCHAR NOT NULL,
@@ -336,21 +348,53 @@ func initInformationSchema(db *sql.DB) error {
336348
}
337349

338350
// Create information_schema.columns wrapper view
339-
// Transforms DuckDB type names to PostgreSQL-compatible names (e.g., VARCHAR -> text)
340-
// First try with metadata table join, fall back to simple view if table doesn't exist
341-
columnsViewWithMetaSQL := `
342-
CREATE OR REPLACE VIEW information_schema_columns_compat AS
351+
// Transforms DuckDB type names to PostgreSQL-compatible names
352+
// Maps: VARCHAR->text, BOOLEAN->boolean, INTEGER->integer, BIGINT->bigint,
353+
// TIMESTAMP->timestamp without time zone, DECIMAL->numeric, etc.
354+
// The views are created in the main schema of the current database
355+
columnsViewSQL := `
356+
CREATE OR REPLACE VIEW main.information_schema_columns_compat AS
343357
SELECT
344358
c.table_catalog,
345-
c.table_schema,
359+
CASE WHEN c.table_schema = 'main' THEN 'public' ELSE c.table_schema END AS table_schema,
346360
c.table_name,
347361
c.column_name,
348362
c.ordinal_position,
349-
c.column_default,
363+
-- Normalize column_default to PostgreSQL format
364+
CASE
365+
WHEN c.column_default IS NULL THEN NULL
366+
WHEN c.column_default = 'CAST(''t'' AS BOOLEAN)' THEN 'true'
367+
WHEN c.column_default = 'CAST(''f'' AS BOOLEAN)' THEN 'false'
368+
WHEN UPPER(c.column_default) = 'CURRENT_TIMESTAMP' THEN 'CURRENT_TIMESTAMP'
369+
WHEN UPPER(c.column_default) = 'NOW()' THEN 'now()'
370+
ELSE c.column_default
371+
END AS column_default,
350372
c.is_nullable,
373+
-- Normalize data_type to PostgreSQL lowercase format
351374
CASE
352-
WHEN UPPER(c.data_type) = 'VARCHAR' OR UPPER(c.data_type) LIKE 'VARCHAR(%' THEN 'text'
353-
ELSE c.data_type
375+
WHEN UPPER(c.data_type) = 'VARCHAR' OR UPPER(c.data_type) LIKE 'VARCHAR(%%' THEN 'text'
376+
WHEN UPPER(c.data_type) = 'TEXT' THEN 'text'
377+
WHEN UPPER(c.data_type) LIKE 'TEXT(%%' THEN 'character'
378+
WHEN UPPER(c.data_type) = 'BOOLEAN' THEN 'boolean'
379+
WHEN UPPER(c.data_type) = 'TINYINT' THEN 'smallint'
380+
WHEN UPPER(c.data_type) = 'SMALLINT' THEN 'smallint'
381+
WHEN UPPER(c.data_type) = 'INTEGER' THEN 'integer'
382+
WHEN UPPER(c.data_type) = 'BIGINT' THEN 'bigint'
383+
WHEN UPPER(c.data_type) = 'HUGEINT' THEN 'numeric'
384+
WHEN UPPER(c.data_type) = 'REAL' OR UPPER(c.data_type) = 'FLOAT4' THEN 'real'
385+
WHEN UPPER(c.data_type) = 'DOUBLE' OR UPPER(c.data_type) = 'FLOAT8' THEN 'double precision'
386+
WHEN UPPER(c.data_type) LIKE 'DECIMAL%%' THEN 'numeric'
387+
WHEN UPPER(c.data_type) LIKE 'NUMERIC%%' THEN 'numeric'
388+
WHEN UPPER(c.data_type) = 'DATE' THEN 'date'
389+
WHEN UPPER(c.data_type) = 'TIME' THEN 'time without time zone'
390+
WHEN UPPER(c.data_type) = 'TIMESTAMP' THEN 'timestamp without time zone'
391+
WHEN UPPER(c.data_type) = 'TIMESTAMPTZ' OR UPPER(c.data_type) = 'TIMESTAMP WITH TIME ZONE' THEN 'timestamp with time zone'
392+
WHEN UPPER(c.data_type) = 'INTERVAL' THEN 'interval'
393+
WHEN UPPER(c.data_type) = 'UUID' THEN 'uuid'
394+
WHEN UPPER(c.data_type) = 'BLOB' OR UPPER(c.data_type) = 'BYTEA' THEN 'bytea'
395+
WHEN UPPER(c.data_type) = 'JSON' THEN 'json'
396+
WHEN UPPER(c.data_type) LIKE '%%[]' THEN 'ARRAY'
397+
ELSE LOWER(c.data_type)
354398
END AS data_type,
355399
COALESCE(m.character_maximum_length, c.character_maximum_length) AS character_maximum_length,
356400
c.character_octet_length,
@@ -387,28 +431,57 @@ func initInformationSchema(db *sql.DB) error {
387431
'NEVER' AS is_generated,
388432
NULL AS generation_expression,
389433
'YES' AS is_updatable
390-
FROM information_schema.columns c
391-
LEFT JOIN __duckgres_column_metadata m
434+
FROM %s.columns c
435+
LEFT JOIN main.__duckgres_column_metadata m
392436
ON c.table_schema = m.table_schema
393437
AND c.table_name = m.table_name
394438
AND c.column_name = m.column_name
395439
`
396-
// Try with metadata table first
397-
if _, err := db.Exec(columnsViewWithMetaSQL); err != nil {
398-
// Metadata table doesn't exist, create simpler view without it
440+
if _, err := db.Exec(fmt.Sprintf(columnsViewSQL, infoSchemaPrefix)); err != nil {
441+
// If creating the view with the metadata-table join fails, fall back to a simpler view without the join
399442
columnsViewSimpleSQL := `
400-
CREATE OR REPLACE VIEW information_schema_columns_compat AS
443+
CREATE OR REPLACE VIEW main.information_schema_columns_compat AS
401444
SELECT
402445
table_catalog,
403-
table_schema,
446+
CASE WHEN table_schema = 'main' THEN 'public' ELSE table_schema END AS table_schema,
404447
table_name,
405448
column_name,
406449
ordinal_position,
407-
column_default,
450+
-- Normalize column_default to PostgreSQL format
451+
CASE
452+
WHEN column_default IS NULL THEN NULL
453+
WHEN column_default = 'CAST(''t'' AS BOOLEAN)' THEN 'true'
454+
WHEN column_default = 'CAST(''f'' AS BOOLEAN)' THEN 'false'
455+
WHEN UPPER(column_default) = 'CURRENT_TIMESTAMP' THEN 'CURRENT_TIMESTAMP'
456+
WHEN UPPER(column_default) = 'NOW()' THEN 'now()'
457+
ELSE column_default
458+
END AS column_default,
408459
is_nullable,
460+
-- Normalize data_type to PostgreSQL lowercase format
409461
CASE
410-
WHEN UPPER(data_type) = 'VARCHAR' OR UPPER(data_type) LIKE 'VARCHAR(%' THEN 'text'
411-
ELSE data_type
462+
WHEN UPPER(data_type) = 'VARCHAR' OR UPPER(data_type) LIKE 'VARCHAR(%%' THEN 'text'
463+
WHEN UPPER(data_type) = 'TEXT' THEN 'text'
464+
WHEN UPPER(data_type) LIKE 'TEXT(%%' THEN 'character'
465+
WHEN UPPER(data_type) = 'BOOLEAN' THEN 'boolean'
466+
WHEN UPPER(data_type) = 'TINYINT' THEN 'smallint'
467+
WHEN UPPER(data_type) = 'SMALLINT' THEN 'smallint'
468+
WHEN UPPER(data_type) = 'INTEGER' THEN 'integer'
469+
WHEN UPPER(data_type) = 'BIGINT' THEN 'bigint'
470+
WHEN UPPER(data_type) = 'HUGEINT' THEN 'numeric'
471+
WHEN UPPER(data_type) = 'REAL' OR UPPER(data_type) = 'FLOAT4' THEN 'real'
472+
WHEN UPPER(data_type) = 'DOUBLE' OR UPPER(data_type) = 'FLOAT8' THEN 'double precision'
473+
WHEN UPPER(data_type) LIKE 'DECIMAL%%' THEN 'numeric'
474+
WHEN UPPER(data_type) LIKE 'NUMERIC%%' THEN 'numeric'
475+
WHEN UPPER(data_type) = 'DATE' THEN 'date'
476+
WHEN UPPER(data_type) = 'TIME' THEN 'time without time zone'
477+
WHEN UPPER(data_type) = 'TIMESTAMP' THEN 'timestamp without time zone'
478+
WHEN UPPER(data_type) = 'TIMESTAMPTZ' OR UPPER(data_type) = 'TIMESTAMP WITH TIME ZONE' THEN 'timestamp with time zone'
479+
WHEN UPPER(data_type) = 'INTERVAL' THEN 'interval'
480+
WHEN UPPER(data_type) = 'UUID' THEN 'uuid'
481+
WHEN UPPER(data_type) = 'BLOB' OR UPPER(data_type) = 'BYTEA' THEN 'bytea'
482+
WHEN UPPER(data_type) = 'JSON' THEN 'json'
483+
WHEN UPPER(data_type) LIKE '%%[]' THEN 'ARRAY'
484+
ELSE LOWER(data_type)
412485
END AS data_type,
413486
character_maximum_length,
414487
character_octet_length,
@@ -445,17 +518,19 @@ func initInformationSchema(db *sql.DB) error {
445518
'NEVER' AS is_generated,
446519
NULL AS generation_expression,
447520
'YES' AS is_updatable
448-
FROM information_schema.columns
521+
FROM %s.columns
449522
`
450-
db.Exec(columnsViewSimpleSQL)
523+
db.Exec(fmt.Sprintf(columnsViewSimpleSQL, infoSchemaPrefix))
451524
}
452525

453526
// Create information_schema.tables wrapper view with additional PostgreSQL columns
527+
// Filter out internal duckgres tables/views and DuckDB system views
528+
// Normalize 'main' schema to 'public' for PostgreSQL compatibility
454529
tablesViewSQL := `
455-
CREATE OR REPLACE VIEW information_schema_tables_compat AS
530+
CREATE OR REPLACE VIEW main.information_schema_tables_compat AS
456531
SELECT
457532
t.table_catalog,
458-
t.table_schema,
533+
CASE WHEN t.table_schema = 'main' THEN 'public' ELSE t.table_schema END AS table_schema,
459534
t.table_name,
460535
t.table_type,
461536
NULL AS self_referencing_column_name,
@@ -466,24 +541,85 @@ func initInformationSchema(db *sql.DB) error {
466541
'YES' AS is_insertable_into,
467542
'NO' AS is_typed,
468543
NULL AS commit_action
469-
FROM information_schema.tables t
544+
FROM %s.tables t
545+
WHERE t.table_name NOT IN (
546+
-- Internal duckgres tables
547+
'__duckgres_column_metadata',
548+
-- pg_catalog compat views
549+
'pg_class_full', 'pg_collation', 'pg_database', 'pg_inherits',
550+
'pg_namespace', 'pg_policy', 'pg_publication', 'pg_publication_rel',
551+
'pg_publication_tables', 'pg_roles', 'pg_rules', 'pg_statistic_ext',
552+
-- information_schema compat views
553+
'information_schema_columns_compat', 'information_schema_tables_compat',
554+
'information_schema_schemata_compat', 'information_schema_views_compat'
555+
)
556+
AND t.table_name NOT LIKE 'duckdb_%%'
557+
AND t.table_name NOT LIKE 'sqlite_%%'
558+
AND t.table_name NOT LIKE 'pragma_%%'
470559
`
471-
db.Exec(tablesViewSQL)
560+
db.Exec(fmt.Sprintf(tablesViewSQL, infoSchemaPrefix))
472561

473562
// Create information_schema.schemata wrapper view
563+
// Replace 'main' with a synthetic 'public' entry and add synthetic entries for pg_catalog,
564+
// information_schema, and pg_toast to match PostgreSQL's information_schema.schemata
474565
schemataViewSQL := `
475-
CREATE OR REPLACE VIEW information_schema_schemata_compat AS
566+
CREATE OR REPLACE VIEW main.information_schema_schemata_compat AS
476567
SELECT
477568
s.catalog_name,
478-
s.schema_name,
569+
CASE WHEN s.schema_name = 'main' THEN 'public' ELSE s.schema_name END AS schema_name,
479570
'duckdb' AS schema_owner,
480571
NULL AS default_character_set_catalog,
481572
NULL AS default_character_set_schema,
482573
NULL AS default_character_set_name,
483574
NULL AS sql_path
484-
FROM information_schema.schemata s
575+
FROM %s.schemata s
576+
WHERE s.schema_name NOT IN ('main', 'pg_catalog', 'information_schema')
577+
UNION ALL
578+
SELECT 'memory' AS catalog_name, 'public' AS schema_name, 'duckdb' AS schema_owner,
579+
NULL, NULL, NULL, NULL
580+
UNION ALL
581+
SELECT 'memory' AS catalog_name, 'pg_catalog' AS schema_name, 'duckdb' AS schema_owner,
582+
NULL, NULL, NULL, NULL
583+
UNION ALL
584+
SELECT 'memory' AS catalog_name, 'information_schema' AS schema_name, 'duckdb' AS schema_owner,
585+
NULL, NULL, NULL, NULL
586+
UNION ALL
587+
SELECT 'memory' AS catalog_name, 'pg_toast' AS schema_name, 'duckdb' AS schema_owner,
588+
NULL, NULL, NULL, NULL
589+
`
590+
db.Exec(fmt.Sprintf(schemataViewSQL, infoSchemaPrefix))
591+
592+
// Create information_schema.views wrapper view
593+
// Filter out internal duckgres views and DuckDB system views
594+
// Normalize 'main' schema to 'public' for PostgreSQL compatibility
595+
viewsViewSQL := `
596+
CREATE OR REPLACE VIEW main.information_schema_views_compat AS
597+
SELECT
598+
v.table_catalog,
599+
CASE WHEN v.table_schema = 'main' THEN 'public' ELSE v.table_schema END AS table_schema,
600+
v.table_name,
601+
v.view_definition,
602+
v.check_option,
603+
v.is_updatable,
604+
v.is_insertable_into,
605+
v.is_trigger_updatable,
606+
v.is_trigger_deletable,
607+
v.is_trigger_insertable_into
608+
FROM %s.views v
609+
WHERE v.table_name NOT IN (
610+
-- pg_catalog compat views
611+
'pg_class_full', 'pg_collation', 'pg_database', 'pg_inherits',
612+
'pg_namespace', 'pg_policy', 'pg_publication', 'pg_publication_rel',
613+
'pg_publication_tables', 'pg_roles', 'pg_rules', 'pg_statistic_ext',
614+
-- information_schema compat views
615+
'information_schema_columns_compat', 'information_schema_tables_compat',
616+
'information_schema_schemata_compat', 'information_schema_views_compat'
617+
)
618+
AND v.table_name NOT LIKE 'duckdb_%%'
619+
AND v.table_name NOT LIKE 'sqlite_%%'
620+
AND v.table_name NOT LIKE 'pragma_%%'
485621
`
486-
db.Exec(schemataViewSQL)
622+
db.Exec(fmt.Sprintf(viewsViewSQL, infoSchemaPrefix))
487623

488624
return nil
489625
}

server/server.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ func (s *Server) createDBConnection(username string) (*sql.DB, error) {
281281
}
282282

283283
// Attach DuckLake catalog if configured
284+
duckLakeMode := false
284285
if err := s.attachDuckLake(db); err != nil {
285286
// If DuckLake was explicitly configured, fail the connection.
286287
// Silent fallback to local DB causes schema/table mismatches.
@@ -290,6 +291,15 @@ func (s *Server) createDBConnection(username string) (*sql.DB, error) {
290291
}
291292
// DuckLake not configured, this warning is just informational
292293
log.Printf("Warning: failed to attach DuckLake for user %q: %v", username, err)
294+
} else if s.cfg.DuckLake.MetadataStore != "" {
295+
duckLakeMode = true
296+
}
297+
298+
// Initialize information_schema compatibility views
299+
// Must be done AFTER attaching DuckLake so views can reference ducklake.information_schema
300+
if err := initInformationSchema(db, duckLakeMode); err != nil {
301+
log.Printf("Warning: failed to initialize information_schema for user %q: %v", username, err)
302+
// Continue anyway - basic queries will still work
293303
}
294304

295305
return db, nil

tests/integration/catalog_test.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,17 +119,17 @@ func TestCatalogInformationSchemaTables(t *testing.T) {
119119
{
120120
Name: "info_schema_tables_all",
121121
Query: "SELECT table_schema, table_name, table_type FROM information_schema.tables WHERE table_schema NOT IN ('pg_catalog', 'information_schema') LIMIT 20",
122-
DuckgresOnly: true,
122+
DuckgresOnly: false,
123123
},
124124
{
125125
Name: "info_schema_tables_public",
126126
Query: "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main' OR table_schema = 'public' LIMIT 20",
127-
DuckgresOnly: true,
127+
DuckgresOnly: false,
128128
},
129129
{
130130
Name: "info_schema_tables_base",
131-
Query: "SELECT table_name FROM information_schema.tables WHERE table_type = 'BASE TABLE' LIMIT 20",
132-
DuckgresOnly: true,
131+
Query: "SELECT table_name FROM information_schema.tables WHERE table_type = 'BASE TABLE' AND table_schema NOT IN ('pg_catalog', 'information_schema') ORDER BY table_name LIMIT 20",
132+
DuckgresOnly: false,
133133
},
134134
}
135135
runQueryTests(t, tests)
@@ -141,12 +141,12 @@ func TestCatalogInformationSchemaColumns(t *testing.T) {
141141
{
142142
Name: "info_schema_columns_users",
143143
Query: "SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'users' ORDER BY ordinal_position",
144-
DuckgresOnly: true,
144+
DuckgresOnly: false,
145145
},
146146
{
147147
Name: "info_schema_columns_with_default",
148148
Query: "SELECT column_name, column_default FROM information_schema.columns WHERE column_default IS NOT NULL LIMIT 10",
149-
DuckgresOnly: true,
149+
DuckgresOnly: false,
150150
},
151151
}
152152
runQueryTests(t, tests)
@@ -157,8 +157,14 @@ func TestCatalogInformationSchemaViews(t *testing.T) {
157157
tests := []QueryTest{
158158
{
159159
Name: "info_schema_views_all",
160+
Query: "SELECT table_schema, table_name FROM information_schema.views WHERE table_schema NOT IN ('pg_catalog', 'information_schema') ORDER BY table_name LIMIT 10",
161+
DuckgresOnly: false,
162+
},
163+
{
164+
// view_definition format differs between PostgreSQL and DuckDB - test separately
165+
Name: "info_schema_views_definition",
160166
Query: "SELECT table_name, view_definition FROM information_schema.views WHERE table_schema NOT IN ('pg_catalog', 'information_schema') LIMIT 10",
161-
DuckgresOnly: true,
167+
DuckgresOnly: true, // view_definition format differs
162168
},
163169
}
164170
runQueryTests(t, tests)
@@ -170,7 +176,7 @@ func TestCatalogInformationSchemaSchemata(t *testing.T) {
170176
{
171177
Name: "info_schema_schemata_all",
172178
Query: "SELECT schema_name FROM information_schema.schemata",
173-
DuckgresOnly: true,
179+
DuckgresOnly: false,
174180
},
175181
}
176182
runQueryTests(t, tests)

0 commit comments

Comments
 (0)