Skip to content

Commit 86d4e1c

Browse files
authored
Merge pull request #2 from PostHog/feature/pg-compat-and-binary-format
Add pg_catalog compatibility and binary format support
2 parents e7b40a4 + a72a573 commit 86d4e1c

File tree

7 files changed

+683
-31
lines changed

7 files changed

+683
-31
lines changed

TODO.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,10 @@
2020
- [ ] **Cancel Request Handling**: Properly cancel long-running queries
2121

2222
### Compatibility
23-
- [ ] **System Catalog Emulation**: Implement `pg_catalog` views for tool compatibility
24-
- [ ] `pg_database`
25-
- [ ] `pg_tables`
26-
- [ ] `pg_columns`
27-
- [ ] `pg_type`
28-
- [ ] `pg_class`
23+
- [x] **System Catalog Emulation**: Basic `pg_catalog` compatibility for psql
24+
- [x] `\dt` (list tables) - working
25+
- [x] `\l` (list databases) - working
26+
- [ ] `\d <table>` (describe table) - needs more pg_class columns
2927
- [ ] **Information Schema**: Emulate PostgreSQL's `information_schema`
3028
- [ ] **Session Variables**: Support `SET` commands (timezone, search_path, etc.)
3129
- [ ] **Type OID Mapping**: Proper PostgreSQL OID mapping for all DuckDB types

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ require (
1919
github.com/google/uuid v1.6.0 // indirect
2020
github.com/klauspost/compress v1.18.0 // indirect
2121
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
22+
github.com/lib/pq v1.10.9 // indirect
2223
github.com/pierrec/lz4/v4 v4.1.22 // indirect
2324
github.com/zeebo/xxh3 v1.0.2 // indirect
2425
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect

scripts/test_binary.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
// Test binary format support with lib/pq
2+
package main
3+
4+
import (
5+
"database/sql"
6+
"fmt"
7+
"log"
8+
9+
_ "github.com/lib/pq"
10+
)
11+
12+
// main runs an end-to-end smoke test against a PostgreSQL-wire-compatible
// server listening on 127.0.0.1:35432. It creates a small table, inserts three
// rows, reads them back through a parameterised query and then checks a few
// scalar SELECTs. Any failure, including a value that does not match the
// expectation, terminates the process via log.Fatalf, so the final
// "All tests passed!" line is only printed when every check succeeded.
func main() {
	// Original author's note: the driver uses binary for prepared statements
	// automatically, so no extra connection option is set here.
	connStr := "host=127.0.0.1 port=35432 user=postgres password=postgres dbname=test sslmode=require"

	db, err := sql.Open("postgres", connStr)
	if err != nil {
		log.Fatalf("Failed to open db: %v", err)
	}
	defer db.Close()

	// Test 1: Create a test table with various types
	fmt.Println("=== Creating test table ===")
	if _, err = db.Exec(`DROP TABLE IF EXISTS binary_test`); err != nil {
		// Dropping is only cleanup from a previous run; keep going.
		log.Printf("Drop table warning: %v", err)
	}

	_, err = db.Exec(`
		CREATE TABLE binary_test (
			id INTEGER,
			name TEXT,
			price DOUBLE,
			active BOOLEAN
		)
	`)
	if err != nil {
		log.Fatalf("Create table failed: %v", err)
	}
	fmt.Println("Table created successfully")

	// Test 2: Insert data
	fmt.Println("\n=== Inserting test data ===")
	_, err = db.Exec(`
		INSERT INTO binary_test VALUES
			(1, 'Widget', 19.99, true),
			(2, 'Gadget', 49.99, false),
			(3, 'Gizmo', 9.99, true)
	`)
	if err != nil {
		log.Fatalf("Insert failed: %v", err)
	}
	fmt.Println("Data inserted successfully")

	// Test 3: Query with a bind parameter (goes through the extended query protocol)
	fmt.Println("\n=== Querying with prepared statement ===")
	rows, err := db.Query("SELECT id, name, price, active FROM binary_test WHERE id > $1", 0)
	if err != nil {
		log.Fatalf("Query failed: %v", err)
	}
	defer rows.Close()

	const wantRows = 3 // number of rows inserted above, all with id > 0
	rowCount := 0
	for rows.Next() {
		var (
			id     int
			name   string
			price  float64
			active bool
		)
		if err := rows.Scan(&id, &name, &price, &active); err != nil {
			log.Fatalf("Scan failed: %v", err)
		}
		fmt.Printf("  id=%d, name=%s, price=%.2f, active=%v\n", id, name, price, active)
		rowCount++
	}
	// rows.Next returns false both at end-of-data and on error; only Err tells
	// the two apart.
	if err := rows.Err(); err != nil {
		log.Fatalf("Row iteration failed: %v", err)
	}
	if rowCount != wantRows {
		log.Fatalf("Expected %d rows, got %d", wantRows, rowCount)
	}

	// Test 4: Query integers
	fmt.Println("\n=== Testing integer types ===")
	var intVal int
	if err = db.QueryRow("SELECT 12345").Scan(&intVal); err != nil {
		log.Fatalf("Integer query failed: %v", err)
	}
	fmt.Printf("  Integer: %d (expected 12345)\n", intVal)
	if intVal != 12345 {
		log.Fatalf("Integer mismatch: got %d, want 12345", intVal)
	}

	// Test 5: Query floats
	fmt.Println("\n=== Testing float types ===")
	var floatVal float64
	if err = db.QueryRow("SELECT 3.14159").Scan(&floatVal); err != nil {
		log.Fatalf("Float query failed: %v", err)
	}
	fmt.Printf("  Float: %f (expected 3.14159)\n", floatVal)
	// Compare with a small tolerance rather than ==, since the value may have
	// travelled through a text or decimal representation.
	if diff := floatVal - 3.14159; diff > 1e-9 || diff < -1e-9 {
		log.Fatalf("Float mismatch: got %v, want 3.14159", floatVal)
	}

	// Test 6: Query boolean
	fmt.Println("\n=== Testing boolean type ===")
	var boolVal bool
	if err = db.QueryRow("SELECT true").Scan(&boolVal); err != nil {
		log.Fatalf("Boolean query failed: %v", err)
	}
	fmt.Printf("  Boolean: %v (expected true)\n", boolVal)
	if !boolVal {
		log.Fatalf("Boolean mismatch: got %v, want true", boolVal)
	}

	fmt.Println("\n=== All tests passed! ===")
}

server/catalog.go

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
package server
2+
3+
import (
	"database/sql"
	"log"
	"regexp"
	"strings"
)
8+
9+
// initPgCatalog creates the PostgreSQL compatibility view and macros that
// psql-style catalog queries rely on.
//
// It executes, in order:
//   - a CREATE OR REPLACE VIEW for an unqualified pg_database view (queries
//     that mention pg_catalog.pg_database are rewritten elsewhere to use it);
//   - a series of CREATE OR REPLACE MACRO statements that stub out catalog
//     helper functions (privilege checks, format_type, current_setting, ...).
//
// Setup is deliberately best-effort: a statement that fails is logged and
// skipped so that one unsupported macro cannot prevent the rest from being
// installed. The function therefore always returns nil; the error result is
// kept for interface stability.
func initPgCatalog(db *sql.DB) error {
	// Our own pg_database view with the columns psql expects. It is created
	// without a schema prefix and queries are rewritten to reference it.
	const pgDatabaseSQL = `
		CREATE OR REPLACE VIEW pg_database AS
		SELECT
			1::INTEGER AS oid,
			current_database() AS datname,
			0::INTEGER AS datdba,
			6::INTEGER AS encoding,
			'en_US.UTF-8' AS datcollate,
			'en_US.UTF-8' AS datctype,
			false AS datistemplate,
			true AS datallowconn,
			-1::INTEGER AS datconnlimit,
			NULL AS datacl
	`
	if _, err := db.Exec(pgDatabaseSQL); err != nil {
		log.Printf("pg_catalog compat: creating pg_database view: %v", err)
	}

	// Helper macros that psql expects. They are created without a schema
	// prefix so that unqualified calls resolve to them.
	macros := []string{
		// pg_get_userbyid - returns username for a role OID
		`CREATE OR REPLACE MACRO pg_get_userbyid(id) AS 'duckdb'`,
		// pg_table_is_visible - checks if table is in search path
		`CREATE OR REPLACE MACRO pg_table_is_visible(oid) AS true`,
		// has_schema_privilege - check schema access
		`CREATE OR REPLACE MACRO has_schema_privilege(schema, priv) AS true`,
		`CREATE OR REPLACE MACRO has_schema_privilege(u, schema, priv) AS true`,
		// has_table_privilege - check table access
		`CREATE OR REPLACE MACRO has_table_privilege(table_name, priv) AS true`,
		`CREATE OR REPLACE MACRO has_table_privilege(u, table_name, priv) AS true`,
		// pg_encoding_to_char - convert encoding ID to name
		`CREATE OR REPLACE MACRO pg_encoding_to_char(enc) AS 'UTF8'`,
		// format_type - format a type OID as string
		`CREATE OR REPLACE MACRO format_type(type_oid, typemod) AS
			CASE type_oid
				WHEN 16 THEN 'boolean'
				WHEN 17 THEN 'bytea'
				WHEN 20 THEN 'bigint'
				WHEN 21 THEN 'smallint'
				WHEN 23 THEN 'integer'
				WHEN 25 THEN 'text'
				WHEN 700 THEN 'real'
				WHEN 701 THEN 'double precision'
				WHEN 1042 THEN 'character'
				WHEN 1043 THEN 'character varying'
				WHEN 1082 THEN 'date'
				WHEN 1083 THEN 'time'
				WHEN 1114 THEN 'timestamp'
				WHEN 1184 THEN 'timestamp with time zone'
				WHEN 1700 THEN 'numeric'
				WHEN 2950 THEN 'uuid'
				ELSE 'unknown'
			END`,
		// obj_description - get object comment
		`CREATE OR REPLACE MACRO obj_description(oid, catalog) AS NULL`,
		// col_description - get column comment
		`CREATE OR REPLACE MACRO col_description(table_oid, col_num) AS NULL`,
		// shobj_description - get shared object comment
		`CREATE OR REPLACE MACRO shobj_description(oid, catalog) AS NULL`,
		// pg_get_indexdef - get index definition
		`CREATE OR REPLACE MACRO pg_get_indexdef(index_oid) AS ''`,
		`CREATE OR REPLACE MACRO pg_get_indexdef(index_oid, col, pretty) AS ''`,
		// pg_get_constraintdef - get constraint definition
		`CREATE OR REPLACE MACRO pg_get_constraintdef(constraint_oid) AS ''`,
		`CREATE OR REPLACE MACRO pg_get_constraintdef(constraint_oid, pretty) AS ''`,
		// current_setting - get config setting
		`CREATE OR REPLACE MACRO current_setting(name) AS
			CASE name
				WHEN 'server_version' THEN '15.0'
				WHEN 'server_encoding' THEN 'UTF8'
				ELSE ''
			END`,
		// pg_is_in_recovery - check if in recovery mode
		`CREATE OR REPLACE MACRO pg_is_in_recovery() AS false`,
	}

	for _, stmt := range macros {
		if _, err := db.Exec(stmt); err != nil {
			// Some macros may already exist or conflict with built-ins; log
			// the first line of the statement for context and keep going.
			head, _, _ := strings.Cut(stmt, "\n")
			log.Printf("pg_catalog compat: %q: %v", head, err)
		}
	}

	return nil
}
98+
99+
// pgCatalogFunctions is the list of functions that clients such as psql call
// with an explicit pg_catalog. prefix and that should instead be called
// unqualified.
var pgCatalogFunctions = []string{
	"pg_get_userbyid",
	"pg_table_is_visible",
	"pg_get_expr",
	"pg_encoding_to_char",
	"format_type",
	"obj_description",
	"col_description",
	"shobj_description",
	"pg_get_indexdef",
	"pg_get_constraintdef",
	"current_setting",
	"pg_is_in_recovery",
	"has_schema_privilege",
	"has_table_privilege",
	"array_to_string",
}

// pgCatalogFuncRegex matches "pg_catalog.<name>(" (case-insensitively, with
// optional whitespace before the parenthesis) for every name in
// pgCatalogFunctions. Capture group 1 is the bare function name.
var pgCatalogFuncRegex = compilePgCatalogFuncRegex(pgCatalogFunctions)

// compilePgCatalogFuncRegex builds the alternation pattern for
// pgCatalogFuncRegex. Names are escaped so that a future entry containing a
// regex metacharacter cannot alter the pattern.
func compilePgCatalogFuncRegex(names []string) *regexp.Regexp {
	quoted := make([]string, len(names))
	for i, name := range names {
		quoted[i] = regexp.QuoteMeta(name)
	}
	return regexp.MustCompile(`(?i)\bpg_catalog\.(` + strings.Join(quoted, "|") + `)\s*\(`)
}

// Regex patterns for query rewriting. Identifier patterns end in \b so that
// they only match whole names (e.g. pg_database but not pg_database_size).
var (
	// OPERATOR(pg_catalog.~) -> ~
	// The class covers every character PostgreSQL allows in an operator name:
	// + - * / < > = ~ ! @ # % ^ & | ` ?   (\x60 is the backtick).
	operatorRegex = regexp.MustCompile(`(?i)OPERATOR\s*\(\s*pg_catalog\.([-+*/<>=~!@#%^&|?\x60]+)\s*\)`)
	// COLLATE pg_catalog.default -> (remove)
	collateRegex = regexp.MustCompile(`(?i)\s+COLLATE\s+pg_catalog\."?default"?`)
	// pg_catalog.pg_database -> pg_database (use our view)
	pgDatabaseRegex = regexp.MustCompile(`(?i)\bpg_catalog\.pg_database\b`)
	// ::pg_catalog.regtype::pg_catalog.text -> ::VARCHAR (PostgreSQL type cast)
	regtypeTextCastRegex = regexp.MustCompile(`(?i)::pg_catalog\.regtype::pg_catalog\.text\b`)
	// ::pg_catalog.regtype -> ::VARCHAR
	regtypeCastRegex = regexp.MustCompile(`(?i)::pg_catalog\.regtype\b`)
	// ::pg_catalog.text -> ::VARCHAR
	textCastRegex = regexp.MustCompile(`(?i)::pg_catalog\.text\b`)
)

// rewritePgCatalogQuery rewrites PostgreSQL-specific, pg_catalog-qualified
// syntax in query into the unqualified forms used by this server and returns
// the rewritten text. Queries without any such syntax are returned unchanged.
//
// The rewrite is purely textual: it does not parse SQL, so matching text inside
// string literals or comments is rewritten as well.
func rewritePgCatalogQuery(query string) string {
	// Replace pg_catalog.func_name( with func_name(
	query = pgCatalogFuncRegex.ReplaceAllString(query, "${1}(")

	// Replace OPERATOR(pg_catalog.<op>) with just <op>
	query = operatorRegex.ReplaceAllString(query, "${1}")

	// Remove COLLATE pg_catalog.default
	query = collateRegex.ReplaceAllString(query, "")

	// Replace pg_catalog.pg_database with pg_database (our unqualified view)
	query = pgDatabaseRegex.ReplaceAllString(query, "pg_database")

	// Replace PostgreSQL type casts. Order matters: the combined
	// regtype::text form must be handled before its two components.
	query = regtypeTextCastRegex.ReplaceAllString(query, "::VARCHAR")
	query = regtypeCastRegex.ReplaceAllString(query, "::VARCHAR")
	query = textCastRegex.ReplaceAllString(query, "::VARCHAR")

	return query
}
164+

0 commit comments

Comments
 (0)