Skip to content

Commit 0671f8b

Browse files
authored
Merge pull request #48 from PostHog/fix/regclass-metabase-support
Fix ::regclass cast for Metabase column discovery
2 parents b80254f + a913f0e commit 0671f8b

File tree

2 files changed

+173
-36
lines changed

2 files changed

+173
-36
lines changed

tests/integration/TEST_RESULTS.md

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ To run without DuckLake: `DUCKGRES_TEST_NO_DUCKLAKE=1 go test ./tests/integratio
4747
| **Fivetran Queries** | 4 | 4 | 0 | 100% |
4848
| **Airbyte Queries** | 3 | 3 | 0 | 100% |
4949
| **dbt Queries** | 7 | 7 | 0 | 100% |
50-
| **Metabase Queries** | 5 | 4 | 1 | 80% |
50+
| **Metabase Queries** | 5 | 5 | 0 | 100% |
5151
| **DBeaver Queries** | 4 | 4 | 0 | 100% |
5252
| **Prepared Statements** | 3 | 3 | 0 | 100% |
5353
| **Transactions** | 2 | 2 | 0 | 100% |
@@ -100,7 +100,7 @@ DuckLake has additional constraints compared to vanilla DuckDB:
100100
| Airbyte Queries | 100% |
101101
| dbt Queries | 100% |
102102
| DBeaver Queries | 100% |
103-
| Metabase Queries | 80% (1 `::regclass` limitation) |
103+
| Metabase Queries | 100% |
104104

105105
## Passing Test Categories (Vanilla DuckDB)
106106

@@ -145,12 +145,12 @@ All pg_catalog views and functions work correctly:
145145
- Column metadata ✓
146146
- Server version ✓
147147

148-
**Metabase** (80%):
148+
**Metabase** (100%):
149149
- Get schemas ✓
150150
- Get tables ✓
151151
- Connection test ✓
152152
- Version check ✓
153-
- Get columns ✗ (`::regclass` cast not supported)
153+
- Get columns
154154

155155
### ETL Tools Compatibility
156156

@@ -194,18 +194,13 @@ DuckDB: `10 / 3 = 3.333...` (float)
194194
**Impact**: Arithmetic tests
195195
**Solution**: Use explicit `CAST` or `::integer`
196196

197-
### 4. `::regclass` Cast (Metabase)
198-
Metabase uses `'table_name'::regclass` for column lookup. The transpiler converts this to `::varchar` which doesn't work for attrelid lookups.
199-
200-
**Workaround**: Join with pg_class by relname instead
201-
202-
### 5. RETURNING Clause (DuckLake)
197+
### 4. RETURNING Clause (DuckLake)
203198
DuckLake does not support `INSERT/UPDATE/DELETE ... RETURNING`.
204199

205200
**Impact**: Tests using RETURNING clause fail in DuckLake mode
206201
**Workaround**: Use separate SELECT after mutation
207202

208-
### 6. Per-Connection Database (Vanilla DuckDB)
203+
### 5. Per-Connection Database (Vanilla DuckDB)
209204
Each new database connection gets a fresh in-memory DuckDB database. This is by design but affects tests requiring data persistence across connections.
210205

211206
**Note**: This is not an issue in DuckLake mode where metadata persists.
@@ -227,6 +222,9 @@ Each new database connection gets a fresh in-memory DuckDB database. This is by
227222
- **Session commands** (`transpiler/transform/setshow.go`): Added RESET ALL, DISCARD ALL/PLANS/SEQUENCES/TEMP support
228223
- **Transaction modes** (`transpiler/transform/setshow.go`): Strip ISOLATION LEVEL and READ ONLY/WRITE from BEGIN statements
229224

225+
### Regclass Cast Fix (PR #48)
226+
- **`::regclass` subquery rewrite** (`transpiler/transform/typecast.go`): Convert `'tablename'::regclass` to `(SELECT oid FROM pg_class WHERE relname = 'tablename')` for proper OID lookup. This fixes Metabase column discovery queries.
227+
230228
### Test Harness Fixes
231229
- **`compare.go`**: Use `sql.RawBytes` to avoid driver parsing issues
232230
- **`compare.go`**: Add `IgnoreColumnNames` option (DuckDB names anonymous columns differently)
@@ -297,10 +295,9 @@ go test ./tests/integration/clients/... -v -run "TestGrafana"
297295

298296
## Recommendations for Improving Compatibility
299297

300-
1. **Fix `::regclass` handling** in the transpiler for better Metabase support
301-
2. **Transpile regex operators** (`~`, `~*`) to `regexp_matches()`
302-
3. **Handle integer division** to match PostgreSQL behavior
303-
4. **Add RETURNING emulation** for DuckLake mode (separate SELECT)
298+
1. **Transpile regex operators** (`~`, `~*`) to `regexp_matches()`
299+
2. **Handle integer division** to match PostgreSQL behavior
300+
3. **Add RETURNING emulation** for DuckLake mode (separate SELECT)
304301

305302
## CI Configuration
306303

transpiler/transform/typecast.go

Lines changed: 161 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88

99
// TypeCastTransform converts PostgreSQL-specific type casts to DuckDB equivalents.
1010
// For example: ::pg_catalog.regtype -> ::VARCHAR
11+
// Special case: 'tablename'::regclass -> (SELECT oid FROM pg_class WHERE relname = 'tablename')
1112
type TypeCastTransform struct {
1213
// TypeMappings maps pg_catalog types to DuckDB types
1314
TypeMappings map[string]string
@@ -17,16 +18,16 @@ type TypeCastTransform struct {
1718
func NewTypeCastTransform() *TypeCastTransform {
1819
return &TypeCastTransform{
1920
TypeMappings: map[string]string{
20-
"regtype": "varchar",
21-
"regclass": "varchar",
22-
"regnamespace": "varchar",
23-
"regproc": "varchar",
24-
"regoper": "varchar",
25-
"regoperator": "varchar",
26-
"regprocedure": "varchar",
27-
"regconfig": "varchar",
21+
"regtype": "varchar",
22+
"regnamespace": "varchar",
23+
"regproc": "varchar",
24+
"regoper": "varchar",
25+
"regoperator": "varchar",
26+
"regprocedure": "varchar",
27+
"regconfig": "varchar",
2828
"regdictionary": "varchar",
29-
"text": "varchar",
29+
"text": "varchar",
30+
// Note: regclass is handled specially - converted to subquery lookup
3031
},
3132
}
3233
}
@@ -58,35 +59,70 @@ func (t *TypeCastTransform) walkAndTransform(node *pg_query.Node, changed *bool)
5859
if n.TypeCast != nil && n.TypeCast.TypeName != nil {
5960
// Check if this is a pg_catalog type cast
6061
typeName := n.TypeCast.TypeName
62+
typeLower := ""
63+
6164
if len(typeName.Names) >= 2 {
6265
// Check for pg_catalog.typename pattern
6366
if first := typeName.Names[0]; first != nil {
6467
if str := first.GetString_(); str != nil && strings.EqualFold(str.Sval, "pg_catalog") {
6568
if second := typeName.Names[1]; second != nil {
6669
if typeStr := second.GetString_(); typeStr != nil {
67-
typeLower := strings.ToLower(typeStr.Sval)
68-
if newType, ok := t.TypeMappings[typeLower]; ok {
69-
// Replace with just the DuckDB type name
70-
typeName.Names = []*pg_query.Node{
71-
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: newType}}},
72-
}
73-
*changed = true
74-
}
70+
typeLower = strings.ToLower(typeStr.Sval)
7571
}
7672
}
7773
}
7874
}
7975
} else if len(typeName.Names) == 1 {
80-
// Single name, check if it needs mapping
76+
// Single name
8177
if first := typeName.Names[0]; first != nil {
8278
if str := first.GetString_(); str != nil {
83-
typeLower := strings.ToLower(str.Sval)
84-
if newType, ok := t.TypeMappings[typeLower]; ok {
79+
typeLower = strings.ToLower(str.Sval)
80+
}
81+
}
82+
}
83+
84+
// Special handling for ::regclass with string literal argument
85+
// Convert 'tablename'::regclass to (SELECT oid FROM pg_class WHERE relname = 'tablename')
86+
if typeLower == "regclass" {
87+
if sublink := t.createRegclassSubquery(n.TypeCast.Arg); sublink != nil {
88+
// Replace the TypeCast node with the SubLink
89+
node.Node = &pg_query.Node_SubLink{SubLink: sublink}
90+
*changed = true
91+
return
92+
}
93+
// For non-string arguments (like oid::regclass), fall through to varchar mapping
94+
typeLower = "regclass_fallback"
95+
}
96+
97+
// Standard type mapping for other types
98+
if typeLower == "regclass_fallback" {
99+
// Fallback for regclass with non-string arguments
100+
if len(typeName.Names) >= 2 {
101+
typeName.Names = []*pg_query.Node{
102+
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: "varchar"}}},
103+
}
104+
} else if len(typeName.Names) == 1 {
105+
if first := typeName.Names[0]; first != nil {
106+
if str := first.GetString_(); str != nil {
107+
str.Sval = "varchar"
108+
}
109+
}
110+
}
111+
*changed = true
112+
} else if newType, ok := t.TypeMappings[typeLower]; ok {
113+
if len(typeName.Names) >= 2 {
114+
// Replace pg_catalog.typename with just the new type
115+
typeName.Names = []*pg_query.Node{
116+
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: newType}}},
117+
}
118+
} else if len(typeName.Names) == 1 {
119+
if first := typeName.Names[0]; first != nil {
120+
if str := first.GetString_(); str != nil {
85121
str.Sval = newType
86-
*changed = true
87122
}
88123
}
89124
}
125+
*changed = true
90126
}
91127
// Recurse into the argument
92128
t.walkAndTransform(n.TypeCast.Arg, changed)
@@ -245,3 +281,107 @@ func (t *TypeCastTransform) walkSelectStmt(stmt *pg_query.SelectStmt, changed *b
245281
t.walkSelectStmt(stmt.Rarg, changed)
246282
}
247283
}
284+
285+
// createRegclassSubquery creates a SubLink for 'tablename'::regclass
286+
// This converts: 'users'::regclass
287+
// To: (SELECT oid FROM pg_class WHERE relname = 'users')
288+
func (t *TypeCastTransform) createRegclassSubquery(arg *pg_query.Node) *pg_query.SubLink {
289+
if arg == nil {
290+
return nil
291+
}
292+
293+
// Get the string value from the argument
294+
var tableName string
295+
if aConst := arg.GetAConst(); aConst != nil {
296+
if sval := aConst.GetSval(); sval != nil {
297+
tableName = sval.Sval
298+
}
299+
}
300+
301+
if tableName == "" {
302+
return nil
303+
}
304+
305+
// Build: SELECT oid FROM pg_class WHERE relname = 'tablename'
306+
// Create the SELECT target: oid
307+
targetList := []*pg_query.Node{
308+
{
309+
Node: &pg_query.Node_ResTarget{
310+
ResTarget: &pg_query.ResTarget{
311+
Val: &pg_query.Node{
312+
Node: &pg_query.Node_ColumnRef{
313+
ColumnRef: &pg_query.ColumnRef{
314+
Fields: []*pg_query.Node{
315+
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: "oid"}}},
316+
},
317+
},
318+
},
319+
},
320+
},
321+
},
322+
},
323+
}
324+
325+
// Create the FROM clause: pg_class
326+
fromClause := []*pg_query.Node{
327+
{
328+
Node: &pg_query.Node_RangeVar{
329+
RangeVar: &pg_query.RangeVar{
330+
Relname: "pg_class",
331+
Inh: true,
332+
Relpersistence: "p",
333+
},
334+
},
335+
},
336+
}
337+
338+
// Create the WHERE clause: relname = 'tablename'
339+
whereClause := &pg_query.Node{
340+
Node: &pg_query.Node_AExpr{
341+
AExpr: &pg_query.A_Expr{
342+
Kind: pg_query.A_Expr_Kind_AEXPR_OP,
343+
Name: []*pg_query.Node{
344+
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: "="}}},
345+
},
346+
Lexpr: &pg_query.Node{
347+
Node: &pg_query.Node_ColumnRef{
348+
ColumnRef: &pg_query.ColumnRef{
349+
Fields: []*pg_query.Node{
350+
{Node: &pg_query.Node_String_{String_: &pg_query.String{Sval: "relname"}}},
351+
},
352+
},
353+
},
354+
},
355+
Rexpr: &pg_query.Node{
356+
Node: &pg_query.Node_AConst{
357+
AConst: &pg_query.A_Const{
358+
Val: &pg_query.A_Const_Sval{
359+
Sval: &pg_query.String{Sval: tableName},
360+
},
361+
},
362+
},
363+
},
364+
},
365+
},
366+
}
367+
368+
// Build the SelectStmt
369+
// Note: If multiple tables with the same name exist in different schemas,
370+
// the subquery may return multiple rows and fail. This matches PostgreSQL's
371+
// behavior where ::regclass uses search_path to resolve ambiguity.
372+
selectStmt := &pg_query.SelectStmt{
373+
TargetList: targetList,
374+
FromClause: fromClause,
375+
WhereClause: whereClause,
376+
}
377+
378+
// Create the SubLink (scalar subquery)
379+
return &pg_query.SubLink{
380+
SubLinkType: pg_query.SubLinkType_EXPR_SUBLINK,
381+
Subselect: &pg_query.Node{
382+
Node: &pg_query.Node_SelectStmt{
383+
SelectStmt: selectStmt,
384+
},
385+
},
386+
}
387+
}

0 commit comments

Comments
 (0)