Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
14f751e
Added support for json functions
ritwizsinha Jan 17, 2025
dcf3db7
Format file
ritwizsinha Jan 17, 2025
6d7090b
Add json_extract_string
ritwizsinha Jan 17, 2025
a6ff51b
Run format
ritwizsinha Jan 17, 2025
49f75a5
Add functions to metadata cache
ritwizsinha Jan 22, 2025
63ebf10
Add all json functions support
ritwizsinha Jan 22, 2025
6a6069c
format
ritwizsinha Jan 22, 2025
5840c50
Remove jsonb tests
ritwizsinha Jan 22, 2025
2563c73
Make the tests pass
ritwizsinha Jan 22, 2025
7f0e183
Fix for PG17, in which JSON became a keyword
JelteF Jan 23, 2025
09ed864
Remove tests giving different outputs in postgres 17
ritwizsinha Jan 24, 2025
be1792a
Remove from expected
ritwizsinha Jan 24, 2025
4daeed0
Merge branch 'main' into json-functions
ritwizsinha Jan 24, 2025
1b347e5
Run formatting
ritwizsinha Jan 24, 2025
211bfbf
Move json functions to end of sql migration file
JelteF Jan 24, 2025
ded869a
Make json functions work with duckdb.unresolved_type and jsonb
JelteF Jan 24, 2025
aff27ac
Also include ruleutils change for PG14-PG16
JelteF Jan 27, 2025
9a32562
Merge branch 'main' into json-functions
ritwizsinha Jan 27, 2025
88768f4
Add COMMENT on duckdb.json type
JelteF Jan 29, 2025
8430e0a
Test that we handle JSON of different Postgres types correctly
JelteF Jan 29, 2025
1f2202f
Change tests a bit and use duckdb.json type for json and json_valid f…
JelteF Jan 29, 2025
b715ac5
Define JSON aggregate functions as aggregates and fix json_group_object
JelteF Jan 29, 2025
974c068
Tests for json functions that return STRUCT type and fix return types
JelteF Jan 29, 2025
3d7d1e0
Add some more json function variants
JelteF Jan 29, 2025
6c89e16
Add even more json function variants
JelteF Jan 29, 2025
13bbf4b
Add subquery aliases to make PG14/15 happy
JelteF Jan 29, 2025
2eba816
Fix test output
JelteF Jan 29, 2025
4108d17
Merge branch 'main' into json-functions
JelteF Jan 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/pgduckdb/pgduckdb_metadata_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Oid ExtensionOid();
Oid SchemaOid();
Oid DuckdbRowOid();
Oid DuckdbUnresolvedTypeOid();
Oid DuckdbJsonOid();
Oid DuckdbTableAmOid();
bool IsMotherDuckEnabled();
bool IsMotherDuckEnabledAnywhere();
Expand Down
1 change: 1 addition & 0 deletions include/pgduckdb/pgduckdb_ruleutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ List *pgduckdb_db_and_schema(const char *postgres_schema_name, bool is_duckdb_ta
const char *pgduckdb_db_and_schema_string(const char *postgres_schema_name, bool is_duckdb_table);
bool pgduckdb_is_duckdb_row(Oid type_oid);
bool pgduckdb_is_unresolved_type(Oid type_oid);
bool pgduckdb_is_fake_type(Oid type_oid);
bool pgduckdb_var_is_duckdb_row(Var *var);
bool pgduckdb_func_returns_duckdb_row(RangeTblFunction *rtfunc);
bool pgduckdb_target_list_contains_unresolved_type_or_row(List *target_list);
Expand Down
226 changes: 226 additions & 0 deletions sql/pg_duckdb--0.2.0--0.3.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,12 @@ CREATE CAST (duckdb.unresolved_type AS json)
CREATE CAST (duckdb.unresolved_type AS json[])
WITH INOUT;

-- JSONB
-- Casts from duckdb.unresolved_type to jsonb and jsonb[]. WITH INOUT performs
-- the conversion through the types' text I/O functions. Neither AS IMPLICIT
-- nor AS ASSIGNMENT is given, so these casts only apply when written
-- explicitly.
CREATE CAST (duckdb.unresolved_type AS jsonb)
WITH INOUT;
CREATE CAST (duckdb.unresolved_type AS jsonb[])
WITH INOUT;

-- read_parquet function for single path
DROP FUNCTION @extschema@.read_parquet(path text, binary_as_string BOOLEAN,
filename BOOLEAN,
Expand Down Expand Up @@ -824,3 +830,223 @@ RETURNS SETOF duckdb.row
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- duckdb.json helper type, created in three steps.
--
-- Step 1: declare a shell type so that the support functions below can mention
-- duckdb.json in their signatures before the type is fully defined.
CREATE TYPE duckdb.json;
COMMENT ON TYPE duckdb.json IS 'A helper type that allows passing JSON, JSONB, duckdb.unresolved_type and string literals to DuckDB its json related functions';
-- Step 2: input, output and subscript support functions. They are bound to C
-- symbols named duckdb_unresolved_type_*, i.e. presumably the same entry
-- points that back duckdb.unresolved_type -- confirm against the C sources.
CREATE FUNCTION duckdb.json_in(cstring) RETURNS duckdb.json AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_in' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.json_out(duckdb.json) RETURNS cstring AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_out' LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION duckdb.json_subscript(internal) RETURNS internal AS 'MODULE_PATHNAME', 'duckdb_unresolved_type_subscript' LANGUAGE C IMMUTABLE STRICT;
-- Step 3: complete the shell type as a variable-length type that uses the
-- support functions declared above.
CREATE TYPE duckdb.json (
INTERNALLENGTH = VARIABLE,
INPUT = duckdb.json_in,
OUTPUT = duckdb.json_out,
SUBSCRIPT = duckdb.json_subscript
);

-- Implicit casts into duckdb.json from duckdb.unresolved_type, json and jsonb.
-- WITH INOUT converts through the source type's output function and
-- duckdb.json's input function. AS IMPLICIT lets Postgres apply the cast
-- automatically, e.g. when a value of one of these types is passed for a
-- duckdb.json function argument.
CREATE CAST (duckdb.unresolved_type AS duckdb.json)
WITH INOUT AS IMPLICIT;

CREATE CAST (json AS duckdb.json)
WITH INOUT AS IMPLICIT;

CREATE CAST (jsonb AS duckdb.json)
WITH INOUT AS IMPLICIT;

-- json_exists("json", path) -> boolean
-- Takes a duckdb.json value and a VARCHAR path. Implemented by the shared C
-- symbol duckdb_only_function; presumably a stub whose real evaluation happens
-- in DuckDB -- confirm against the C implementation.
CREATE FUNCTION @extschema@.json_exists("json" duckdb.json, path VARCHAR)
RETURNS boolean
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract("json", path)
-- Three overloads that differ only in the type of path:
--   bigint    -> JSON
--   VARCHAR   -> JSON
--   VARCHAR[] -> JSON[]
-- All are implemented by the shared C symbol duckdb_only_function.

-- json_extract with a bigint path
CREATE FUNCTION @extschema@.json_extract("json" duckdb.json, path bigint)
RETURNS JSON
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract with a VARCHAR path
CREATE FUNCTION @extschema@.json_extract("json" duckdb.json, path VARCHAR)
RETURNS JSON
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract with path list: returns an array of JSON values
CREATE FUNCTION @extschema@.json_extract("json" duckdb.json, path VARCHAR[])
RETURNS JSON[]
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract_string("json", path)
-- Three overloads that differ only in the type of path:
--   bigint    -> VARCHAR
--   VARCHAR   -> VARCHAR
--   VARCHAR[] -> VARCHAR[]
-- All are implemented by the shared C symbol duckdb_only_function.

-- json_extract_string with a bigint path
CREATE FUNCTION @extschema@.json_extract_string("json" duckdb.json, path bigint)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract_string with a VARCHAR path
CREATE FUNCTION @extschema@.json_extract_string("json" duckdb.json, path VARCHAR)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_extract_string with path list: returns an array of strings
CREATE FUNCTION @extschema@.json_extract_string("json" duckdb.json, path VARCHAR[])
RETURNS VARCHAR[]
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_value("json", path)
-- Three overloads that differ only in the type of path:
--   bigint    -> VARCHAR
--   VARCHAR   -> VARCHAR
--   VARCHAR[] -> VARCHAR[]
-- All are implemented by the shared C symbol duckdb_only_function.

-- json_value with a bigint path
CREATE FUNCTION @extschema@.json_value("json" duckdb.json, path bigint)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_value with a VARCHAR path
CREATE FUNCTION @extschema@.json_value("json" duckdb.json, path VARCHAR)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_value with path list: returns an array of strings
CREATE FUNCTION @extschema@.json_value("json" duckdb.json, path VARCHAR[])
RETURNS VARCHAR[]
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_array_length("json" [, path_input])
-- Two overloads, both implemented by the shared C symbol duckdb_only_function:
--   path_input VARCHAR (DEFAULT NULL) -> bigint
--   path_input VARCHAR[]              -> bigint[]
-- Because of the DEFAULT, a single-argument call resolves to the VARCHAR
-- overload.
CREATE FUNCTION @extschema@.json_array_length("json" duckdb.json, path_input VARCHAR DEFAULT NULL)
RETURNS bigint
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_array_length with path list: returns one length per path
CREATE FUNCTION @extschema@.json_array_length("json" duckdb.json, path_input VARCHAR[])
RETURNS bigint[]
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_contains(json_haystack, json_needle) -> boolean
-- Both arguments are duckdb.json, so json, jsonb and duckdb.unresolved_type
-- values can be passed through the implicit casts to duckdb.json defined
-- earlier in this script.
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json_contains(json_haystack duckdb.json, json_needle duckdb.json)
RETURNS boolean
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_keys("json" [, path])
-- Two overloads, both declared as set-returning (SETOF VARCHAR) and both
-- implemented by the shared C symbol duckdb_only_function:
--   path VARCHAR (DEFAULT NULL)
--   path VARCHAR[]
-- NOTE(review): DuckDB documents json_keys as returning a LIST of VARCHAR
-- (a LIST of LISTs when path is a list) rather than a row set. Confirm that
-- SETOF VARCHAR is the intended Postgres-side declaration for both overloads.
CREATE FUNCTION @extschema@.json_keys("json" duckdb.json, path VARCHAR DEFAULT NULL)
RETURNS SETOF VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_keys with path list
CREATE FUNCTION @extschema@.json_keys("json" duckdb.json, path VARCHAR[])
RETURNS SETOF VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_structure("json") -> JSON
-- Single-argument function taking a duckdb.json value.
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json_structure("json" duckdb.json)
RETURNS JSON
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;
-- json_type("json" [, path])
-- Two overloads, both declared to return VARCHAR and both implemented by the
-- shared C symbol duckdb_only_function:
--   path VARCHAR (DEFAULT NULL)
--   path VARCHAR[]
CREATE FUNCTION @extschema@.json_type("json" duckdb.json, path VARCHAR DEFAULT NULL)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_type with path list
-- NOTE(review): the other VARCHAR[] path overloads in this script
-- (json_extract, json_extract_string, json_value, json_array_length) return an
-- array type, and DuckDB documents a LIST result when path is a list. This
-- overload returns scalar VARCHAR -- confirm whether VARCHAR[] was intended.
CREATE FUNCTION @extschema@.json_type("json" duckdb.json, path VARCHAR[])
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_valid("json") -> boolean
-- Takes a duckdb.json argument, so string literals as well as json, jsonb and
-- duckdb.unresolved_type values are accepted (see the COMMENT on duckdb.json).
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json_valid("json" duckdb.json)
RETURNS boolean
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json("json") -> VARCHAR
-- Function named json taking a duckdb.json value. The argument name is written
-- as a double-quoted identifier; presumably because JSON is treated as a
-- keyword by newer Postgres versions -- confirm.
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json("json" duckdb.json)
RETURNS VARCHAR
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_group_array("any") -> JSON (aggregate)
-- The state transition function takes the JSON state plus one value of any
-- type and is bound to the shared C symbol duckdb_only_function. Unlike the
-- scalar functions in this script, it carries no SET search_path clause.
CREATE FUNCTION @extschema@.json_group_array_sfunc(JSON, "any")
RETURNS JSON
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- The aggregate itself: JSON state, seeded with the literal 0.
-- NOTE(review): the initial value is presumably a placeholder, since the sfunc
-- is a duckdb_only_function -- confirm.
CREATE AGGREGATE @extschema@.json_group_array("any")
(
sfunc = @extschema@.json_group_array_sfunc,
stype = JSON,
initcond = 0
);

-- json_group_object("any", "any") -> JSON (aggregate)
-- The state transition function takes the JSON state plus two values of any
-- type and is bound to the shared C symbol duckdb_only_function. Unlike the
-- scalar functions in this script, it carries no SET search_path clause.
CREATE FUNCTION @extschema@.json_group_object_sfunc(JSON, "any", "any")
RETURNS JSON
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- The aggregate itself: JSON state, seeded with the literal 0.
-- NOTE(review): the initial value is presumably a placeholder, since the sfunc
-- is a duckdb_only_function -- confirm.
CREATE AGGREGATE @extschema@.json_group_object("any", "any")
(
sfunc = @extschema@.json_group_object_sfunc,
stype = JSON,
initcond = 0
);

-- json_group_structure(duckdb.json) -> JSON (aggregate)
-- The state transition function takes the JSON state plus one duckdb.json
-- value and is bound to the shared C symbol duckdb_only_function. Unlike the
-- scalar functions in this script, it carries no SET search_path clause.
CREATE FUNCTION @extschema@.json_group_structure_sfunc(JSON, duckdb.json)
RETURNS JSON
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- The aggregate itself: JSON state, seeded with the literal 0.
-- NOTE(review): the initial value is presumably a placeholder, since the sfunc
-- is a duckdb_only_function -- confirm.
CREATE AGGREGATE @extschema@.json_group_structure(duckdb.json)
(
sfunc = @extschema@.json_group_structure_sfunc,
stype = JSON,
initcond = 0
);

-- json_transform("json", structure) -> duckdb.unresolved_type
-- Both arguments are duckdb.json. The result is declared as
-- duckdb.unresolved_type; presumably because the actual result type depends on
-- the structure argument and is only known to DuckDB -- confirm.
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json_transform("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.unresolved_type
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- from_json("json", structure) -> duckdb.unresolved_type
-- Declared with the same signature and return type as json_transform.
CREATE FUNCTION @extschema@.from_json("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.unresolved_type
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- json_transform_strict("json", structure) -> duckdb.unresolved_type
-- Both arguments are duckdb.json. The result is declared as
-- duckdb.unresolved_type; presumably because the actual result type depends on
-- the structure argument and is only known to DuckDB -- confirm.
-- Implemented by the shared C symbol duckdb_only_function.
CREATE FUNCTION @extschema@.json_transform_strict("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.unresolved_type
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;

-- from_json_strict("json", structure) -> duckdb.unresolved_type
-- Declared with the same signature and return type as json_transform_strict.
CREATE FUNCTION @extschema@.from_json_strict("json" duckdb.json, structure duckdb.json)
RETURNS duckdb.unresolved_type
SET search_path = pg_catalog, pg_temp
AS 'MODULE_PATHNAME', 'duckdb_only_function'
LANGUAGE C;
30 changes: 29 additions & 1 deletion src/pgduckdb_metadata_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ struct {
Oid row_oid;
/* The OID of the duckdb.unresolved_type */
Oid unresolved_type_oid;
/* The OID of the duckdb.json */
Oid json_oid;
/* The OID of the duckdb Table Access Method */
Oid table_am_oid;
/* The OID of the duckdb.motherduck_postgres_database */
Expand Down Expand Up @@ -123,7 +125,25 @@ BuildDuckdbOnlyFunctions() {
"delta_scan",
"read_json",
"approx_count_distinct",
"query"};
"query",
"json_exists",
"json_extract",
"json_extract_string",
"json_array_length",
"json_contains",
"json_keys",
"json_structure",
"json_type",
"json_valid",
"json",
"json_group_array",
"json_group_object",
"json_group_structure",
"json_transform",
"from_json",
"json_transform_strict",
"from_json_strict",
"json_value"};

for (uint32_t i = 0; i < lengthof(function_names); i++) {
CatCList *catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(function_names[i]));
Expand Down Expand Up @@ -192,6 +212,8 @@ IsExtensionRegistered() {
cache.unresolved_type_oid =
GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("unresolved_type"), cache.schema_oid);

cache.json_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid, CStringGetDatum("json"), cache.schema_oid);

cache.motherduck_postgres_database_oid = get_database_oid(duckdb_motherduck_postgres_database, false);

if (duckdb_postgres_role[0] != '\0') {
Expand Down Expand Up @@ -256,6 +278,12 @@ DuckdbUnresolvedTypeOid() {
return cache.unresolved_type_oid;
}

/*
 * Returns the OID of the duckdb.json type as stored in the metadata cache
 * (cache.json_oid). The assertion expects the cache to be marked valid before
 * this accessor is called.
 */
Oid
DuckdbJsonOid() {
Assert(cache.valid);
return cache.json_oid;
}

Oid
DuckdbTableAmOid() {
Assert(cache.valid);
Expand Down
32 changes: 27 additions & 5 deletions src/pgduckdb_ruleutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,29 @@ pgduckdb_is_duckdb_row(Oid type_oid) {
return type_oid == pgduckdb::DuckdbRowOid();
}

/*
 * Reports whether type_oid is one of the placeholder types that exist only to
 * keep the Postgres parser and its type resolution happy: the unresolved
 * type, the duckdb row type, or the duckdb json type. None of these should
 * ever be shown in the DuckDB query; DuckDB can figure out the correct type
 * itself without an explicit cast.
 *
 * The checks short-circuit in the order listed, so the json OID lookup only
 * happens when the first two checks fail.
 */
bool
pgduckdb_is_fake_type(Oid type_oid) {
	return pgduckdb_is_unresolved_type(type_oid) || pgduckdb_is_duckdb_row(type_oid) ||
	       type_oid == pgduckdb::DuckdbJsonOid();
}

bool
pgduckdb_var_is_duckdb_row(Var *var) {
if (!var) {
Expand Down Expand Up @@ -299,14 +322,13 @@ pgduckdb_function_needs_subquery(Oid function_oid) {
}

/*
 * A wrapper around pgduckdb_is_fake_type that returns -1 if the type of the
 * Const is fake, because that's the value get_const_expr requires in its
 * showtype variable to never show the type.
 *
 * constval:          the constant whose consttype is inspected.
 * original_showtype: the showtype value the caller would otherwise use; it is
 *                    returned unchanged for every non-fake type.
 */
int
pgduckdb_show_type(Const *constval, int original_showtype) {
	/* Single check: pgduckdb_is_fake_type already covers the unresolved type. */
	if (pgduckdb_is_fake_type(constval->consttype)) {
		return -1;
	}
	return original_showtype;
}
Expand Down
4 changes: 4 additions & 0 deletions src/vendor/pg_ruleutils_14.c
Original file line number Diff line number Diff line change
Expand Up @@ -10432,6 +10432,10 @@ get_coercion_expr(Node *arg, deparse_context *context,
appendStringInfoChar(buf, ')');
}

if (pgduckdb_is_fake_type(resulttype)) {
return;
}

/*
* Never emit resulttype(arg) functional notation. A pg_proc entry could
* take precedence, and a resulttype in pg_temp would require schema
Expand Down
4 changes: 4 additions & 0 deletions src/vendor/pg_ruleutils_15.c
Original file line number Diff line number Diff line change
Expand Up @@ -10634,6 +10634,10 @@ get_coercion_expr(Node *arg, deparse_context *context,
appendStringInfoChar(buf, ')');
}

if (pgduckdb_is_fake_type(resulttype)) {
return;
}

/*
* Never emit resulttype(arg) functional notation. A pg_proc entry could
* take precedence, and a resulttype in pg_temp would require schema
Expand Down
4 changes: 4 additions & 0 deletions src/vendor/pg_ruleutils_16.c
Original file line number Diff line number Diff line change
Expand Up @@ -10687,6 +10687,10 @@ get_coercion_expr(Node *arg, deparse_context *context,
appendStringInfoChar(buf, ')');
}

if (pgduckdb_is_fake_type(resulttype)) {
return;
}

/*
* Never emit resulttype(arg) functional notation. A pg_proc entry could
* take precedence, and a resulttype in pg_temp would require schema
Expand Down
Loading