diff --git a/include/pgduckdb/pgduckdb_guc.h b/include/pgduckdb/pgduckdb_guc.h index 629de5ac..20e965f4 100644 --- a/include/pgduckdb/pgduckdb_guc.h +++ b/include/pgduckdb/pgduckdb_guc.h @@ -5,6 +5,7 @@ extern int duckdb_maximum_threads; extern char *duckdb_maximum_memory; extern char *duckdb_disabled_filesystems; extern bool duckdb_enable_external_access; +extern bool duckdb_allow_community_extensions; extern bool duckdb_allow_unsigned_extensions; extern bool duckdb_autoinstall_known_extensions; extern bool duckdb_autoload_known_extensions; diff --git a/include/pgduckdb/pgduckdb_metadata_cache.hpp b/include/pgduckdb/pgduckdb_metadata_cache.hpp index d7112cfb..8225c330 100644 --- a/include/pgduckdb/pgduckdb_metadata_cache.hpp +++ b/include/pgduckdb/pgduckdb_metadata_cache.hpp @@ -21,4 +21,5 @@ Oid IsDuckdbTable(Relation relation); Oid IsMotherDuckTable(Form_pg_class relation); Oid IsMotherDuckTable(Relation relation); Oid IsDuckdbExecutionAllowed(); +void RequireDuckdbExecution(); } // namespace pgduckdb diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index e7b86a55..1cb64bfd 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql @@ -1050,3 +1050,9 @@ RETURNS duckdb.unresolved_type SET search_path = pg_catalog, pg_temp AS 'MODULE_PATHNAME', 'duckdb_only_function' LANGUAGE C; + +GRANT ALL ON FUNCTION duckdb.raw_query(TEXT) TO PUBLIC; +GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC; +GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC; +GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC; +GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC; diff --git a/src/pg/relations.cpp b/src/pg/relations.cpp index ed0a8c01..1266a736 100644 --- a/src/pg/relations.cpp +++ b/src/pg/relations.cpp @@ -11,6 +11,7 @@ extern "C" { #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/rls.h" #include "utils/resowner.h" // CurrentResourceOwner and TopTransactionResourceOwner #include "executor/tuptable.h" // TupIsNull #include "utils/syscache.h" // RELOID @@ -70,6 +71,12 @@ SlotGetAllAttrs(TupleTableSlot *slot) { Relation OpenRelation(Oid relationId) { + if (PostgresFunctionGuard(check_enable_rls, relationId, InvalidOid, false) == RLS_ENABLED) { + throw duckdb::NotImplementedException( + "Cannot use \"%s\" relation in a DuckDB query, because RLS is enabled on it", + PostgresFunctionGuard(get_rel_name, relationId)); + } + /* * We always open & close the relation using the * TopTransactionResourceOwner to avoid having to close the relation diff --git a/src/pgduckdb.cpp b/src/pgduckdb.cpp index 763c6969..973798b8 100644 --- a/src/pgduckdb.cpp +++ b/src/pgduckdb.cpp @@ -26,6 +26,7 @@ int duckdb_maximum_threads = -1; char *duckdb_maximum_memory = strdup("4GB"); char *duckdb_disabled_filesystems = strdup("LocalFileSystem"); bool duckdb_enable_external_access = true; +bool duckdb_allow_community_extensions = false; bool duckdb_allow_unsigned_extensions = false; bool duckdb_autoinstall_known_extensions = true; bool duckdb_autoload_known_extensions = true; @@ -130,6 +131,9 @@ DuckdbInitGUC(void) { DefineCustomVariable("duckdb.enable_external_access", "Allow the DuckDB to access external state.", &duckdb_enable_external_access, PGC_SUSET); + DefineCustomVariable("duckdb.allow_community_extensions", "Disable installing community extensions", + &duckdb_allow_community_extensions, PGC_SUSET); + DefineCustomVariable("duckdb.allow_unsigned_extensions", "Allow DuckDB to load extensions with invalid or missing signatures", &duckdb_allow_unsigned_extensions, PGC_SUSET); diff --git a/src/pgduckdb_duckdb.cpp b/src/pgduckdb_duckdb.cpp index f80eadef..ac417fc3 100644 --- a/src/pgduckdb_duckdb.cpp +++ b/src/pgduckdb_duckdb.cpp @@ -137,6 +137,7 @@ DuckDBManager::Initialize() { SET_DUCKDB_OPTION(allow_unsigned_extensions); SET_DUCKDB_OPTION(enable_external_access); + SET_DUCKDB_OPTION(allow_community_extensions); SET_DUCKDB_OPTION(autoinstall_known_extensions); SET_DUCKDB_OPTION(autoload_known_extensions); @@ -344,9 +345,7 @@ DuckDBManager::RefreshConnectionState(duckdb::ClientContext &context) { */ duckdb::unique_ptr DuckDBManager::CreateConnection() { - if (!pgduckdb::IsDuckdbExecutionAllowed()) { - elog(ERROR, "DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role"); - } + pgduckdb::RequireDuckdbExecution(); auto &instance = Get(); auto connection = duckdb::make_uniq(*instance.database); @@ -360,9 +359,7 @@ DuckDBManager::CreateConnection() { /* Returns the cached connection to the global DuckDB instance. */ duckdb::Connection * DuckDBManager::GetConnection(bool force_transaction) { - if (!pgduckdb::IsDuckdbExecutionAllowed()) { - elog(ERROR, "DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role"); - } + pgduckdb::RequireDuckdbExecution(); auto &instance = Get(); auto &context = *instance.connection->context; diff --git a/src/pgduckdb_metadata_cache.cpp b/src/pgduckdb_metadata_cache.cpp index 86952789..9c935613 100644 --- a/src/pgduckdb_metadata_cache.cpp +++ b/src/pgduckdb_metadata_cache.cpp @@ -347,4 +347,11 @@ IsDuckdbExecutionAllowed() { return has_privs_of_role(GetUserId(), cache.postgres_role_oid); } +void +RequireDuckdbExecution() { + if (!pgduckdb::IsDuckdbExecutionAllowed()) { + elog(ERROR, "DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role"); + } +} + } // namespace pgduckdb diff --git a/src/pgduckdb_options.cpp b/src/pgduckdb_options.cpp index 00f34683..0c97ce9f 100644 --- a/src/pgduckdb_options.cpp +++ b/src/pgduckdb_options.cpp @@ -24,6 +24,7 @@ extern "C" { #include "nodes/nodeFuncs.h" #include "catalog/namespace.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -193,8 +194,28 @@ ReadDuckdbExtensions() { static bool DuckdbInstallExtension(Datum name_datum) { auto extension_name = DatumToString(name_datum); - auto install_extension_command = duckdb::StringUtil::Format("INSTALL %s;", extension_name.c_str()); + + auto install_extension_command = "INSTALL " + duckdb::KeywordHelper::WriteQuoted(extension_name); + + /* + * Temporily allow all filesystems for this query, because INSTALL needs + * local filesystem access. Since this setting cannot be changed inside + * DuckDB after it's set to LocalFileSystem this temporary configuration + * change only really has effect duckdb.install_extension is called as the + * first DuckDB query for this session. Since we cannot change it back. + * + * While that's suboptimal it's also not a huge problem. Users only need to + * install an extension once on a server. So doing that on a new connection + * or after calling duckdb.recycle_ddb() should not be a big deal. + * + * NOTE: Because each backend has its own DuckDB instance, this setting + * does not impact other backends and thus cannot cause a security issue + * due to a race condition. + */ + auto save_nestlevel = NewGUCNestLevel(); + SetConfigOption("duckdb.disabled_filesystems", "", PGC_SUSET, PGC_S_SESSION); pgduckdb::DuckDBQueryOrThrow(install_extension_command); + AtEOXact_GUC(false, save_nestlevel); Oid arg_types[] = {TEXTOID}; Datum values[] = {name_datum}; @@ -319,6 +340,7 @@ DECLARE_PG_FUNCTION(cache) { } DECLARE_PG_FUNCTION(cache_info) { + pgduckdb::RequireDuckdbExecution(); ReturnSetInfo *rsinfo = (ReturnSetInfo *)fcinfo->resultinfo; Tuplestorestate *tuple_store; TupleDesc cache_info_tuple_desc; @@ -359,12 +381,14 @@ DECLARE_PG_FUNCTION(cache_info) { } DECLARE_PG_FUNCTION(cache_delete) { + pgduckdb::RequireDuckdbExecution(); Datum cache_key = PG_GETARG_DATUM(0); bool result = pgduckdb::DuckdbCacheDelete(cache_key); PG_RETURN_BOOL(result); } DECLARE_PG_FUNCTION(pgduckdb_recycle_ddb) { + pgduckdb::RequireDuckdbExecution(); /* * We cannot safely run this in a transaction block, because a DuckDB * transaction might have already started. Recycling the database will diff --git a/src/pgduckdb_planner.cpp b/src/pgduckdb_planner.cpp index e7789188..fb0018a1 100644 --- a/src/pgduckdb_planner.cpp +++ b/src/pgduckdb_planner.cpp @@ -164,9 +164,12 @@ DuckdbPlanNode(Query *parse, const char *query_string, int cursor_options, Param * actual plan with our CustomScan node. This is useful to get the correct * values for all the other many fields of the PLannedStmt. * - * XXX: The primary reason we do this is that Postgres fills in permInfos - * and rtable correctly. Those are needed for postgres to do its permission - * checks on the used tables. + * XXX: The primary reason we did this in the past is so that Postgres + * filled in permInfos and rtable correctly. Those are needed for postgres + * to do its permission checks on the used tables. We do these checks + * inside DuckDB as well, so that's not really necessary anymore. We still + * do this though to get all the other fields filled in correctly. Possibly + * we don't need to do this anymore. * * FIXME: For some reason this needs an additional query copy to allow * re-planning of the query later during execution. But I don't really diff --git a/src/pgduckdb_ruleutils.cpp b/src/pgduckdb_ruleutils.cpp index 7f6af27e..b942dc9c 100644 --- a/src/pgduckdb_ruleutils.cpp +++ b/src/pgduckdb_ruleutils.cpp @@ -525,26 +525,6 @@ pgduckdb_relation_name(Oid relation_oid) { const char *postgres_schema_name = get_namespace_name_or_temp(relation->relnamespace); bool is_duckdb_table = pgduckdb::IsDuckdbTable(relation); - if (!is_duckdb_table) { - /* - * FIXME: This should be moved somewhere else. We already have a list - * of RTEs somwhere that we use to call ExecCheckPermissions. We could - * used that same list to check if RLS is enabled on any of the tables, - * instead of checking it here for **every occurence** of each table in - * the query. One benefit of having it here is that it ensures that we - * never forget to check for RLS. - * - * NOTE: We only need to check this for non-DuckDB tables because DuckDB - * tables don't support RLS anyway. - */ - if (check_enable_rls(relation_oid, InvalidOid, false) == RLS_ENABLED) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("(PGDuckDB/pgduckdb_relation_name) Cannot use \"%s\" in a DuckDB query, because RLS " - "is enabled on it", - get_rel_name(relation_oid)))); - } - } - const char *db_and_schema = pgduckdb_db_and_schema_string(postgres_schema_name, is_duckdb_table); char *result = psprintf("%s.%s", db_and_schema, quote_identifier(relname)); diff --git a/test/pycheck/non_superuser_test.py b/test/pycheck/non_superuser_test.py new file mode 100644 index 00000000..41f1c53e --- /dev/null +++ b/test/pycheck/non_superuser_test.py @@ -0,0 +1,49 @@ +from .utils import Postgres + +import pytest +import psycopg.errors +import psycopg.sql + + +def test_community_extensions(pg: Postgres): + pg.create_user("user1", psycopg.sql.SQL("IN ROLE duckdb_group")) + # Raw extension installation should not be possible non-superusers, because + # that would allow installing extensions from community repos. + with pg.cur() as cur: + cur.sql("SET ROLE user1") + print(cur.sql("SHOW ROLE")) + cur.sql("SET duckdb.force_execution = false") + with pytest.raises( + psycopg.errors.InternalError, + match="Permission Error: File system LocalFileSystem has been disabled by configuration", + ): + cur.sql( + "SELECT * FROM duckdb.raw_query($$ INSTALL avro FROM community; $$)" + ) + + # Even if such community extensions somehow get installed, it's not possible + # to load them without changing allow_community_extensions. Not even for a + # superuser. + with pg.cur() as cur: + cur.sql("SET duckdb.force_execution = false") + cur.sql("SELECT * FROM duckdb.raw_query($$ INSTALL avro FROM community; $$)") + with pytest.raises( + Exception, + match="IO Error: Extension .* could not be loaded because its signature is either missing or invalid and unsigned extensions are disabled by configuration", + ): + cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD avro; $$)") + + # But it should be possible to load them after changing that setting. + with pg.cur() as cur: + cur.sql("SET duckdb.allow_community_extensions = true") + cur.sql("SET duckdb.force_execution = false") + cur.sql("SELECT * FROM duckdb.raw_query($$ LOAD avro; $$)") + + # And that setting is only changeable by superusers + with pg.cur() as cur: + cur.sql("SET ROLE user1") + with pytest.raises( + psycopg.errors.InsufficientPrivilege, + match='permission denied to set parameter "duckdb.allow_community_extensions"', + ): + cur.sql("SET duckdb.allow_community_extensions = true") diff --git a/test/pycheck/utils.py b/test/pycheck/utils.py index daf8b78b..34dbd78e 100644 --- a/test/pycheck/utils.py +++ b/test/pycheck/utils.py @@ -615,6 +615,7 @@ def initdb(self): # And finally, enable pg_duckdb pgconf.write("shared_preload_libraries = pg_duckdb\n") pgconf.write("duckdb.force_execution = 'true'\n") + pgconf.write("duckdb.postgres_role = 'duckdb_group'\n") def pgctl(self, command, **kwargs): pg_ctl = pg_bin("pg_ctl") diff --git a/test/regression/expected/non_superuser.out b/test/regression/expected/non_superuser.out index b4aa0809..48c9dc6e 100644 --- a/test/regression/expected/non_superuser.out +++ b/test/regression/expected/non_superuser.out @@ -22,15 +22,11 @@ SELECT * from duckdb.tables; ERROR: permission denied for table tables SELECT * from duckdb.extensions; ERROR: permission denied for table extensions --- Should fail because raw_query is to dangerous for regular users and the --- others currently allow for SQL injection +-- Should fail because any Postgres tables accesesd from DuckDB will have their +-- permissions checked even if it happens straight from DuckDB. SET duckdb.force_execution = false; -SELECT * FROM duckdb.raw_query($$ SELECT * FROM t $$); -ERROR: permission denied for function raw_query -SELECT * FROM duckdb.install_extension('some hacky sql'); -ERROR: permission denied for function install_extension -SELECT * FROM duckdb.cache('some hacky sql', 'some more hacky sql'); -ERROR: permission denied for function cache +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); +ERROR: (PGDuckDB/pgduckdb_raw_query) Executor Error: (PGDuckDB/PostgresTableReader) permission denied for table t SET duckdb.force_execution = true; -- read_csv from the local filesystem should be disallowed SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; @@ -39,6 +35,18 @@ ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Permission Erro SET ROLE user3; SELECT * FROM t; ERROR: DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role +-- And all these duckdb functions should also fail, even the ones that don't +-- actually open a duckdb connection. +SET duckdb.force_execution = false; +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); +ERROR: DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role +SELECT * FROM duckdb.cache_info(); +ERROR: DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role +SELECT * FROM duckdb.cache_delete('some file'); +ERROR: DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role +CALL duckdb.recycle_ddb(); +ERROR: DuckDB execution is not allowed because you have not been granted the duckdb.postgres_role +SET duckdb.force_execution = true; -- Should work with regular posgres execution though, because this user is -- allowed to read the table. SET duckdb.force_execution = false; @@ -57,10 +65,60 @@ SELECT * FROM t; --- (0 rows) --- Should fail now, we don't support RLS +-- Should fall back to PG execution, because we don't support RLS SET ROLE user1; SELECT * FROM t; -ERROR: (PGDuckDB/pgduckdb_relation_name) Cannot use "t" in a DuckDB query, because RLS is enabled on it +WARNING: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Not implemented Error: Cannot use "t" relation in a DuckDB query, because RLS is enabled on it + a +--- +(0 rows) + +-- Should fail because we require duckdb execution so no fallback +SELECT public.approx_count_distinct(a) FROM t; +ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Not implemented Error: Cannot use "t" relation in a DuckDB query, because RLS is enabled on it +SET duckdb.force_execution = false; +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); +ERROR: (PGDuckDB/pgduckdb_raw_query) Not implemented Error: Cannot use "t" relation in a DuckDB query, because RLS is enabled on it +SET duckdb.force_execution = true; +-- Extension installation +SET duckdb.force_execution = false; +-- Should fail because installing extensions is restricted for super users by default +SELECT * FROM duckdb.install_extension('iceberg'); +ERROR: permission denied for function install_extension +-- Similarly when trying using raw duckdb commands +SELECT * FROM duckdb.raw_query($$ INSTALL someextension $$); +ERROR: (PGDuckDB/pgduckdb_raw_query) Permission Error: File system LocalFileSystem has been disabled by configuration +SET duckdb.force_execution = true; +-- It should be possible to install extensions as non-superuser after the +-- following grants. +RESET ROLE; +GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO user1; +GRANT ALL ON TABLE duckdb.extensions TO user1; +GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO user1; +-- You need to reconnect though (or run recycle_ddb), because +-- disabled_filesystems cannot be changed after it has been set. +\c +SET ROLE user1; +SET duckdb.force_execution = false; +SELECT * FROM duckdb.install_extension('iceberg'); + install_extension +------------------- + t +(1 row) + +-- We should handle SQL injections carefully though to only allow INSTALL +SELECT * FROM duckdb.install_extension($$ '; select * from hacky '' $$); +ERROR: (PGDuckDB/install_extension) HTTP Error: Failed to download extension " '; select * from hacky '' " at URL "http://extensions.duckdb.org/v1.1.3/linux_amd64/ '; select * from hacky '' .duckdb_extension.gz" (HTTP 403) + +Candidate extensions: "delta", "excel", "sqlite_scanner", "inet", "sqlite" +INSERT INTO duckdb.extensions (name) VALUES ($$ '; select * from hacky $$); +SELECT * FROM duckdb.query($$ SELECT 1 $$); +ERROR: (PGDuckDB/DuckdbPlanNode) Parser Error: unterminated quoted string at or near "'; select * from hacky " +LINE 1: LOAD '; select * from hacky + ^ +TRUNCATE duckdb.extensions; +SET duckdb.force_execution = true; RESET ROLE; DROP TABLE t; -DROP USER user1, user2; +DROP OWNED BY user1; +DROP USER user1, user2, user3; diff --git a/test/regression/sql/non_superuser.sql b/test/regression/sql/non_superuser.sql index 63361757..88f0662b 100644 --- a/test/regression/sql/non_superuser.sql +++ b/test/regression/sql/non_superuser.sql @@ -16,12 +16,10 @@ SELECT * from duckdb.secrets; SELECT * from duckdb.tables; SELECT * from duckdb.extensions; --- Should fail because raw_query is to dangerous for regular users and the --- others currently allow for SQL injection +-- Should fail because any Postgres tables accesesd from DuckDB will have their +-- permissions checked even if it happens straight from DuckDB. SET duckdb.force_execution = false; -SELECT * FROM duckdb.raw_query($$ SELECT * FROM t $$); -SELECT * FROM duckdb.install_extension('some hacky sql'); -SELECT * FROM duckdb.cache('some hacky sql', 'some more hacky sql'); +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); SET duckdb.force_execution = true; -- read_csv from the local filesystem should be disallowed @@ -30,6 +28,15 @@ SELECT count(r['sepal.length']) FROM read_csv('../../data/iris.csv') r; SET ROLE user3; SELECT * FROM t; +-- And all these duckdb functions should also fail, even the ones that don't +-- actually open a duckdb connection. +SET duckdb.force_execution = false; +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); +SELECT * FROM duckdb.cache_info(); +SELECT * FROM duckdb.cache_delete('some file'); +CALL duckdb.recycle_ddb(); +SET duckdb.force_execution = true; + -- Should work with regular posgres execution though, because this user is -- allowed to read the table. SET duckdb.force_execution = false; @@ -42,10 +49,48 @@ ALTER TABLE t ENABLE ROW LEVEL SECURITY; -- Should still be allowed, we're superuser SELECT * FROM t; --- Should fail now, we don't support RLS +-- Should fall back to PG execution, because we don't support RLS SET ROLE user1; SELECT * FROM t; +-- Should fail because we require duckdb execution so no fallback +SELECT public.approx_count_distinct(a) FROM t; + +SET duckdb.force_execution = false; +SELECT * FROM duckdb.raw_query($$ SELECT * FROM pgduckdb.public.t $$); +SET duckdb.force_execution = true; + + +-- Extension installation +SET duckdb.force_execution = false; +-- Should fail because installing extensions is restricted for super users by default +SELECT * FROM duckdb.install_extension('iceberg'); +-- Similarly when trying using raw duckdb commands +SELECT * FROM duckdb.raw_query($$ INSTALL someextension $$); +SET duckdb.force_execution = true; + + +-- It should be possible to install extensions as non-superuser after the +-- following grants. +RESET ROLE; +GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO user1; +GRANT ALL ON TABLE duckdb.extensions TO user1; +GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO user1; + +-- You need to reconnect though (or run recycle_ddb), because +-- disabled_filesystems cannot be changed after it has been set. +\c +SET ROLE user1; +SET duckdb.force_execution = false; +SELECT * FROM duckdb.install_extension('iceberg'); +-- We should handle SQL injections carefully though to only allow INSTALL +SELECT * FROM duckdb.install_extension($$ '; select * from hacky '' $$); +INSERT INTO duckdb.extensions (name) VALUES ($$ '; select * from hacky $$); +SELECT * FROM duckdb.query($$ SELECT 1 $$); +TRUNCATE duckdb.extensions; +SET duckdb.force_execution = true; + RESET ROLE; DROP TABLE t; -DROP USER user1, user2; +DROP OWNED BY user1; +DROP USER user1, user2, user3;