diff --git a/.prettierignore b/.prettierignore index 93696b4ac49..e7146d6037a 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ .github/ .husky/ +.ranger/ .svelte-kit/ .vscode/ .idea/ diff --git a/docs/docs/developers/build/connectors/connectors.md b/docs/docs/developers/build/connectors/connectors.md index e792109076d..e25abe06687 100644 --- a/docs/docs/developers/build/connectors/connectors.md +++ b/docs/docs/developers/build/connectors/connectors.md @@ -164,7 +164,6 @@ Rill is continually evaluating additional OLAP engines that can be added. For a content="Connect to SQLite databases for lightweight, file-based data storage and querying." link="/developers/build/connectors/data-source/sqlite" linkLabel="Learn more" - referenceLink="sqlite" /> } @@ -216,6 +215,19 @@ Rill is continually evaluating additional OLAP engines that can be added. For a +## Table Formats +### Apache Iceberg + +
+ } + header="Apache Iceberg" + content="Read Iceberg tables directly from object storage through compatible query engines." + link="/developers/build/connectors/data-source/iceberg" + linkLabel="Learn more" + /> +
+ ## Other Data Connectors ### External DuckDB ### Google Sheets @@ -262,7 +274,6 @@ Rill is continually evaluating additional OLAP engines that can be added. For a content="Connect to Salesforce to extract data from objects and queries using the Salesforce API." link="/developers/build/connectors/data-source/salesforce" linkLabel="Learn more" - referenceLink="salesforce" /> diff --git a/docs/docs/developers/build/connectors/data-source/data-source.md b/docs/docs/developers/build/connectors/data-source/data-source.md index d83f604857d..5c760e32935 100644 --- a/docs/docs/developers/build/connectors/data-source/data-source.md +++ b/docs/docs/developers/build/connectors/data-source/data-source.md @@ -84,6 +84,7 @@ Rill supports connecting your data to both [DuckDB](/developers/build/connectors ### MySQL ### PostgreSQL ### SQLite +### Supabase
+ } + header="Supabase" + content="Connect to Supabase's managed PostgreSQL databases with SSL support and standard connection methods." + link="/developers/build/connectors/data-source/supabase" + linkLabel="Learn more" + referenceLink="supabase" />
@@ -146,9 +154,19 @@ Rill supports connecting your data to both [DuckDB](/developers/build/connectors linkLabel="Learn more" referenceLink="azure" /> + +## Table Formats +### Apache Iceberg - +
+ } + header="Apache Iceberg" + content="Read Iceberg tables directly from object storage through compatible query engines." + link="/developers/build/connectors/data-source/iceberg" + linkLabel="Learn more" + />
## Other Data Connectors @@ -197,7 +215,6 @@ Rill supports connecting your data to both [DuckDB](/developers/build/connectors content="Connect to Salesforce to extract data from objects and queries using the Salesforce API." link="/developers/build/connectors/data-source/salesforce" linkLabel="Learn more" - referenceLink="salesforce" /> diff --git a/docs/docs/developers/build/connectors/data-source/iceberg.md b/docs/docs/developers/build/connectors/data-source/iceberg.md new file mode 100644 index 00000000000..85ac35732b7 --- /dev/null +++ b/docs/docs/developers/build/connectors/data-source/iceberg.md @@ -0,0 +1,134 @@ +--- +title: Apache Iceberg +description: Read Iceberg tables from object storage +sidebar_label: Apache Iceberg +sidebar_position: 27 +--- + +## Overview + +[Apache Iceberg](https://iceberg.apache.org/) is an open table format for large analytic datasets. Rill supports reading Iceberg tables directly from object storage through compatible query engine integrations. Today, this is powered by DuckDB's native [Iceberg extension](https://duckdb.org/docs/extensions/iceberg/overview.html). + +:::note Direct file access only +Rill reads Iceberg tables by scanning the table's metadata and data files directly from object storage. Catalog-based access (e.g., through a Hive Metastore, AWS Glue, or REST catalog) is not currently supported. 
+::: + +## Storage Backends + +Iceberg tables can be read from any of the following storage backends: + +| Backend | URI format | Authentication | +|---|---|---| +| Amazon S3 | `s3://bucket/path/to/table` | Requires an [S3 connector](/developers/build/connectors/data-source/s3) | +| Google Cloud Storage | `gs://bucket/path/to/table` | Requires a [GCS connector](/developers/build/connectors/data-source/gcs) with HMAC keys | +| Azure Blob Storage | `azure://container/path/to/table` | Requires an [Azure connector](/developers/build/connectors/data-source/azure) | +| Local filesystem | `/path/to/table` | No authentication needed | + +For cloud storage backends, you must first configure the corresponding storage connector with valid credentials. Rill uses these credentials to authenticate when reading the Iceberg table files. + +## Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Apache Iceberg** as the data source type +3. Choose your storage backend (S3, GCS, Azure, or Local) +4. Enter the path to your Iceberg table directory +5. Optionally configure advanced parameters (allow moved paths, snapshot version) +6. Enter a model name and click **Create** + +For cloud storage backends, the UI will prompt you to set up the corresponding storage connector if one doesn't already exist. + +## Manual Configuration + +Create a model that uses DuckDB's `iceberg_scan()` function to read the table. + +### Reading from S3 + +Create `models/iceberg_data.yaml`: + +```yaml +type: model +connector: duckdb +create_secrets_from_connectors: s3 +materialize: true + +sql: | + SELECT * + FROM iceberg_scan('s3://my-bucket/path/to/iceberg_table') +``` + +### Reading from GCS + +:::info HMAC keys required +DuckDB's `iceberg_scan()` authenticates to GCS using HMAC keys, not JSON service account credentials. 
When configuring your [GCS connector](/developers/build/connectors/data-source/gcs), use the `key_id` and `secret` (HMAC) properties instead of `google_application_credentials`. +::: + +```yaml +type: model +connector: duckdb +create_secrets_from_connectors: gcs +materialize: true + +sql: | + SELECT * + FROM iceberg_scan('gs://my-bucket/path/to/iceberg_table') +``` + +### Reading from Azure + +```yaml +type: model +connector: duckdb +create_secrets_from_connectors: azure +materialize: true + +sql: | + SELECT * + FROM iceberg_scan('azure://my-container/path/to/iceberg_table') +``` + +### Reading from local filesystem + +```yaml +type: model +connector: duckdb +materialize: true + +sql: | + SELECT * + FROM iceberg_scan('/path/to/iceberg_table') +``` + +## Optional Parameters + +The `iceberg_scan()` function accepts additional parameters: + +| Parameter | Type | Description | +|---|---|---| +| `allow_moved_paths` | boolean | Allow reading tables where data files have been moved from their original location. Defaults to `true` in the UI. | +| `version` | string | Read a specific Iceberg snapshot version instead of the latest. | + +Example with optional parameters: + +```sql +SELECT * +FROM iceberg_scan('s3://my-bucket/path/to/iceberg_table', + allow_moved_paths = true, + version = '2') +``` + +## Deploy to Rill Cloud + +Since Iceberg tables are read through DuckDB using your existing storage connector credentials, deploying to Rill Cloud follows the same process as the underlying storage connector: + +- **S3**: Follow the [S3 deployment guide](/developers/build/connectors/data-source/s3#deploy-to-rill-cloud) +- **GCS**: Follow the [GCS deployment guide](/developers/build/connectors/data-source/gcs#deploy-to-rill-cloud) +- **Azure**: Follow the [Azure deployment guide](/developers/build/connectors/data-source/azure#deploy-to-rill-cloud) + +Ensure your storage connector credentials are configured in your Rill Cloud project before deploying. 
+ +## Limitations + +- **Direct file access only**: Rill reads Iceberg metadata and data files directly from storage. Catalog integrations (Hive Metastore, AWS Glue, REST catalog) are not supported. +- **DuckDB engine**: Iceberg support is currently provided through DuckDB's Iceberg extension. Additional engine support (e.g., ClickHouse) is planned. +- **GCS requires HMAC keys**: DuckDB's `iceberg_scan()` only supports HMAC authentication for GCS, not JSON service account credentials. +- **Read-only**: Rill reads from Iceberg tables but does not write to them. diff --git a/docs/docs/developers/build/connectors/data-source/salesforce.md b/docs/docs/developers/build/connectors/data-source/salesforce.md index 2937107526f..b96148d51e2 100644 --- a/docs/docs/developers/build/connectors/data-source/salesforce.md +++ b/docs/docs/developers/build/connectors/data-source/salesforce.md @@ -14,7 +14,7 @@ sidebar_position: 65 ## Local credentials -When using Rill Developer on your local machine, you will need to provide your credentials via a connector file. We would recommend not using plain text to create your file and instead use the `.env` file. For more details on your connector, see [connector YAML](/reference/project-files/connectors#salesforce) for more details. +When using Rill Developer on your local machine, you will need to provide your credentials via a connector file. We would recommend not using plain text to create your file and instead use the `.env` file. For more details on your connector, see [connector YAML](/reference/project-files/connectors). :::tip Updating the project environmental variable @@ -44,7 +44,7 @@ If this project has already been deployed to Rill Cloud and credentials have bee ## Deploy to Rill Cloud -When deploying a project to Rill Cloud, Rill requires you to explicitly provide Salesforce credentials used in your project. 
Please refer to our [connector YAML reference docs](/reference/project-files/connectors#salesforce) for more information. +When deploying a project to Rill Cloud, Rill requires you to explicitly provide Salesforce credentials used in your project. Please refer to our [connector YAML reference docs](/reference/project-files/connectors) for more information. If you subsequently add sources that require new credentials (or if you simply entered the wrong credentials during the initial deploy), you can update the credentials by pushing the `Deploy` button to update your project or by running the following command in the CLI: ``` diff --git a/docs/docs/developers/build/connectors/data-source/sqlite.md b/docs/docs/developers/build/connectors/data-source/sqlite.md index 2e37886780a..eef801cbd93 100644 --- a/docs/docs/developers/build/connectors/data-source/sqlite.md +++ b/docs/docs/developers/build/connectors/data-source/sqlite.md @@ -13,22 +13,23 @@ sidebar_position: 80 ## Connect to SQLite +SQLite databases are read through DuckDB's [SQLite extension](https://duckdb.org/docs/extensions/sqlite.html) using the `sqlite_scan()` function. No separate connector is needed. -In many cases, since SQLite is used as an in-process database, credentials are not required. Instead, Rill will need to know the path to the SQLite database file so that it can be read accordingly. +Create a model file (e.g., `models/my_sqlite_data.yaml`): ```yaml -type: connector -driver: sqlite +type: model +connector: duckdb +materialize: true -dsn: "file:mydatabase.db" +sql: | + SELECT * + FROM sqlite_scan('data/mydatabase.db', 'my_table') ``` -Alternatively, you can create the connector directly using the [connector YAML reference documentation](/reference/project-files/connectors#sqlite). - - :::tip -If you plan to deploy the project to Rill Cloud, it is recommended that you move the SQLite database file to a `data` folder in your Rill project home directory. 
You can then use the relative path of the db file in your source definition (e.g., `data/test_sqlite.db`). +If you plan to deploy the project to Rill Cloud, place the SQLite database file in a `data` folder in your Rill project directory and use the relative path (e.g., `data/mydatabase.db`). ::: diff --git a/docs/docs/reference/project-files/connectors.md b/docs/docs/reference/project-files/connectors.md index 75c2a23f1d7..9ce32f863bb 100644 --- a/docs/docs/reference/project-files/connectors.md +++ b/docs/docs/reference/project-files/connectors.md @@ -26,7 +26,6 @@ Connector YAML files define how Rill connects to external data sources and OLAP ### _Databases_ - [**MySQL**](#mysql) - MySQL databases - [**PostgreSQL**](#postgres) - PostgreSQL databases -- [**SQLite**](#sqlite) - SQLite databases - [**Supabase**](#supabase) - Supabase (managed PostgreSQL) ### _Object Storage_ @@ -42,7 +41,6 @@ Connector YAML files define how Rill connects to external data sources and OLAP ### _Other_ - [**HTTPS**](#https) - Public files via HTTP/HTTPS -- [**Salesforce**](#salesforce) - Salesforce data :::warning Security Recommendation For all credential parameters (passwords, tokens, keys), use environment variables with the syntax `{{ .env.KEY_NAME }}`. This keeps sensitive data out of your YAML files and version control. See our [credentials documentation](/developers/build/connectors/credentials/) for complete setup instructions. 
@@ -1127,43 +1125,6 @@ endpoint: "https://my-s3-endpoint.com" # Optional custom endpoint URL for S3-com region: "us-east-1" # AWS region of the S3 bucket ``` -## Salesforce - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `salesforce` _(required)_ - -### `username` - -_[string]_ - Salesforce account username _(required)_ - -### `password` - -_[string]_ - Salesforce account password (secret) - -### `key` - -_[string]_ - Authentication key for Salesforce (secret) - -### `endpoint` - -_[string]_ - Salesforce API endpoint URL _(required)_ - -### `client_id` - -_[string]_ - Client ID used for Salesforce OAuth authentication _(required)_ - -```yaml -# Example: Salesforce connector configuration -type: connector # Must be `connector` (required) -driver: salesforce # Must be `salesforce` _(required)_ -username: "myusername" # Salesforce account username -password: "{{ .env.SALESFORCE_PASSWORD }}" # Salesforce account password (secret) -key: "{{ .env.SALESFORCE_KEY }}" # Authentication key for Salesforce (secret) -endpoint: "https://login.salesforce.com" # Salesforce API endpoint URL -client_id: "my-client-id" # Client ID used for Salesforce OAuth authentication -``` - ## Slack ### `driver` @@ -1275,21 +1236,4 @@ type: connector driver: snowflake dsn: "{{ .env.SNOWFLAKE_DSN }}" # define SNOWFLAKE_DSN in .env file parallel_fetch_limit: 2 -``` - -## SQLite - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `sqlite` _(required)_ - -### `dsn` - -_[string]_ - DSN(Data Source Name) for the sqlite connection _(required)_ - -```yaml -# Example: SQLite connector configuration -type: connector # Must be `connector` (required) -driver: sqlite # Must be `sqlite` _(required)_ -dsn: "file:mydatabase.db" # DSN for the sqlite connection ``` \ No newline at end of file diff --git a/docs/docs/reference/project-files/models.md b/docs/docs/reference/project-files/models.md index 9b7aa5efeab..f087719270e 100644 --- 
a/docs/docs/reference/project-files/models.md +++ b/docs/docs/reference/project-files/models.md @@ -673,20 +673,6 @@ _[object]_ - Settings related to glob file matching. _[string]_ - Size of a batch (e.g., '100MB') -## Additional properties when `connector` is `salesforce` or [named connector](./connectors#salesforce) of salesforce - -### `soql` - -_[string]_ - SOQL query to execute against the Salesforce instance. - -### `sobject` - -_[string]_ - Salesforce object (e.g., Account, Contact) targeted by the query. - -### `queryAll` - -_[boolean]_ - Whether to include deleted and archived records in the query (uses queryAll API). - ## Examples ```yaml diff --git a/docs/static/img/build/connectors/icons/Logo-Iceberg.svg b/docs/static/img/build/connectors/icons/Logo-Iceberg.svg new file mode 100644 index 00000000000..251a15d5cdc --- /dev/null +++ b/docs/static/img/build/connectors/icons/Logo-Iceberg.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/runtime/drivers/duckdb/duckdb.go b/runtime/drivers/duckdb/duckdb.go index 362d5a68aba..9e3e47de00f 100644 --- a/runtime/drivers/duckdb/duckdb.go +++ b/runtime/drivers/duckdb/duckdb.go @@ -524,6 +524,25 @@ func (c *connection) reopenDB(ctx context.Context) error { dbInitQueries = append(dbInitQueries, c.config.InitSQL) } + // Set extension/secret directories and hosted-only settings before loading extensions; + // once an extension initializes the secret manager, these settings become immutable. + if !c.config.AllowHostAccess { + extensionDir, err := extensions.ExtensionsDir() + if err != nil { + return err + } + secretDir, err := c.storage.DataDir("secrets") + if err != nil { + return err + } + dbInitQueries = append(dbInitQueries, + fmt.Sprintf("SET extension_directory=%s", safeSQLString(extensionDir)), + fmt.Sprintf("SET secret_directory=%s", safeSQLString(secretDir)), + // Reduces batch data ingestion time by ~40% in hosted environments where source data is never viewed directly. 
+ "SET GLOBAL preserve_insertion_order TO false", + ) + } + dbInitQueries = append(dbInitQueries, "INSTALL 'json'", "INSTALL 'sqlite'", @@ -546,25 +565,6 @@ func (c *connection) reopenDB(ctx context.Context) error { return err } - // We want to set preserve_insertion_order=false in hosted environments only (where source data is never viewed directly). Setting it reduces batch data ingestion time by ~40%. - // Hack: Using AllowHostAccess as a proxy indicator for a hosted environment. - if !c.config.AllowHostAccess { - extensionDir, err := extensions.ExtensionsDir() - if err != nil { - return err - } - - secretDir, err := c.storage.DataDir("secrets") - if err != nil { - return err - } - dbInitQueries = append(dbInitQueries, - "SET GLOBAL preserve_insertion_order TO false", - fmt.Sprintf("SET extension_directory=%s", safeSQLString(extensionDir)), - fmt.Sprintf("SET secret_directory=%s", safeSQLString(secretDir)), - ) - } - // Add init SQL if provided if c.config.ConnInitSQL != "" { connInitQueries = append(connInitQueries, c.config.ConnInitSQL) diff --git a/runtime/parser/schema/project.schema.yaml b/runtime/parser/schema/project.schema.yaml index 28fc16892fa..99c79370d02 100644 --- a/runtime/parser/schema/project.schema.yaml +++ b/runtime/parser/schema/project.schema.yaml @@ -63,7 +63,6 @@ definitions: ### _Databases_ - [**MySQL**](#mysql) - MySQL databases - [**PostgreSQL**](#postgres) - PostgreSQL databases - - [**SQLite**](#sqlite) - SQLite databases - [**Supabase**](#supabase) - Supabase (managed PostgreSQL) ### _Object Storage_ @@ -79,7 +78,6 @@ definitions: ### _Other_ - [**HTTPS**](#https) - Public files via HTTP/HTTPS - - [**Salesforce**](#salesforce) - Salesforce data :::warning Security Recommendation For all credential parameters (passwords, tokens, keys), use environment variables with the syntax `{{ .env.KEY_NAME }}`. This keeps sensitive data out of your YAML files and version control. 
See our [credentials documentation](/developers/build/connectors/credentials/) for complete setup instructions. @@ -526,6 +524,8 @@ definitions: required: - driver - path + # Note: Iceberg is not a standalone connector. It uses DuckDB's iceberg_scan() function. + # See /developers/build/connectors/data-source/iceberg for configuration details. - type: object title: MotherDuck properties: @@ -1057,42 +1057,6 @@ definitions: required: - driver - bucket - - type: object - title: Salesforce - properties: - driver: - type: string - description: Refers to the driver type and must be driver `salesforce` - username: - type: string - description: Salesforce account username - password: - type: string - description: Salesforce account password (secret) - key: - type: string - description: Authentication key for Salesforce (secret) - endpoint: - type: string - description: Salesforce API endpoint URL - client_id: - type: string - description: Client ID used for Salesforce OAuth authentication - examples: - - # Example: Salesforce connector configuration - type: connector # Must be `connector` (required) - driver: salesforce # Must be `salesforce` _(required)_ - - username: "myusername" # Salesforce account username - password: "{{ .env.SALESFORCE_PASSWORD }}" # Salesforce account password (secret) - key: "{{ .env.SALESFORCE_KEY }}" # Authentication key for Salesforce (secret) - endpoint: "https://login.salesforce.com" # Salesforce API endpoint URL - client_id: "my-client-id" # Client ID used for Salesforce OAuth authentication - required: - - driver - - username - - endpoint - - client_id - type: object title: Slack properties: @@ -1197,26 +1161,8 @@ definitions: required: - type - driver - - type: object - title: SQLite - properties: - driver: - type: string - description: Refers to the driver type and must be driver `sqlite` - dsn: - type: string - description: DSN(Data Source Name) for the sqlite connection - examples: - - # Example: SQLite connector configuration - type: 
connector # Must be `connector` (required) - driver: sqlite # Must be `sqlite` _(required)_ - dsn: "file:mydatabase.db" # DSN for the sqlite connection - required: - - driver - - dsn - # Source YAML sources: title: Source YAML @@ -1252,7 +1198,6 @@ definitions: - redshift - postgres - supabase - - sqlite - snowflake - bigquery - duckdb @@ -1729,15 +1674,6 @@ definitions: - connector then: $ref: '#/definitions/models/definitions/s3' - - if: - title: Additional properties when `connector` is `salesforce` or [named connector](./connectors#salesforce) of salesforce - properties: - connector: - const: salesforce - required: - - connector - then: - $ref: '#/definitions/models/definitions/salesforce' definitions: athena: type: object @@ -1919,18 +1855,6 @@ definitions: batch_size: type: string description: 'Size of a batch (e.g., ''100MB'')' - salesforce: - type: object - properties: - soql: - type: string - description: SOQL query to execute against the Salesforce instance. - sobject: - type: string - description: Salesforce object (e.g., Account, Contact) targeted by the query. - queryAll: - type: boolean - description: Whether to include deleted and archived records in the query (uses queryAll API). 
examples: - ### Incremental model type: model diff --git a/runtime/resolvers/testdata/connector_iceberg.yaml b/runtime/resolvers/testdata/connector_iceberg.yaml new file mode 100644 index 00000000000..a83b58581db --- /dev/null +++ b/runtime/resolvers/testdata/connector_iceberg.yaml @@ -0,0 +1,43 @@ +expensive: true +connectors: + - gcs +project_files: + iceberg_gcs_with_secrets.yaml: + type: model + connector: duckdb + create_secrets_from_connectors: gcs + materialize: true + sql: | + SELECT * + FROM iceberg_scan('gs://integration-test.rilldata.com/iceberg/lineitem_iceberg', + allow_moved_paths = true) +tests: + - name: test_iceberg_gcs_with_secrets_row_count + resolver: sql + properties: + sql: "select count(*) as count from iceberg_gcs_with_secrets" + result_csv: | + count + 51793 + - name: test_iceberg_gcs_with_secrets_schema + resolver: sql + properties: + sql: "describe iceberg_gcs_with_secrets" + result_csv: | + column_name,column_type,null,key,default,extra + l_orderkey,INTEGER,YES,,, + l_partkey,INTEGER,YES,,, + l_suppkey,INTEGER,YES,,, + l_linenumber,INTEGER,YES,,, + l_quantity,INTEGER,YES,,, + l_extendedprice,"DECIMAL(15,2)",YES,,, + l_discount,"DECIMAL(15,2)",YES,,, + l_tax,"DECIMAL(15,2)",YES,,, + l_returnflag,VARCHAR,YES,,, + l_linestatus,VARCHAR,YES,,, + l_shipdate,DATE,YES,,, + l_commitdate,DATE,YES,,, + l_receiptdate,DATE,YES,,, + l_shipinstruct,VARCHAR,YES,,, + l_shipmode,VARCHAR,YES,,, + l_comment,VARCHAR,YES,,, diff --git a/web-common/src/components/icons/connectors/ApacheIceberg.svelte b/web-common/src/components/icons/connectors/ApacheIceberg.svelte new file mode 100644 index 00000000000..0b866c5f310 --- /dev/null +++ b/web-common/src/components/icons/connectors/ApacheIceberg.svelte @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web-common/src/components/icons/connectors/ApacheIcebergIcon.svelte b/web-common/src/components/icons/connectors/ApacheIcebergIcon.svelte new file mode 100644 index 
00000000000..0e0061679d1 --- /dev/null +++ b/web-common/src/components/icons/connectors/ApacheIcebergIcon.svelte @@ -0,0 +1,131 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web-common/src/components/icons/connectors/GoogleCloudStorageIcon.svelte b/web-common/src/components/icons/connectors/GoogleCloudStorageIcon.svelte new file mode 100644 index 00000000000..db60ba30180 --- /dev/null +++ b/web-common/src/components/icons/connectors/GoogleCloudStorageIcon.svelte @@ -0,0 +1,36 @@ + + + + + + + + + + + + + diff --git a/web-common/src/components/icons/connectors/MicrosoftAzureBlobStorageIcon.svelte b/web-common/src/components/icons/connectors/MicrosoftAzureBlobStorageIcon.svelte new file mode 100644 index 00000000000..4084a10287a --- /dev/null +++ b/web-common/src/components/icons/connectors/MicrosoftAzureBlobStorageIcon.svelte @@ -0,0 +1,33 @@ + + + + + + + + + diff --git a/web-common/src/features/connectors/connector-icon-mapping.ts b/web-common/src/features/connectors/connector-icon-mapping.ts index f364e9197b8..6e6eee05653 100644 --- a/web-common/src/features/connectors/connector-icon-mapping.ts +++ b/web-common/src/features/connectors/connector-icon-mapping.ts @@ -3,8 +3,11 @@ import ApacheDruidIcon from "../../components/icons/connectors/ApacheDruidIcon.s import ApachePinotIcon from "../../components/icons/connectors/ApachePinotIcon.svelte"; import ClickHouseIcon from "../../components/icons/connectors/ClickHouseIcon.svelte"; import ClickHouseCloudIcon from "../../components/icons/connectors/ClickHouseCloudIcon.svelte"; +import ApacheIcebergIcon from "../../components/icons/connectors/ApacheIcebergIcon.svelte"; import DuckDbIcon from "../../components/icons/connectors/DuckDBIcon.svelte"; import GoogleBigQueryIcon from "../../components/icons/connectors/GoogleBigQueryIcon.svelte"; +import GoogleCloudStorageIcon from "../../components/icons/connectors/GoogleCloudStorageIcon.svelte"; 
+import MicrosoftAzureBlobStorageIcon from "../../components/icons/connectors/MicrosoftAzureBlobStorageIcon.svelte"; import AthenaIcon from "../../components/icons/connectors/AthenaIcon.svelte"; import PostgresIcon from "../../components/icons/connectors/PostgresIcon.svelte"; import MySqlIcon from "../../components/icons/connectors/MySqlIcon.svelte"; @@ -17,12 +20,15 @@ import SupabaseIcon from "../../components/icons/connectors/SupabaseIcon.svelte" export const connectorIconMapping = { athena: AthenaIcon, + azure: MicrosoftAzureBlobStorageIcon, bigquery: GoogleBigQueryIcon, clickhouse: ClickHouseIcon, clickhousecloud: ClickHouseCloudIcon, motherduck: MotherDuckIcon, druid: ApacheDruidIcon, duckdb: DuckDbIcon, + gcs: GoogleCloudStorageIcon, + iceberg: ApacheIcebergIcon, mysql: MySqlIcon, pinot: ApachePinotIcon, postgres: PostgresIcon, diff --git a/web-common/src/features/sources/modal/connector-schemas.ts b/web-common/src/features/sources/modal/connector-schemas.ts index b28b896c252..a415b5092ba 100644 --- a/web-common/src/features/sources/modal/connector-schemas.ts +++ b/web-common/src/features/sources/modal/connector-schemas.ts @@ -17,6 +17,7 @@ import { sqliteSchema } from "../../templates/schemas/sqlite"; import { localFileSchema } from "../../templates/schemas/local_file"; import { duckdbSchema } from "../../templates/schemas/duckdb"; import { httpsSchema } from "../../templates/schemas/https"; +import { icebergSchema } from "../../templates/schemas/iceberg"; import { motherduckSchema } from "../../templates/schemas/motherduck"; import { druidSchema } from "../../templates/schemas/druid"; import { pinotSchema } from "../../templates/schemas/pinot"; @@ -45,6 +46,7 @@ export const multiStepFormSchemas: Record = { https: httpsSchema, s3: s3Schema, gcs: gcsSchema, + iceberg: icebergSchema, azure: azureSchema, }; diff --git a/web-common/src/features/sources/modal/constants.ts b/web-common/src/features/sources/modal/constants.ts index 69e98a01655..c6574674639 100644 
--- a/web-common/src/features/sources/modal/constants.ts +++ b/web-common/src/features/sources/modal/constants.ts @@ -85,6 +85,7 @@ export const SOURCES = [ "azure", "bigquery", "gcs", + "iceberg", "mysql", "postgres", "redshift", diff --git a/web-common/src/features/sources/modal/icons.ts b/web-common/src/features/sources/modal/icons.ts index 8606137e17f..6e7c37a2122 100644 --- a/web-common/src/features/sources/modal/icons.ts +++ b/web-common/src/features/sources/modal/icons.ts @@ -7,6 +7,7 @@ import ApachePinot from "../../../components/icons/connectors/ApachePinot.svelte import ClickHouse from "../../../components/icons/connectors/ClickHouse.svelte"; import DuckDB from "../../../components/icons/connectors/DuckDB.svelte"; import GoogleBigQuery from "../../../components/icons/connectors/GoogleBigQuery.svelte"; +import ApacheIceberg from "../../../components/icons/connectors/ApacheIceberg.svelte"; import GoogleCloudStorage from "../../../components/icons/connectors/GoogleCloudStorage.svelte"; import Https from "../../../components/icons/connectors/HTTPS.svelte"; import LocalFile from "../../../components/icons/connectors/LocalFile.svelte"; @@ -21,6 +22,7 @@ import Supabase from "../../../components/icons/connectors/Supabase.svelte"; export const ICONS = { gcs: GoogleCloudStorage, + iceberg: ApacheIceberg, s3: AmazonS3, azure: MicrosoftAzureBlobStorage, bigquery: GoogleBigQuery, diff --git a/web-common/src/features/sources/sourceUtils.ts b/web-common/src/features/sources/sourceUtils.ts index 5332ab67a08..c96e54ebdd4 100644 --- a/web-common/src/features/sources/sourceUtils.ts +++ b/web-common/src/features/sources/sourceUtils.ts @@ -269,6 +269,56 @@ export function maybeRewriteToDuckDb( delete formValues.table; break; + case "iceberg": { + connectorCopy.name = "duckdb"; + + // Determine which path field has a value + const icebergPath = (formValues.gcs_path || + formValues.s3_path || + formValues.azure_path || + formValues.public_path || + formValues.local_path) as 
string; + const storageType = formValues.storage_type as string; + + // Set create_secrets_from_connectors for cloud storage backends + if (storageType && storageType !== "local" && storageType !== "public") { + formValues.create_secrets_from_connectors = storageType; + } + + // Build iceberg_scan parameter list + const scanParams: string[] = []; + + const allowMovedPaths = formValues.allow_moved_paths; + if (allowMovedPaths !== undefined && allowMovedPaths !== "") { + scanParams.push(`allow_moved_paths = ${allowMovedPaths}`); + } + + const icebergVersion = formValues.version as string; + if (icebergVersion?.trim()) { + scanParams.push(`version = '${icebergVersion.trim()}'`); + } + + const paramsStr = scanParams.length + ? `,\n ${scanParams.join(",\n ")}` + : ""; + + formValues.sql = `SELECT *\nFROM iceberg_scan('${icebergPath}'${paramsStr})`; + + // Clean up intermediate fields + delete formValues.storage_type; + delete formValues.gcs_path; + delete formValues.s3_path; + delete formValues.azure_path; + delete formValues.public_path; + delete formValues.local_path; + delete formValues.gcs_info; + delete formValues.s3_info; + delete formValues.azure_info; + delete formValues.allow_moved_paths; + delete formValues.version; + + break; + } } return [connectorCopy, formValues]; diff --git a/web-common/src/features/templates/ConnectionTypeSelector.svelte b/web-common/src/features/templates/ConnectionTypeSelector.svelte index a5659c9c047..c5fa6f01817 100644 --- a/web-common/src/features/templates/ConnectionTypeSelector.svelte +++ b/web-common/src/features/templates/ConnectionTypeSelector.svelte @@ -1,8 +1,13 @@