Skip to content

Commit e358a53

Browse files
authored
Merge pull request #744 from splitgraph/iceberg-external-tables
Wire up Iceberg external tables
2 parents e019ffc + b18c772 commit e358a53

10 files changed

+461
-11
lines changed

Cargo.lock

Lines changed: 407 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,11 @@ deltalake = { git = "https://github.com/splitgraph/delta-rs", branch = "fix-deci
100100

101101
futures = "0.3"
102102
hex = ">=0.4.0"
103+
104+
iceberg-datafusion = { git = "https://github.com/apache/iceberg-rust", rev = "697a20060f2247da87f73073e8bf5ab407bd40ea" }
103105
indexmap = "2.6.0"
104106
itertools = { workspace = true }
107+
105108
lazy_static = ">=1.4.0"
106109
metrics = { version = "0.23.0" }
107110
metrics-exporter-prometheus = { version = "0.15.3" }

src/config/context.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use datafusion::{
2020
};
2121
use deltalake::delta_datafusion::DeltaTableFactory;
2222
use deltalake::storage::factories;
23+
use iceberg_datafusion::table_provider_factory::IcebergTableProviderFactory;
2324
use metrics::describe_counter;
2425
use metrics_exporter_prometheus::PrometheusBuilder;
2526

@@ -103,14 +104,16 @@ pub fn build_state_with_table_factories(
103104
.with_default_features()
104105
.build();
105106

106-
state
107-
.table_factories_mut()
108-
.insert("DELTATABLE".to_string(), Arc::new(DeltaTableFactory {}));
107+
let table_factories = state.table_factories_mut();
108+
109+
table_factories.insert("DELTATABLE".to_string(), Arc::new(DeltaTableFactory {}));
110+
table_factories.insert(
111+
"ICEBERG".to_string(),
112+
Arc::new(IcebergTableProviderFactory {}),
113+
);
109114
#[cfg(feature = "remote-tables")]
110115
{
111-
state
112-
.table_factories_mut()
113-
.insert("TABLE".to_string(), Arc::new(RemoteTableFactory {}));
116+
table_factories.insert("TABLE".to_string(), Arc::new(RemoteTableFactory {}));
114117
}
115118
state
116119
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"location":"s3://seafowl-test-bucket/test-data/iceberg/default.db/iceberg_table","table-uuid":"16ebf585-ea7d-407a-b273-1c6d3ccb3375","last-updated-ms":1732524826682,"last-column-id":2,"schemas":[{"type":"struct","fields":[{"id":1,"name":"key","type":"int","required":true},{"id":2,"name":"value","type":"string","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"snapshots":[],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":2,"last-sequence-number":0}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"location":"s3://seafowl-test-bucket/test-data/iceberg/default.db/iceberg_table","table-uuid":"16ebf585-ea7d-407a-b273-1c6d3ccb3375","last-updated-ms":1732524832199,"last-column-id":2,"schemas":[{"type":"struct","fields":[{"id":1,"name":"key","type":"int","required":true},{"id":2,"name":"value","type":"string","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"current-snapshot-id":1285208930498918146,"snapshots":[{"snapshot-id":1285208930498918146,"sequence-number":1,"timestamp-ms":1732524832199,"manifest-list":"s3://seafowl-test-bucket/test-data/iceberg/default.db/iceberg_table/metadata/snap-1285208930498918146-0-a992f3e7-7f29-497a-976f-3456cf41ee20.avro","summary":{"operation":"append","added-files-size":"1037","added-data-files":"1","added-records":"4","total-data-files":"1","total-delete-files":"0","total-records":"4","total-files-size":"1037","total-position-deletes":"0","total-equality-deletes":"0"},"schema-id":0}],"snapshot-log":[{"snapshot-id":1285208930498918146,"timestamp-ms":1732524832199}],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{"main":{"snapshot-id":1285208930498918146,"type":"branch"}},"format-version":2,"last-sequence-number":1}
Binary file not shown.
20 KB
Binary file not shown.

tests/statements/query.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,3 +369,43 @@ async fn test_delta_tables() {
369369
];
370370
assert_batches_eq!(expected, &results);
371371
}
372+
373+
/// Checks that an Iceberg table can be exposed through
/// `CREATE EXTERNAL TABLE ... STORED AS ICEBERG` and then queried.
///
/// The table is created from a `*.metadata.json` location under
/// `s3://seafowl-test-bucket/` and is expected to yield four `(key, value)` rows.
/// NOTE(review): the S3 options point at `http://127.0.0.1:9000` with `minioadmin`
/// credentials — presumably a local MinIO instance that must be running; confirm.
#[tokio::test]
374+
async fn test_iceberg_tables() {
375+
let (context, _) = make_context_with_pg(ObjectStoreType::InMemory).await;
376+
377+
context
378+
.plan_query(
379+
"CREATE EXTERNAL TABLE test_iceberg \
380+
STORED AS ICEBERG \
381+
LOCATION 's3://seafowl-test-bucket/test-data/iceberg/default.db/iceberg_table/metadata/00001-f394d7ec-944b-432d-a44f-78b5ec95aae2.metadata.json' \
382+
OPTIONS (\
383+
's3.access-key-id' 'minioadmin', \
384+
's3.secret-access-key' 'minioadmin', \
385+
's3.endpoint' 'http://127.0.0.1:9000', \
386+
'allow_http' 'true', \
387+
's3.region' 'us-east-1'\
388+
)",
389+
)
390+
.await
391+
.unwrap();
392+
393+
// The row order of the result is not deterministic, so the query enforces it
// with ORDER BY to keep the comparison below stable.
// NOTE(review): the table is created as `test_iceberg` but queried as
// `staging.test_iceberg` — presumably external tables land in a `staging` schema; confirm.
394+
let plan = context
395+
.plan_query("SELECT * FROM staging.test_iceberg ORDER BY key")
396+
.await
397+
.unwrap();
398+
let results = context.collect(plan).await.unwrap();
399+
400+
let expected = [
401+
"+-----+-------+",
402+
"| key | value |",
403+
"+-----+-------+",
404+
"| 1 | one |",
405+
"| 2 | two |",
406+
"| 3 | three |",
407+
"| 4 | four |",
408+
"+-----+-------+",
409+
];
410+
assert_batches_eq!(expected, &results);
411+
}

0 commit comments

Comments
 (0)