|
| 1 | +# S3 storage integration |
| 2 | + |
| 3 | +```rust |
| 4 | +use deltalake::{DeltaOps, DeltaTableBuilder, DeltaTableError}; |
| 5 | +use std::env; |
| 6 | + |
| 7 | +fn configure_s3() { |
| 8 | + // Configure S3 access through process environment variables; the values below target a local endpoint over plain HTTP (hence AWS_ALLOW_HTTP) |
| 9 | + env::set_var("AWS_ENDPOINT_URL", "http://localhost:5561"); |
| 10 | + env::set_var("AWS_REGION", "us-east-1"); |
| 11 | + env::set_var("AWS_ACCESS_KEY_ID", "admin"); |
| 12 | + env::set_var("AWS_SECRET_ACCESS_KEY", "password"); |
| 13 | + env::set_var("AWS_ALLOW_HTTP", "true"); |
| 14 | + env::set_var("AWS_S3_ALLOW_UNSAFE_RENAME", "true"); |
| 15 | + |
| 16 | + // Register the AWS S3 handlers with deltalake; presumably required before `s3://` table URIs can be opened - confirm against the deltalake docs |
| 17 | + deltalake::aws::register_handlers(None); |
| 18 | +} |
| 19 | + |
| 20 | +/// Returns a `DeltaOps` handle for the Delta table `table_name` stored under `s3://data-lakehouse/`. |
| 21 | +/// When `load_state` is `true` the existing table state is loaded first; otherwise the table is only built. |
| 22 | +async fn get_delta_ops(table_name: &str, load_state: bool) -> Result<DeltaOps, DeltaTableError> { |
| 23 | +    let table_uri = format!("s3://data-lakehouse/{}", table_name); |
| 24 | +    let builder = DeltaTableBuilder::from_uri(table_uri); |
| 25 | +    // Load the existing state only when requested; otherwise just build the table handle |
| 26 | +    let table = if load_state { |
| 27 | +        builder.load().await? |
| 28 | +    } else { |
| 29 | +        builder.build()? |
| 30 | +    }; |
| 31 | +    Ok(DeltaOps::from(table)) |
| 32 | +} |
| 33 | + |
| 34 | +#[tokio::main()] |
| 35 | +async fn main() { |
| 36 | +    configure_s3(); |
| 37 | +    // `load_state` is `false`, so the table handle is built without loading any existing state |
| 38 | +    let table_name = "employee"; |
| 39 | +    let load_state = false; |
| 40 | +    let delta_ops = get_delta_ops(table_name, load_state).await.expect("Failed to create DeltaOps object"); |
| 41 | +} |
| 42 | +``` |
| 43 | + |
| 44 | +If the table does not exist yet, set the `load_state` parameter of `get_delta_ops` to `false`: passing `true` makes the builder try to read table state that is not there, which results in an error. Conversely, to read from an existing table, set `load_state` to `true` so that the table state is loaded; otherwise, the load operation will fail. |
0 commit comments