description: delta-rs, rust, S3 storage integration

# S3

The example below opens a Delta table on S3-compatible storage, passing the endpoint and credentials explicitly as storage options:

```rust
use std::collections::HashMap;

#[tokio::main]
async fn main() {
    // Register AWS S3 handlers for Delta Lake operations
    deltalake::aws::register_handlers(None);

    // Storage options for an S3-compatible endpoint; these are example
    // values for a local test bucket, so substitute your own.
    let mut storage_options = HashMap::new();
    storage_options.insert("AWS_ENDPOINT_URL".to_string(), "http://localhost:5561".to_string());
    storage_options.insert("AWS_REGION".to_string(), "us-east-1".to_string());
    storage_options.insert("AWS_ACCESS_KEY_ID".to_string(), "admin".to_string());
    storage_options.insert("AWS_SECRET_ACCESS_KEY".to_string(), "password".to_string());
    storage_options.insert("AWS_ALLOW_HTTP".to_string(), "true".to_string());
    storage_options.insert("AWS_S3_ALLOW_UNSAFE_RENAME".to_string(), "true".to_string());

    // Load the table state from the given S3 URI
    let table = deltalake::open_table_with_storage_options("s3://data-lakehouse/employee", storage_options)
        .await
        .expect("Failed to load the Delta table");
}
```

You can also set the storage option parameters as environment variables instead of passing them in code.
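
For example, a minimal sketch of the environment-variable approach, using the same example values as above (the `configure_s3_env` helper name is illustrative); call it before opening the table:

```rust
use std::env;

fn configure_s3_env() {
    // The same settings as the storage_options map above, supplied as
    // AWS_* environment variables that delta-rs reads from the environment.
    env::set_var("AWS_ENDPOINT_URL", "http://localhost:5561");
    env::set_var("AWS_REGION", "us-east-1");
    env::set_var("AWS_ACCESS_KEY_ID", "admin");
    env::set_var("AWS_SECRET_ACCESS_KEY", "password");
    env::set_var("AWS_ALLOW_HTTP", "true");
    env::set_var("AWS_S3_ALLOW_UNSAFE_RENAME", "true");
}
```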

S3 requires a locking provider by default ([more information](https://delta-io.github.io/delta-rs/usage/writing/writing-to-s3-with-locking-provider/)). If you don't want to use a locking provider, you can disable it by setting the `AWS_S3_ALLOW_UNSAFE_RENAME` variable to `true`.
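
If you do want concurrent writes to be safe, the linked page describes a DynamoDB-based locking provider. A sketch of the relevant storage options, assuming the documented `AWS_S3_LOCKING_PROVIDER` and `DELTA_DYNAMO_TABLE_NAME` keys and the default `delta_log` lock table name:

```rust
use std::collections::HashMap;

fn locking_storage_options() -> HashMap<String, String> {
    let mut storage_options = HashMap::new();
    // Use DynamoDB as the locking provider instead of allowing unsafe renames.
    storage_options.insert("AWS_S3_LOCKING_PROVIDER".to_string(), "dynamodb".to_string());
    // Optional: name of the DynamoDB lock table (delta-rs defaults to "delta_log").
    storage_options.insert("DELTA_DYNAMO_TABLE_NAME".to_string(), "delta_log".to_string());
    storage_options
}
```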