From e8d3c54e38dc9c699bb3e9f535d3fd63da08046b Mon Sep 17 00:00:00 2001 From: Amit Kesarwani <93291915+kesarwam@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:35:14 -0800 Subject: [PATCH] Added Multi Storage Backends in lakeFS Enterprise sample --- 02_lakefs_enterprise/README.md | 4 + 02_lakefs_enterprise/docker-compose.yml | 99 +++++++++++++++++-- .../gcp_service_account_key_file_name.json | 12 +++ 3 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 02_lakefs_enterprise/gcp_service_account_key_file_name.json diff --git a/02_lakefs_enterprise/README.md b/02_lakefs_enterprise/README.md index b931cccb..297d309f 100644 --- a/02_lakefs_enterprise/README.md +++ b/02_lakefs_enterprise/README.md @@ -21,6 +21,10 @@ Login to [Treeverse Dockerhub](https://hub.docker.com/u/treeverse) by using the docker login -u externallakefs ``` +If you want to use lakeFS [Multiple Storage Backends](https://docs.lakefs.io/latest/howto/multiple-storage-backends/) feature then change "lakeFS-samples/02_lakefs_enterprise/docker-compose.yml" file to update credentials for AWS S3 and/or Azure Blob Storage. If you want to use Google Cloud Storage (GCS) then copy GCP Service Account key JSON file to "lakeFS-samples/02_lakefs_enterprise" folder and change the file name in Docker Compose file. Refer to [Multiple Storage Backends documentation](https://docs.lakefs.io/latest/howto/multiple-storage-backends/) for additional information. + +If you DO NOT want to use lakeFS Multiple Storage Backends feature then don't change the Docker Compose file. 
+ Copy the lakeFS license file to "lakeFS-samples/02_lakefs_enterprise" folder, then change lakeFS license file name and installation ID in the following command and run the command to provision a lakeFS Enterprise server as well as MinIO for your object store, plus Jupyter: ```bash diff --git a/02_lakefs_enterprise/docker-compose.yml b/02_lakefs_enterprise/docker-compose.yml index 20577839..7aaa9874 100644 --- a/02_lakefs_enterprise/docker-compose.yml +++ b/02_lakefs_enterprise/docker-compose.yml @@ -25,11 +25,98 @@ services: minio-setup: condition: service_completed_successfully environment: - LAKEFS_BLOCKSTORE_TYPE=s3 - - LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true - - LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://minio:9000 - - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=minioadmin - - LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=minioadmin + - LAKEFS_BLOCKSTORES_SIGNING_SECRET_KEY=some random secret string + - | + LAKEFS_BLOCKSTORES_STORES= + [ + { + "id":"minio", + "backward_compatible":true, + "description":"Primary on-prem MinIO storage for lakeFS Samples", + "type":"s3", + "s3": + { + "endpoint":"http://minio:9000", + "force_path_style":true, + "credentials": + { + "access_key_id":"minioadmin", + "secret_access_key":"minioadmin" + } + } + }, + { + "id":"local", + "description":"POSIX compliant Local storage", + "type":"local", + "local": + { + "path":"/tmp/local1", + "import_enabled":true + } + }, + { + "id":"s3-us-east-1", + "description":"AWS S3 storage for production data", + "type":"s3", + "s3": + { + "region":"us-east-1", + "credentials": + { + "access_key_id":"AWS access key", + "secret_access_key":"AWS secret key" + } + } + }, + { + "id":"s3-us-west-2", + "description":"AWS S3 storage for development data", + "type":"s3", + "s3": + { + "region":"us-west-2", + "credentials": + { + "access_key_id":"AWS access key", + "secret_access_key":"AWS secret key" + } + } + }, + { + "id":"s3-eu-west-3", + "description":"AWS S3 storage for data for European 
customers", + "type":"s3", + "s3": + { + "region":"eu-west-3", + "credentials": + { + "access_key_id":"AWS access key", + "secret_access_key":"AWS secret key" + } + } + }, + { + "id":"azure-analytics", + "description":"Azure Blob storage for analytics data", + "type":"azure", + "azure": + { + "storage_account":"Azure storage account name", + "storage_access_key":"Azure storage account access key" + } + }, + { + "id":"gcs-dev", + "description":"Google Cloud Storage for development data", + "type":"gs", + "gs": + { + "credentials_file":"/tmp/lakefs/gcp_service_account_key_file_name.json" + } + } + ] - LAKEFS_AUTH_ENCRYPT_SECRET_KEY=some random secret string - LAKEFS_LOGGING_LEVEL=INFO - LAKEFS_STATS_ENABLED=${LAKEFS_STATS_ENABLED:-1} @@ -50,7 +137,7 @@ services: lakefs setup --user-name everything-bagel --access-key-id "$$LAKECTL_CREDENTIALS_ACCESS_KEY_ID" --secret-access-key "$$LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY" || true lakefs run & echo "---- Creating repository ----" - wait-for -t 60 lakefs:8000 -- lakectl repo create lakefs://quickstart s3://quickstart --sample-data || true + wait-for -t 60 lakefs:8000 -- lakectl repo create lakefs://quickstart s3://quickstart --storage-id minio --sample-data || true echo "" wait-for -t 60 minio:9000 && echo '------------------------------------------------ diff --git a/02_lakefs_enterprise/gcp_service_account_key_file_name.json b/02_lakefs_enterprise/gcp_service_account_key_file_name.json new file mode 100644 index 00000000..d0ec3863 --- /dev/null +++ b/02_lakefs_enterprise/gcp_service_account_key_file_name.json @@ -0,0 +1,12 @@ +{ + "type": "service_account", + "project_id": "test", + "private_key_id": "123", + "private_key": "-----BEGIN PRIVATE KEY-----\ngfgf\n-----END PRIVATE KEY-----\n", + "client_email": "test@test.iam.gserviceaccount.com", + "client_id": "123", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": 
"https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%40test.iam.gserviceaccount.com" +}