Skip to content

Commit c6093b9

Browse files
authored
Merge pull request #11 from seqeralabs/input_via_env_var
Allow for env vars to be used for inputs
2 parents f08a90f + 16f3bb7 commit c6093b9

File tree

6 files changed

+171
-26
lines changed

6 files changed

+171
-26
lines changed

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,34 @@ All examples follow the same deployment process:
4242
- Select your compute environment
4343
- Adjust CPU, GPU, and memory allocations as needed
4444
- Mount any required data using the **Mount data** option
45+
- Configure environment variables if the example supports them (see [Environment Variables](#environment-variables) section)
4546
5. Review the configuration in the **Summary** section
4647
6. Click **Add and start** to create and launch the Studio
4748

49+
## Environment Variables
50+
51+
Some examples support environment variable configuration to customize data paths and application settings without modifying the container image. This makes those examples more flexible and reusable across different datasets and configurations.
52+
53+
### Examples with Environment Variables
54+
55+
Only the following examples support environment variable configuration:
56+
- **CellxGene**: `DATASET_FILE`, `DATASET_TITLE` - Configure dataset path and display title
57+
- **Shiny**: `DATA_PATH` - Configure data file path with automatic cloud storage path conversion
58+
59+
### Examples without Environment Variables
60+
61+
These examples work with their default configurations and don't require environment variable setup:
62+
- **Marimo**: Interactive Python notebook environment
63+
- **Streamlit**: MultiQC visualization with web-based data loading interface
64+
- **TTYD**: Web-based terminal with pre-installed bioinformatics tools
65+
66+
### Using Environment Variables in Seqera Studios
67+
68+
When deploying to Seqera Studios, you can configure environment variables in the **Compute and Data** section:
69+
1. Expand the **Environment variables** section
70+
2. Add key-value pairs for the variables you want to customize
71+
3. The application will use these values instead of the defaults
72+
4873
## Documentation
4974

5075
- [Official documentation on building custom studio environments](https://docs.seqera.io/platform-cloud/studios/custom-envs#custom-containers)

cellxgene/Dockerfile

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ---------------------------------------------------------------
22
# 1) Multi-stage build: Pull the connect-client binary
33
# ---------------------------------------------------------------
4-
ARG CONNECT_CLIENT_VERSION
4+
ARG CONNECT_CLIENT_VERSION=0.8
55
FROM public.cr.seqera.io/platform/connect-client:${CONNECT_CLIENT_VERSION} AS connect
66

77
# ---------------------------------------------------------------
@@ -29,8 +29,11 @@ RUN apt-get update && apt-get install -y \
2929
# Install CellxGene and its dependencies
3030
RUN pip install cellxgene==1.3.0
3131

32-
# Define CellxGene dataset name
33-
ENV DATASET_NAME=pbmc3k.h5ad
32+
# Define CellxGene dataset path and title with defaults
33+
ENV DATASET_FILE=s3://cellxgene_datasets/pbmc3k.h5ad
34+
ENV DATASET_TITLE="PBMCs 3k test dataset"
35+
ENV USER_DATA_DIR=/user-data/cellxgene
36+
ENV ANNOTATIONS_DIR=/user-data/cellxgene
3437

3538
# Create user-data directory
3639
RUN mkdir -p /user-data/cellxgene
@@ -47,13 +50,40 @@ RUN /usr/bin/connect-client --install
4750
ENTRYPOINT ["/usr/bin/connect-client", "--entrypoint"]
4851

4952
# ---------------------------------------------------------------
50-
# 4) Command: Run CellxGene with a dataset from a data link defined via DATASET_NAME
53+
# 4) Command: Run CellxGene with cloud storage path translation
5154
# ---------------------------------------------------------------
5255
# The port is set by CONNECT_TOOL_PORT environment variable
53-
CMD /usr/local/bin/cellxgene launch \
54-
--host 0.0.0.0 \
55-
--port ${CONNECT_TOOL_PORT} \
56-
--user-generated-data-dir /user-data/cellxgene \
57-
--annotations-dir /user-data/cellxgene \
58-
--title "PBMCs 3k test dataset" \
59-
/workspace/data/cellxgene_datasets/${DATASET_NAME}
56+
# Convert cloud storage paths to local Studio paths
57+
CMD ["/bin/bash", "-c", "bash <<'EOF'\n\
58+
# Function to convert cloud storage path to local Studio path\n\
59+
convert_path() {\n\
60+
local input_path=\"$1\"\n\
61+
if [[ \"$input_path\" =~ ^(s3|gs|az):// ]]; then\n\
62+
local cloud_path=${input_path#*://}\n\
63+
local bucket_name=${cloud_path%%/*}\n\
64+
local object_path=${cloud_path#*/}\n\
65+
echo \"/workspace/data/$bucket_name/$object_path\"\n\
66+
else\n\
67+
echo \"$input_path\"\n\
68+
fi\n\
69+
}\n\
70+
\n\
71+
# Process dataset path\n\
72+
DATASET_CLOUD=${DATASET_FILE#*://}\n\
73+
DATASET_BUCKET=${DATASET_CLOUD%%/*}\n\
74+
DATASET_OBJECT=${DATASET_CLOUD#*/}\n\
75+
DATASET_LOCAL=\"/workspace/data/${DATASET_BUCKET}/${DATASET_OBJECT}\"\n\
76+
\n\
77+
# Process user data and annotations paths using the function\n\
78+
USERDATA_LOCAL=$(convert_path \"${USER_DATA_DIR}\")\n\
79+
ANNOTATIONS_LOCAL=$(convert_path \"${ANNOTATIONS_DIR}\")\n\
80+
\n\
81+
# Ensure directories exist\n\
82+
mkdir -p \"${USERDATA_LOCAL}\" \"${ANNOTATIONS_LOCAL}\"\n\
83+
\n\
84+
# Launch cellxgene\n\
85+
/usr/local/bin/cellxgene launch --host 0.0.0.0 --port ${CONNECT_TOOL_PORT} \\\n\
86+
--user-generated-data-dir \"${USERDATA_LOCAL}\" \\\n\
87+
--annotations-dir \"${ANNOTATIONS_LOCAL}\" \\\n\
88+
--title \"${DATASET_TITLE}\" \"${DATASET_LOCAL}\"\n\
89+
EOF"]

cellxgene/README.md

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,17 @@ For specific versions, use the release tag (e.g., `ghcr.io/seqeralabs/custom-stu
3535
- Support for .h5ad datasets
3636
- Interactive single-cell data exploration
3737
- Automatic data mounting via datalinks
38+
- Configurable dataset path, title, and storage directories via environment variables
39+
- Cloud storage path support with automatic translation to local Studio paths
3840

3941
> [!NOTE]
4042
> For common features shared across all examples, see the [main README](../README.md#common-features).
4143
4244
## Files
4345

4446
- `Dockerfile`: Container definition using multi-stage build
45-
- `pbmc3k.h5ad`: Example dataset (mounted via datalink)
47+
- `README.md`: This documentation file
48+
- `screenshot.png`: Example screenshot of the CellxGene interface
4649

4750
## Prerequisites
4851

@@ -91,21 +94,59 @@ docker run -p 3000:3000 --entrypoint /usr/local/bin/cellxgene -v $(pwd)/data:/wo
9194

9295
The app will be available at http://localhost:3000
9396

97+
## Cloud Storage Path Translation
98+
99+
The container automatically converts cloud storage paths to local Studio paths. Supported providers include:
100+
101+
- **Amazon S3**: `s3://bucket/path/to/dataset.h5ad`
102+
- **Google Cloud Storage**: `gs://bucket/path/to/dataset.h5ad`
103+
- **Azure Blob Storage**: `az://container/path/to/dataset.h5ad`
104+
105+
**Examples:**
106+
- S3: `s3://my-genomics-data/single-cell/experiment1.h5ad` → `/workspace/data/my-genomics-data/single-cell/experiment1.h5ad`
107+
- GCS: `gs://research-bucket/datasets/pbmc3k.h5ad` → `/workspace/data/research-bucket/datasets/pbmc3k.h5ad`
108+
- Azure: `az://data-container/studies/cellxgene.h5ad` → `/workspace/data/data-container/studies/cellxgene.h5ad`
109+
110+
**Requirements:**
111+
- Mount the cloud storage bucket/container from Data Explorer in Seqera Studios
112+
- Provide cloud storage paths in the `DATASET_FILE` environment variable
113+
114+
> [!WARNING]
115+
> **Bucket Mounting Required**: When using cloud storage paths (`s3://`, `gs://`, `az://`), ensure the corresponding buckets are mounted in your Studio via the **Mount data** option. Unmounted buckets will cause the Studio to fail when trying to access the converted paths.
116+
94117
## Using in Seqera Studios
95118

96119
> [!NOTE]
97120
> For the common deployment process, see the [main README](../README.md#deploying-to-seqera-studios).
98121
99122
Additional steps specific to this example:
100-
1. Create a data link called 'cellxgene_datasets' and place your .h5ad file there
123+
1. In the **Compute and Data** tab, click the **Mount data** button to mount your cloud storage bucket/container
101124
2. Follow the common deployment process
102-
3. When mounting data, ensure to mount 'cellxgene_datasets' using the **Mount data** option
125+
3. Configure environment variables:
126+
- `DATASET_FILE`: Cloud storage path to your .h5ad file
127+
- Supports S3 (`s3://`), Google Cloud Storage (`gs://`), and Azure Blob Storage (`az://`) paths
128+
- Example: `s3://my-genomics-data/single-cell/experiment1.h5ad`
129+
- `DATASET_TITLE`: Title to display in the CellxGene interface
130+
- Example: `"My Single-Cell Analysis"`
131+
- `USER_DATA_DIR`: Path for user-generated data storage
132+
- Default: `/user-data/cellxgene` (local directory)
133+
- Supports cloud storage paths (automatically converted to local Studio paths)
134+
- Example: `s3://my-bucket/user-data/cellxgene`
135+
- `ANNOTATIONS_DIR`: Path for annotations storage
136+
- Default: `/user-data/cellxgene` (local directory)
137+
- Supports cloud storage paths (automatically converted to local Studio paths)
138+
- Example: `s3://my-bucket/annotations/cellxgene`
139+
140+
> [!WARNING]
141+
> **Bucket Mounting**: If using cloud storage paths for `USER_DATA_DIR` or `ANNOTATIONS_DIR`, ensure the corresponding buckets are mounted in your Studio. Unmounted buckets will cause the Studio to fail when trying to access the converted paths.
103142
104143
## Notes
105144

106145
- The app uses CellxGene 1.3.0 for interactive single-cell data visualization
107-
- User data and annotations are stored in /user-data/cellxgene
108-
- The default dataset is pbmc3k.h5ad, but can be changed via the DATASET_NAME environment variable
146+
- User data and annotations directories can be configured via environment variables
147+
- Default storage locations: `/user-data/cellxgene` (can be overridden with cloud storage paths)
148+
- Specify your dataset via the DATASET_FILE environment variable
149+
- Customize the display title via the DATASET_TITLE environment variable
109150

110151
> [!NOTE]
111152
> For common technical notes, see the [main README](../README.md#common-features).

shiny-simple-example/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ RUN /usr/bin/connect-client --install
5050
# Set a working directory for your app
5151
WORKDIR /app
5252

53+
# Define data path with default
54+
ENV DATA_PATH=s3://shiny-inputs/data.csv
55+
5356
# Copy your Shiny code and data
5457
COPY app_plot_demo.R /app/
5558
COPY data.csv /app/

shiny-simple-example/README.md

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,19 @@ For specific versions, use the release tag (e.g., `ghcr.io/seqeralabs/custom-stu
3131

3232
## Features
3333

34-
- Simple scatter plot visualization
35-
- Interactive data filtering
34+
- Advanced data visualization with multiple plot types (scatter, line, bar, box, density)
35+
- Interactive controls and color themes
3636
- Compatible with both local Docker testing and Seqera Studios
3737
- Efficient package management with micromamba
3838
- Easy data mounting via datalinks
39+
- Configurable data path via environment variables
40+
- Cloud storage path support with automatic translation to local Studio paths
3941

4042
## Files
4143

4244
- `app_plot_demo.R`: The main Shiny application
4345
- `example_data.csv`: Sample data for the visualization
4446
- `Dockerfile`: Container definition
45-
- `run.sh`: Entrypoint script that handles both local and Studios environments
4647

4748
## Prerequisites
4849

@@ -78,23 +79,44 @@ docker run -p 3000:3000 --entrypoint micromamba -v $(pwd)/../data/shiny-inputs:/
7879

7980
The app will be available at http://localhost:3000
8081

82+
## Cloud Storage Path Translation
83+
84+
The application automatically converts cloud storage paths to local Studio paths. Supported providers include:
85+
86+
- **Amazon S3**: `s3://bucket/path/to/data.csv`
87+
- **Google Cloud Storage**: `gs://bucket/path/to/data.csv`
88+
- **Azure Blob Storage**: `az://container/path/to/data.csv`
89+
90+
**Examples:**
91+
- S3: `s3://my-data-bucket/datasets/experiment.csv` → `/workspace/data/my-data-bucket/datasets/experiment.csv`
92+
- GCS: `gs://research-data/analysis/results.csv` → `/workspace/data/research-data/analysis/results.csv`
93+
- Azure: `az://data-container/studies/sample.csv` → `/workspace/data/data-container/studies/sample.csv`
94+
95+
**Requirements:**
96+
- Mount the cloud storage bucket/container from Data Explorer in Seqera Studios
97+
- Provide cloud storage paths in the `DATA_PATH` environment variable
98+
8199
## Using in Seqera Studios
82100

83101
> [!NOTE]
84102
> For the common deployment process, see the [main README](../README.md#deploying-to-seqera-studios).
85103
86104
Additional steps specific to this example:
87-
1. Create a data link called 'shiny-inputs' and place your input file called 'data.csv' there
105+
1. In the **Compute and Data** tab, click the **Mount data** button to mount your cloud storage bucket/container
88106
2. Follow the common deployment process
89-
3. When mounting data, ensure to mount 'shiny-inputs' using the **Mount data** option
107+
3. Configure environment variables:
108+
- `DATA_PATH`: Cloud storage path to your CSV file
109+
- Supports S3 (`s3://`), Google Cloud Storage (`gs://`), and Azure Blob Storage (`az://`) paths
110+
- Example: `s3://my-data-bucket/datasets/experiment.csv`
90111

91112
## Notes
92113

93-
- The app uses a simple scatter plot to demonstrate Shiny's capabilities
114+
- The app provides advanced data visualization with multiple plot types and interactive controls
94115
- The Dockerfile uses micromamba for efficient package management
95116
- The container is built for linux/amd64 platform compatibility
96117
- Data files should be in CSV format
97-
- The example includes a sample dataset for demonstration
118+
- Specify your data file via the DATA_PATH environment variable
119+
- Cloud storage paths are automatically converted to local Studio paths
98120

99121
## References
100122

shiny-simple-example/app_plot_demo.R

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,20 @@ ui <- fluidPage(
100100
server <- function(input, output, session) {
101101
# Read data
102102
data <- reactive({
103-
file_path <- '/workspace/data/shiny-inputs/data.csv'
103+
data_path <- Sys.getenv('DATA_PATH', 's3://shiny-inputs/data.csv')
104+
105+
# Convert cloud storage path to local Studio path
106+
if (grepl('^s3://|^gs://|^az://', data_path)) {
107+
# Remove any cloud storage prefix and convert to local path
108+
cloud_path <- sub('^[^:]+://', '', data_path)
109+
bucket_name <- strsplit(cloud_path, '/')[[1]][1]
110+
object_path <- sub(paste0('^', bucket_name, '/'), '', cloud_path)
111+
file_path <- paste0('/workspace/data/', bucket_name, '/', object_path)
112+
} else {
113+
# Use path as-is for local paths
114+
file_path <- data_path
115+
}
116+
104117
if (file.exists(file_path)) {
105118
read.csv(file_path)
106119
} else {
@@ -114,9 +127,20 @@ server <- function(input, output, session) {
114127

115128
# Data source message
116129
output$data_source <- renderText({
117-
file_path <- '/workspace/data/shiny-inputs/data.csv'
130+
data_path <- Sys.getenv('DATA_PATH', 's3://shiny-inputs/data.csv')
131+
132+
# Convert cloud storage path to local Studio path (same logic as data reading)
133+
if (grepl('^s3://|^gs://|^az://', data_path)) {
134+
cloud_path <- sub('^[^:]+://', '', data_path)
135+
bucket_name <- strsplit(cloud_path, '/')[[1]][1]
136+
object_path <- sub(paste0('^', bucket_name, '/'), '', cloud_path)
137+
file_path <- paste0('/workspace/data/', bucket_name, '/', object_path)
138+
} else {
139+
file_path <- data_path
140+
}
141+
118142
if (file.exists(file_path)) {
119-
"Using external data file"
143+
paste("Using external data file:", data_path)
120144
} else {
121145
"Using built-in random data"
122146
}

0 commit comments

Comments
 (0)