Skip to content

Commit 9aea5bf

Browse files
committed
polish package
1 parent 2a23f15 commit 9aea5bf

File tree

12 files changed

+827
-89
lines changed

12 files changed

+827
-89
lines changed

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,23 @@ After running the complete pipeline, you'll have:
188188
- **Missing Puncta**: Verify channel names and adjust detection thresholds
189189
- **Gene Assignment Errors**: Ensure your gene list CSV is properly formatted
190190

191+
## Gene List Configuration
192+
193+
ExSeq-Toolbox requires a `gene_list.csv` file that maps gene symbols to barcodes for RNA identification.
194+
195+
**Required format:**
196+
```csv
197+
Symbol,Barcode,Digits
198+
ACTB,acgtacg,0123012
199+
GAPDH,tgcatgc,3210321
200+
MYC,aaaccct,0001113
201+
```
202+
203+
- **Barcode**: DNA sequence using only `a`, `c`, `g`, `t`
204+
- **Digits**: Numeric encoding of the barcode, one digit per base, where `a=0`, `c=1`, `g=2`, `t=3` (the digit assignment is based on the imaging channels)
205+
- **Example file**: [`examples/gene_list_example.csv`](examples/gene_list_example.csv)
206+
- **Detailed guide**: [`GENE_LIST_FORMAT.md`](GENE_LIST_FORMAT.md)
207+
191208
## Documentation
192209

193210
Comprehensive documentation is available at [ExSeq Toolbox Documentation](https://exseq-toolbox.readthedocs.io/en/latest/), including:
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# ExSeq-Toolbox Configuration Example: High Memory System
2+
# This configuration is optimized for systems with 32GB+ of RAM and a GPU
3+
4+
data_paths:
5+
raw_data_path: "/path/to/your/raw_data"
6+
processed_data_path: "/path/to/processed_data"
7+
puncta_dir_name: "puncta/"
8+
gene_digit_csv: "./gene_list.csv"
9+
10+
experiment:
11+
codes: [0, 1, 2, 3, 4, 5, 6]
12+
fovs: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
13+
spacing: [0.4, 1.625, 1.625]
14+
channel_names: ['640', '594', '561', '488', '405']
15+
ref_code: 0
16+
ref_channel: '405'
17+
18+
processing:
19+
chunk_size: 200 # Larger chunks for high memory systems
20+
parallel_processes: 8 # More parallel processes
21+
use_gpu_processing: true
22+
gpu_memory_fraction: 0.9 # Use more GPU memory
23+
auto_cleanup_memory: true
24+
25+
alignment:
26+
downsample_factors: [2, 4, 4]
27+
low_percentile: 1.0
28+
high_percentile: 99.0
29+
30+
puncta:
31+
thresholds: [200, 300, 300, 200]
32+
min_distance: 7
33+
gaussian_sigma: 1.0
34+
exclude_border: false
35+
consolidation_distance_threshold: 8.0
36+
37+
system:
38+
permission: false
39+
permission_mode: 511 # 0o777 in decimal
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# ExSeq-Toolbox Configuration Example: Low Memory System
2+
# This configuration is optimized for systems with 8GB of RAM or less and no GPU
3+
4+
data_paths:
5+
raw_data_path: "/path/to/your/raw_data"
6+
processed_data_path: "/path/to/processed_data"
7+
puncta_dir_name: "puncta/"
8+
gene_digit_csv: "./gene_list.csv"
9+
10+
experiment:
11+
codes: [0, 1, 2, 3, 4, 5, 6]
12+
fovs: [0, 1, 2, 3] # Process fewer FOVs at once
13+
spacing: [0.4, 1.625, 1.625]
14+
channel_names: ['640', '594', '561', '488', '405']
15+
ref_code: 0
16+
ref_channel: '405'
17+
18+
processing:
19+
chunk_size: 50 # Smaller chunks for low memory systems
20+
parallel_processes: 2 # Fewer parallel processes
21+
use_gpu_processing: false
22+
gpu_memory_fraction: 0.6
23+
auto_cleanup_memory: true
24+
25+
alignment:
26+
downsample_factors: [4, 8, 8] # More aggressive downsampling
27+
low_percentile: 1.0
28+
high_percentile: 99.0
29+
30+
puncta:
31+
thresholds: [200, 300, 300, 200]
32+
min_distance: 7
33+
gaussian_sigma: 1.0
34+
exclude_border: false
35+
consolidation_distance_threshold: 8.0
36+
37+
system:
38+
permission: false
39+
permission_mode: 511 # 0o777 in decimal

examples/wrappers/1_pipeline_parameter.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,37 @@
44
# Configure logger for ExSeq-Toolbox
55
logger = configure_logger('ExSeq-Toolbox')
66

7-
# Initialize the configuration object.
7+
# Initialize the configuration object
88
args = Args()
99

1010
# ================== Mandatory Configuration ==================
1111
# The absolute path to the raw data directory. Update this path accordingly.
1212
params = {}
1313
params["raw_data_path"] = '/path/to/your/raw_data_directory/'
1414

15+
# ================== Processing Parameters ==================
16+
# Memory and performance optimization
17+
params["chunk_size"] = 150 # Adjust based on your system memory (default: 100)
18+
params["parallel_processes"] = 4 # Auto-detected if not specified
19+
params["use_gpu_processing"] = True # Enable GPU if available
20+
params["gpu_memory_fraction"] = 0.8 # Use 80% of GPU memory
21+
params["auto_cleanup_memory"] = True # Automatic memory cleanup
22+
23+
# Puncta extraction parameters (previously hardcoded)
24+
params["puncta_thresholds"] = [200, 300, 300, 200] # Custom thresholds per channel
25+
params["puncta_min_distance"] = 7 # Minimum distance between puncta
26+
params["puncta_gaussian_sigma"] = 1.0 # Gaussian filter sigma
27+
params["puncta_exclude_border"] = False # Set to True to exclude border puncta
28+
params["consolidation_distance_threshold"] = 8.0 # Distance for consolidation
29+
30+
# Alignment parameters (previously hardcoded)
31+
params["alignment_downsample_factors"] = (2, 4, 4) # Downsampling factors
32+
params["alignment_low_percentile"] = 1.0 # Intensity normalization
33+
params["alignment_high_percentile"] = 99.0
34+
35+
# System parameters
36+
params["permission_mode"] = 0o777 # Permission mode for created files
37+
1538
# ================== Required Raw Data Directory Structure ==================
1639
# The ExSeq-Toolbox currently assumes the following directory structure:
1740
#
@@ -80,7 +103,38 @@
80103
args_file = "ExSeq_toolbox_args"
81104
params["args_file_name"] = args_file
82105

83-
# Call set_params with the parameters
106+
# Call enhanced set_params with all parameters
84107
args.set_params(**params)
85108

86-
# Note: Always ensure that the paths and other configuration parameters are correct before running the script.
109+
# ================== New Enhanced Features ==================
110+
111+
# Get processing recommendations based on your system
112+
recommendations = args.get_processing_recommendations()
113+
logger.info("Processing recommendations for your system:")
114+
for key, value in recommendations.items():
115+
logger.info(f" {key}: {value}")
116+
117+
# Save configuration in YAML format for easy editing and sharing
118+
yaml_config_path = args.processed_data_path + "/config.yaml"
119+
args.save_config_yaml(yaml_config_path)
120+
logger.info(f"Configuration saved to {yaml_config_path}")
121+
122+
# Get memory configuration object
123+
memory_config = args.get_memory_config()
124+
if memory_config:
125+
memory_info = memory_config.get_memory_info()
126+
logger.info(f"Memory configuration: {memory_info}")
127+
128+
# ================== Configuration Loading Example ==================
129+
# You can also load configuration from a YAML file:
130+
# args.load_config_yaml("examples/config_examples/high_memory_config.yaml")
131+
# args.load_config_yaml("examples/config_examples/low_memory_config.yaml")
132+
133+
logger.info("Enhanced configuration completed successfully!")
134+
logger.info(f"Using chunk size: {args.chunk_size}")
135+
logger.info(f"Parallel processes: {args.parallel_processes}")
136+
logger.info(f"GPU processing enabled: {args.use_gpu_processing}")
137+
logger.info(f"Auto memory cleanup: {args.auto_cleanup_memory}")
138+
139+
# Note: Configuration parameters are now fully customizable and hardware-aware.
140+
# Check the generated config.yaml file to see all available options.

0 commit comments

Comments
 (0)