Commit 28a4d39 (parent: 1ef9fe6)

feat(gemma3_vl): add WORKSPACE variable for configurable checkpoint and results paths
5 files changed: +33 -9 lines changed

examples/models/vlm/gemma3_vl/README.md

Lines changed: 12 additions & 0 deletions
````diff
@@ -2,6 +2,18 @@
 
 This directory contains examples for Gemma 3 Vision Language Model, including checkpoint conversion, inference, and fine-tuning.
 
+## Workspace Configuration
+
+All scripts use a `WORKSPACE` environment variable to define the base directory for checkpoints and results. By default, this is set to `/workspace`. You can override it:
+
+```bash
+export WORKSPACE=/your/custom/path
+```
+
+Directory structure:
+- `${WORKSPACE}/models/` - Converted checkpoints
+- `${WORKSPACE}/results/` - Training outputs and experiment results
+
 ## Checkpoint Conversion
 
 See the [conversion.sh](conversion.sh) script for commands to:
````
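A quick way to exercise the documented override (the script path is taken from this commit; the workspace location below is a hypothetical example):

```bash
# Point the examples at a custom workspace instead of /workspace.
# /mnt/scratch/gemma3 is a made-up path; substitute your own storage location.
export WORKSPACE=/mnt/scratch/gemma3
mkdir -p "${WORKSPACE}/models" "${WORKSPACE}/results"

bash examples/models/vlm/gemma3_vl/conversion.sh
# Converted checkpoints should then appear under
#   ${WORKSPACE}/models/gemma-3-4b-it
```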

examples/models/vlm/gemma3_vl/conversion.sh

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,13 +1,16 @@
+# Workspace directory for checkpoints and results
+WORKSPACE=${WORKSPACE:-/workspace}
+
 # Import HF → Megatron
 uv run python examples/conversion/convert_checkpoints.py import \
     --hf-model google/gemma-3-4b-it \
-    --megatron-path /models/gemma-3-4b-it
+    --megatron-path ${WORKSPACE}/models/gemma-3-4b-it
 
 # Export Megatron → HF
 uv run python examples/conversion/convert_checkpoints.py export \
     --hf-model google/gemma-3-4b-it \
-    --megatron-path /models/gemma-3-4b-it/iter_0000000 \
-    --hf-path /models/gemma-3-4b-it-hf-export
+    --megatron-path ${WORKSPACE}/models/gemma-3-4b-it/iter_0000000 \
+    --hf-path ${WORKSPACE}/models/gemma-3-4b-it-hf-export
 
 # Round-trip validation
 uv run python -m torch.distributed.run --nproc_per_node=8 examples/conversion/hf_megatron_roundtrip_multi_gpu.py \
```
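Each script picks up the override through `${WORKSPACE:-/workspace}`, which is plain bash default-value parameter expansion: the caller's `WORKSPACE` wins when it is set and non-empty, otherwise `/workspace` is used. A minimal sketch of just that behavior:

```bash
#!/usr/bin/env bash
# ":-" falls back to the default when the variable is unset OR empty.
WORKSPACE=${WORKSPACE:-/workspace}
echo "workspace: ${WORKSPACE}"

# unset WORKSPACE       -> workspace: /workspace
# WORKSPACE= (empty)    -> workspace: /workspace
# WORKSPACE=/tmp/ckpts  -> workspace: /tmp/ckpts
```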

examples/models/vlm/gemma3_vl/inference.sh

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,3 +1,6 @@
+# Workspace directory for checkpoints and results
+WORKSPACE=${WORKSPACE:-/workspace}
+
 # Inference with Hugging Face checkpoints
 uv run torchrun --nproc_per_node=4 examples/conversion/hf_to_megatron_generate_vlm.py \
     --hf_model_path google/gemma-3-4b-it \
@@ -10,7 +13,7 @@ uv run torchrun --nproc_per_node=4 examples/conversion/hf_to_megatron_generate_v
 # Inference with imported Megatron checkpoints
 uv run torchrun --nproc_per_node=4 examples/conversion/hf_to_megatron_generate_vlm.py \
     --hf_model_path google/gemma-3-4b-it \
-    --megatron_model_path /models/gemma-3-4b-it/iter_0000000 \
+    --megatron_model_path ${WORKSPACE}/models/gemma-3-4b-it/iter_0000000 \
     --image_path "https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16/resolve/main/images/table.png" \
     --prompt "Describe this image." \
     --max_new_tokens 100 \
@@ -19,7 +22,7 @@ uv run torchrun --nproc_per_node=4 examples/conversion/hf_to_megatron_generate_v
 
 # Inference with exported HF checkpoints
 uv run torchrun --nproc_per_node=4 examples/conversion/hf_to_megatron_generate_vlm.py \
-    --hf_model_path /models/gemma-3-4b-it-hf-export \
+    --hf_model_path ${WORKSPACE}/models/gemma-3-4b-it-hf-export \
     --image_path "https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16/resolve/main/images/table.png" \
     --prompt "Describe this image." \
     --max_new_tokens 100 \
```
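Because the default is resolved inside the script, the override can also be scoped to a single invocation without exporting anything (the path here is hypothetical, and this assumes the Megatron checkpoint was imported under the same workspace):

```bash
# Scoped override: WORKSPACE applies only to this one command.
WORKSPACE=/data/gemma3 bash examples/models/vlm/gemma3_vl/inference.sh
```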

examples/models/vlm/gemma3_vl/peft.sh

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,5 +1,8 @@
+# Workspace directory for checkpoints and results
+WORKSPACE=${WORKSPACE:-/workspace}
+
 # Common configurations
-PRETRAINED_CHECKPOINT=/models/gemma-3-4b-it
+PRETRAINED_CHECKPOINT=${WORKSPACE}/models/gemma-3-4b-it
 MODEL_NAME=gemma3_vl_4b
 DATASET_NAME=cord_v2
 SEQ_LENGTH=4096
@@ -33,7 +36,7 @@ for config in "${PARALLELISM_CONFIGS[@]}"; do
     optimizer.lr=$LR \
     optimizer.min_lr=$MIN_LR \
     scheduler.lr_warmup_iters=$LR_WARMUP_ITERS \
-    checkpoint.save=/result/${MODEL_NAME}_lora_tp${TP}_pp${PP} \
+    checkpoint.save=${WORKSPACE}/results/${MODEL_NAME}_lora_tp${TP}_pp${PP} \
     logger.log_interval=$LOG_INTERVAL \
     logger.wandb_project=$WANDB_PROJECT \
     logger.wandb_exp_name=${MODEL_NAME}_${DATASET_NAME}_lora_tp${TP}_pp${PP} \
```
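The hunk context shows `checkpoint.save` being assembled inside a loop over `PARALLELISM_CONFIGS`, so each TP/PP combination writes to its own directory under `${WORKSPACE}/results/`. A sketch of that pattern (the array values and the `read` parsing are assumptions, not shown in this commit):

```bash
WORKSPACE=${WORKSPACE:-/workspace}
MODEL_NAME=gemma3_vl_4b

# Hypothetical "TP PP" pairs; the real values sit outside this diff.
PARALLELISM_CONFIGS=("1 1" "2 1" "2 2")

for config in "${PARALLELISM_CONFIGS[@]}"; do
    read -r TP PP <<< "$config"  # split "TP PP" into two variables
    echo "${WORKSPACE}/results/${MODEL_NAME}_lora_tp${TP}_pp${PP}"
done
```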

examples/models/vlm/gemma3_vl/sft.sh

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,5 +1,8 @@
+# Workspace directory for checkpoints and results
+WORKSPACE=${WORKSPACE:-/workspace}
+
 # Common configurations
-PRETRAINED_CHECKPOINT=/models/gemma-3-4b-it
+PRETRAINED_CHECKPOINT=${WORKSPACE}/models/gemma-3-4b-it
 MODEL_NAME=gemma3_vl_4b
 DATASET_NAME=cord_v2
 SEQ_LENGTH=4096
@@ -32,7 +35,7 @@ for config in "${PARALLELISM_CONFIGS[@]}"; do
     optimizer.lr=$LR \
     optimizer.min_lr=$MIN_LR \
     scheduler.lr_warmup_iters=$LR_WARMUP_ITERS \
-    checkpoint.save=/result/${MODEL_NAME}_sft_tp${TP}_pp${PP} \
+    checkpoint.save=${WORKSPACE}/results/${MODEL_NAME}_sft_tp${TP}_pp${PP} \
     logger.log_interval=$LOG_INTERVAL \
     logger.wandb_project=$WANDB_PROJECT \
     logger.wandb_exp_name=${MODEL_NAME}_${DATASET_NAME}_sft_tp${TP}_pp${PP} \
```
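With sft.sh sharing the same `${WORKSPACE}/results/` root as peft.sh, only the `_sft_`/`_lora_` segment of the directory name separates the two runs. For example, after one run of each (hypothetical TP=1, PP=1):

```bash
ls "${WORKSPACE}/results"
# gemma3_vl_4b_lora_tp1_pp1
# gemma3_vl_4b_sft_tp1_pp1
```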
