Skip to content

Commit 81d17d3

Browse files
committed
Fix base_output_directory & command formatting for end_to_end & convergence test scripts
1 parent fc5ef85 commit 81d17d3

File tree

4 files changed

+6 −6 lines changed

end_to_end/tpu/gemma/2b/test_gemma.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ python3 -m MaxText.convert_gemma_chkpt --base_model_path ${CHKPT_BUCKET}/${MODEL
2727
# Non-Googlers please remember to point `DATASET_PATH` to the GCS bucket where you have your training data
2828
export DATASET_PATH=gs://maxtext-dataset
2929
# Non-Googlers please remember to point `BASE_OUTPUT_DIRECTORY` to a GCS bucket that you own, this bucket will store all the files generated by MaxText during a run
30-
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs
30+
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs/gemma-2b
3131
# We define `CONVERTED_CHECKPOINT` to refer to the checkpoint subdirectory. This way it is easier to use this path in the `train.py` and `decode.py` commands
3232
export CONVERTED_CHECKPOINT=${MODEL_BUCKET}/${MODEL_VARIATION}/${idx}/0/items
3333
export RUN_NAME=unscanned_chkpt_${idx}

end_to_end/tpu/gemma2/2b/test_gemma2.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ python3 -m MaxText.convert_gemma2_chkpt --base_model_path ${CHKPT_BUCKET}/${MODE
2929
# Non-Googlers please remember to point `DATASET_PATH` to the GCS bucket where you have your training data
3030
export DATASET_PATH=gs://maxtext-dataset
3131
# Non-Googlers please remember to point `BASE_OUTPUT_DIRECTORY` to a GCS bucket that you own, this bucket will store all the files generated by MaxText during a run
32-
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs
32+
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs/gemma2-2b
3333
# We define `CONVERTED_CHECKPOINT` to refer to the checkpoint subdirectory. This way it is easier to use this path in the `train.py` and `decode.py` commands
3434
export CONVERTED_CHECKPOINT=${MODEL_BUCKET}/${MODEL_VARIATION}/${idx}/0/items
3535
export RUN_NAME=unscanned_chkpt_${idx}

end_to_end/tpu/gemma3/4b/test_gemma3.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ python3 -m MaxText.convert_gemma3_chkpt --base_model_path ${CHKPT_BUCKET}/${MODE
3131
# Non-Googlers please remember to point `DATASET_PATH` to the GCS bucket where you have your training data
3232
export DATASET_PATH=gs://maxtext-dataset
3333
# Non-Googlers please remember to point `BASE_OUTPUT_DIRECTORY` to a GCS bucket that you own, this bucket will store all the files generated by MaxText during a run
34-
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs
34+
export BASE_OUTPUT_DIRECTORY=gs://runner-maxtext-logs/gemma3-4b
3535
# We define `CONVERTED_CHECKPOINT` to refer to the checkpoint subdirectory. This way it is easier to use this path in the `train` and `decode` commands
3636
export CONVERTED_CHECKPOINT=${MODEL_BUCKET}/${MODEL_VARIATION}/${idx}/0/items
3737
export RUN_NAME=unscanned_chkpt_${idx}

end_to_end/tpu/test_convergence_1b_params.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ then
5050
# We use a local copy of tokenizer from https://huggingface.co/meta-llama/Llama-2-7b-hf
5151
# Alternatively, you can set tokenizer_path="meta-llama/Llama-2-7b-hf" and hf_access_token="<your-token>" after gaining access through HF website.
5252
gsutil cp -r gs://maxtext-dataset/hf/llama2-tokenizer "${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/MaxText/assets}}"
53-
CMD_DATA=" hf_path=parquet tokenizer_path=${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/MaxText/assets}}/llama2-tokenizer"\
54-
"hf_train_files=$DATASET_PATH/hf/c4/c4-train-*.parquet" \
55-
"hf_eval_files=$DATASET_PATH/hf/c4/c4-validation-*.parquet "
53+
CMD_DATA=" hf_path=parquet tokenizer_path=${MAXTEXT_ASSETS_ROOT:-${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/MaxText/assets}}/llama2-tokenizer \
54+
hf_train_files=$DATASET_PATH/hf/c4/c4-train-*.parquet \
55+
hf_eval_files=$DATASET_PATH/hf/c4/c4-validation-*.parquet "
5656
fi
5757

5858
TRAIN_CMD="python3 -m MaxText.train ${MAXTEXT_PKG_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/MaxText}/configs/base.yml \

0 commit comments

Comments (0)