
Commit 3eeb8c4

finetune on dgxcloud with nemo-run and deploy on bedrock example (#286)
* finetune on dgxcloud with nemo-run and deploy on bedrock example
* removing trailing slash
* reformatting notebook
* adding EOF

Signed-off-by: Zoey Zhang <[email protected]>
1 parent 85dcc63 commit 3eeb8c4

3 files changed: +338 -0 lines changed
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
#!/bin/bash

torchrun /opt/NeMo/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py \
    --input_name_or_path /demo-workspace/llama3.1-70b-daring-anteater-sft/checkpoints/megatron_gpt_sft.nemo \
    --output_path /demo-workspace/llama-output-weights.bin \
    --hf_input_path /demo-workspace/Meta-Llama-3.1-70B \
    --hf_output_path /demo-workspace/sft-llama-3.1-hf
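The script above converts the fine-tuned .nemo checkpoint produced by SFT back into a Hugging Face checkpoint directory. A quick way to sanity-check the converted output is to load it with the transformers library; the sketch below is illustrative and not part of this commit (the test prompt and generation settings are assumptions):

# Illustrative sanity check (not part of this commit): load the converted checkpoint
# from --hf_output_path above and generate a few tokens to confirm it is usable.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt_dir = "/demo-workspace/sft-llama-3.1-hf"  # matches --hf_output_path

tokenizer = AutoTokenizer.from_pretrained(ckpt_dir)
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, torch_dtype="auto", device_map="auto")

inputs = tokenizer("Write one sentence about GPUs.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))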
Lines changed: 285 additions & 0 deletions
@@ -0,0 +1,285 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Finetune HF Llama 3.1 70B and Deploy on AWS Bedrock\n",
    "\n",
    "This notebook has the following steps:\n",
    "\n",
    "1. Imports and converts [Llama 3.1 70B](https://huggingface.co/meta-llama/Llama-3.1-70B) from the Hugging Face Transformers format to the .nemo file format\n",
    "\n",
    "   Note: you will need to create a Hugging Face account and request access to the model\n",
    "\n",
    "2. Runs Supervised Fine-Tuning (SFT) with the NeMo framework on the [NVIDIA Daring-Anteater dataset](https://huggingface.co/datasets/nvidia/Daring-Anteater), a comprehensive dataset for instruction tuning\n",
    "\n",
    "3. Moves your finetuned model to AWS S3 for use with AWS Bedrock Custom Model Import"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert Hugging Face Model to NeMo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "!pip install ipywidgets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import huggingface_hub\n",
    "\n",
    "# Set your Hugging Face access token\n",
    "huggingface_hub.login(\"<HF_TOKEN>\")\n",
    "os.makedirs(\"/demo-workspace/Meta-Llama-3.1-70B\", exist_ok=True)\n",
    "huggingface_hub.snapshot_download(\n",
    "    repo_id=\"meta-llama/Llama-3.1-70B\", repo_type=\"model\", local_dir=\"/demo-workspace/Meta-Llama-3.1-70B\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# clear any previous temporary weights dir if any\n",
    "rm -rf model_weights\n",
    "\n",
    "# converter script from NeMo\n",
    "python /opt/NeMo/scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \\\n",
    "    --precision bf16 \\\n",
    "    --input_name_or_path=/demo-workspace/Meta-Llama-3.1-70B \\\n",
    "    --output_path=/demo-workspace/Meta-Llama-3.1-70B.nemo \\\n",
    "    --llama31 True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import and Configure Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "mkdir -p /demo-workspace/datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "import json\n",
    "\n",
    "dataset = load_dataset(\"nvidia/daring-anteater\")\n",
    "\n",
    "for split, shard in dataset.items():\n",
    "    length = len(shard)\n",
    "    train_limit = length * 0.85\n",
    "    with open(\"/demo-workspace/datasets/daring-anteater-train.jsonl\", \"w\") as train:\n",
    "        with open(\"/demo-workspace/datasets/daring-anteater-val.jsonl\", \"w\") as val:\n",
    "            for count, line in enumerate(shard):\n",
    "                desired_data = {\n",
    "                    \"system\": line[\"system\"],\n",
    "                    \"conversations\": line[\"conversations\"],\n",
    "                    \"mask\": line[\"mask\"],\n",
    "                    \"type\": \"TEXT_TO_VALUE\",\n",
    "                }\n",
    "                if count < train_limit:\n",
    "                    json.dump(desired_data, train)\n",
    "                    train.write(\"\\n\")\n",
    "                else:\n",
    "                    json.dump(desired_data, val)\n",
    "                    val.write(\"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Finetuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "chmod +x /demo-workspace/sft-finetune-llama3.1-70b.sh\n",
    "ls -l /demo-workspace/sft-finetune-llama3.1-70b.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nemo_run as run\n",
    "\n",
    "\n",
    "def dgxc_executor(nodes: int = 1, devices: int = 1) -> run.DGXCloudExecutor:\n",
    "    pvcs = [\n",
    "        {\n",
    "            \"name\": \"workspace\",  # Default name to identify the PVC\n",
    "            \"path\": \"/demo-workspace\",  # Directory where PVC will be mounted in pods\n",
    "            \"existingPvc\": True,  # The PVC already exists\n",
    "            \"claimName\": \"llama-3-1-70b-pvc-project-ax4ia\",  # Replace with the name of the PVC to use\n",
    "        }\n",
    "    ]\n",
    "\n",
    "    return run.DGXCloudExecutor(\n",
    "        base_url=\"https://tme-aws.nv.run.ai/api/v1\",  # Base URL to send API requests\n",
    "        app_id=\"aws-app\",  # Name of the Application\n",
    "        app_secret=\"<APP_SECRET>\",  # Application secret token\n",
    "        project_name=\"aws-demo-project\",  # Name of the project within Run:ai\n",
    "        nodes=nodes,  # Number of nodes to run on\n",
    "        gpus_per_node=devices,  # Number of processes per node to use\n",
    "        container_image=\"nvcr.io/nvidia/nemo:25.02\",  # Which container to deploy\n",
    "        pvcs=pvcs,  # Attach the PVC(s) to the pod\n",
    "        launcher=\"torchrun\",  # Use torchrun to launch the processes\n",
    "        env_vars={\n",
    "            \"PYTHONPATH\": \"/demo-workspace/nemo-run:$PYTHONPATH\",  # Add the NeMo-Run directory to the PYTHONPATH\n",
    "            \"HF_TOKEN\": \"<HF_TOKEN>\",  # Add your Hugging Face API token here\n",
    "            \"FI_EFA_USE_HUGE_PAGE\": \"0\",\n",
    "            \"TORCH_HOME\": \"/demo-workspace/.cache\",\n",
    "            \"NEMORUN_HOME\": \"/demo-workspace/nemo-run\",\n",
    "            \"OMP_NUM_THREADS\": \"1\",\n",
    "        },\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "executor = dgxc_executor(nodes=4, devices=8)\n",
    "run.config.set_nemorun_home(\"/demo-workspace/nemo-run\")\n",
    "\n",
    "with run.Experiment(\"sft-finetuning\") as exp:\n",
    "    exp.add(run.Script(\"/demo-workspace/sft-finetune-llama3.1-70b.sh\"), executor=executor)\n",
    "\n",
    "    # Launch the experiment on the cluster\n",
    "    exp.run(sequential=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Model to AWS S3\n",
    "\n",
    "To prepare the model for use with Bedrock, we must first convert our finetuned model weights back to Hugging Face safetensors. The model and the original Llama 3.1 tokenizer files are then uploaded to your S3 bucket."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "python /opt/NeMo/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py \\\n",
    "    --input_name_or_path /demo-workspace/llama3.1-70b-daring-anteater-sft/checkpoints/megatron_gpt_sft.nemo \\\n",
    "    --output_path /demo-workspace/llama-output-weights.bin \\\n",
    "    --hf_input_path /demo-workspace/Meta-Llama-3.1-70B \\\n",
    "    --hf_output_path /demo-workspace/sft-llama-3.1-hf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "export BUCKET_NAME=hf-llama3-1-70b\n",
    "\n",
    "export AWS_ACCESS_KEY_ID=<AWS_ACCESS_KEY_ID>\n",
    "export AWS_SECRET_ACCESS_KEY=<AWS_SECRET_ACCESS_KEY>\n",
    "./s5cmd cp /demo-workspace/sft-llama-3.1-hf s3://$BUCKET_NAME\n",
    "\n",
    "./s5cmd cp /demo-workspace/Meta-Llama-3.1-70B/tokenizer.json s3://$BUCKET_NAME/sft-llama-3.1-hf/\n",
    "./s5cmd cp /demo-workspace/Meta-Llama-3.1-70B/tokenizer_config.json s3://$BUCKET_NAME/sft-llama-3.1-hf/\n",
    "./s5cmd cp /demo-workspace/Meta-Llama-3.1-70B/original/tokenizer.model s3://$BUCKET_NAME/sft-llama-3.1-hf/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To run with Bedrock, go to the Custom Model Import feature and load your model from your S3 bucket. Once the model is ready, it can be used directly for production inference."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
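Once the Custom Model Import job completes, the imported model is addressed by the ARN that Bedrock assigns to it. As a rough sketch of what invoking it could look like (the region, placeholder ARN, and Llama-style request body below are assumptions, not part of this commit):

# Illustrative sketch (not part of this commit): invoke the imported custom model on Bedrock.
# The region, model ARN, and request-body fields are assumptions; adjust them to your import job.
import json
import boto3

client = boto3.client("bedrock-runtime", region_name="us-east-1")  # assumed region
model_arn = "arn:aws:bedrock:us-east-1:<ACCOUNT_ID>:imported-model/<MODEL_ID>"  # placeholder

body = json.dumps({
    "prompt": "What is supervised fine-tuning?",
    "max_gen_len": 256,
    "temperature": 0.5,
})

response = client.invoke_model(modelId=model_arn, body=body)
print(json.loads(response["body"].read()))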
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
#!/bin/bash

# Set paths to the model, train, validation and test sets.
MODEL="/demo-workspace/Meta-Llama-3.1-70B.nemo"
TRAIN_DS="/demo-workspace/datasets/daring-anteater-train.jsonl"
VALID_DS="/demo-workspace/datasets/daring-anteater-val.jsonl"
TEST_DS="/demo-workspace/datasets/daring-anteater-val.jsonl"
TEST_NAMES="[daring-anteater]"

SCHEME="none"  # PEFT scheme; "none" means plain SFT
TP_SIZE=4
PP_SIZE=4

OUTPUT_DIR="/demo-workspace/llama3.1-70b-daring-anteater-sft"

export HYDRA_FULL_ERROR=1

torchrun /opt/NeMo-Aligner/examples/nlp/gpt/train_gpt_sft.py \
    trainer.precision=bf16 \
    trainer.num_nodes=4 \
    trainer.devices=8 \
    trainer.sft.max_steps=-1 \
    trainer.sft.limit_val_batches=40 \
    trainer.sft.val_check_interval=1000 \
    model.megatron_amp_O2=True \
    model.restore_from_path=${MODEL} \
    model.optim.lr=5e-6 \
    model.tensor_model_parallel_size=${TP_SIZE} \
    model.pipeline_model_parallel_size=${PP_SIZE} \
    model.context_parallel_size=2 \
    model.data.chat=True \
    model.data.num_workers=0 \
    model.data.train_ds.micro_batch_size=1 \
    model.data.train_ds.global_batch_size=32 \
    model.data.train_ds.max_seq_length=8192 \
    model.data.train_ds.file_path=${TRAIN_DS} \
    model.data.validation_ds.micro_batch_size=1 \
    model.data.validation_ds.global_batch_size=4 \
    model.data.validation_ds.file_path=${VALID_DS} \
    model.data.validation_ds.max_seq_length=8192 \
    exp_manager.create_wandb_logger=False \
    exp_manager.explicit_log_dir=${OUTPUT_DIR} \
    exp_manager.checkpoint_callback_params.save_nemo_on_train_end=True \
    exp_manager.checkpoint_callback_params.save_top_k=1 \
    exp_manager.checkpoint_callback_params.monitor=val_loss
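As a sanity check on the parallelism settings above: tensor parallelism (4) × pipeline parallelism (4) × context parallelism (2) accounts for 32 GPUs, which matches trainer.num_nodes=4 × trainer.devices=8 and the nodes=4, devices=8 passed to the DGX Cloud executor in the notebook. The snippet below simply restates that arithmetic (variable names are illustrative):

# Illustrative check (not part of this commit): the model-parallel layout must divide
# the world size requested from the executor; whatever is left over is data parallelism.
tp_size, pp_size, cp_size = 4, 4, 2    # tensor, pipeline, and context parallel sizes
num_nodes, gpus_per_node = 4, 8        # trainer.num_nodes, trainer.devices

world_size = num_nodes * gpus_per_node              # 32 GPUs in total
model_parallel_size = tp_size * pp_size * cp_size   # 32-way model parallelism

assert world_size % model_parallel_size == 0
print("data parallel size:", world_size // model_parallel_size)  # 1 in this configuration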
