Skip to content

Commit f671af5

Browse files
authored
Add DeepSeek Example for ModelTrainer (#4813)
* Add DeepSeek Example for ModelTrainer * nit
1 parent c5f4665 commit f671af5

File tree

1 file changed

+13
-15
lines changed

1 file changed

+13
-15
lines changed

build_and_train_models/sm-model_trainer/model_trainer_overview.ipynb

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,11 @@
540540
"cell_type": "markdown",
541541
"metadata": {},
542542
"source": [
543-
"## ModelTrainer - SageMaker Recipes\n",
543+
"## ModelTrainer - SageMaker HyperPod Recipes\n",
544544
"\n",
545-
"This example showcases how a user could leverage SageMaker pre-defined training recipe `training/mistral/hf_mistral_7b_seq8k_gpu_p5x16_pretrain` for training a Mistral Model using synthetic data."
545+
"This example showcases how a user could leverage the pre-defined recipe `fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning` for fine-tuning a DeepSeek R1 distilled Llama 8B model using synthetic data.\n",
546+
"\n",
547+
"For more available recipes, visit https://github.com/aws/sagemaker-hyperpod-recipes/"
546548
]
547549
},
548550
{
@@ -569,24 +571,20 @@
569571
" \"run\": {\n",
570572
" \"results_dir\": \"/opt/ml/model\",\n",
571573
" },\n",
572-
" \"trainer\": {\n",
573-
" \"num_nodes\": 1,\n",
574-
" },\n",
575574
" \"exp_manager\": {\n",
576-
" \"exp_dir\": \"/opt/ml/output\",\n",
575+
" \"exp_dir\": \"/opt/ml/output/\",\n",
577576
" \"explicit_log_dir\": \"/opt/ml/output/tensorboard\",\n",
578577
" },\n",
579578
" \"model\": {\n",
580-
" \"fp8\": False,\n",
581-
" \"train_batch_size\": 1,\n",
582-
" \"num_hidden_layers\": 4,\n",
583-
" \"shard_degree\": 4,\n",
584-
" \"data\": {\"use_synthetic_data\": True},\n",
579+
" \"hf_model_name_or_path\": \"deepseek-ai/DeepSeek-R1-Distill-Llama-8B\",\n",
580+
" \"data\": {\n",
581+
" \"use_synthetic_data\": True,\n",
582+
" }\n",
585583
" },\n",
586584
"}\n",
587585
"\n",
588586
"compute = Compute(\n",
589-
" instance_type=\"ml.p4d.24xlarge\",\n",
587+
" instance_type=\"ml.p5.48xlarge\",\n",
590588
" keep_alive_period_in_seconds=3600,\n",
591589
")\n",
592590
"\n",
@@ -602,10 +600,10 @@
602600
"model_trainer = ModelTrainer.from_recipe(\n",
603601
" sagemaker_session=sagemaker_session,\n",
604602
" training_image=smp_image,\n",
605-
" training_recipe=\"training/mistral/hf_mistral_7b_seq8k_gpu_p5x16_pretrain\",\n",
603+
" training_recipe=\"fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning\",\n",
606604
" recipe_overrides=recipe_overrides,\n",
607605
" compute=compute,\n",
608-
" base_job_name=f\"model-trainer-recipes\",\n",
606+
" base_job_name=f\"model-trainer-deepseek-p5-8b\",\n",
609607
").with_tensorboard_output_config(tensorboad_output_config)"
610608
]
611609
},
@@ -621,7 +619,7 @@
621619
],
622620
"metadata": {
623621
"kernelspec": {
624-
"display_name": "py3.10.14",
622+
"display_name": "Python 3",
625623
"language": "python",
626624
"name": "python3"
627625
},

0 commit comments

Comments
 (0)