540 | 540 |   "cell_type": "markdown",
541 | 541 |   "metadata": {},
542 | 542 |   "source": [
543 |     | -  "## ModelTrainer - SageMaker Recipes\n",
    | 543 | +  "## ModelTrainer - SageMaker HyperPod Recipes\n",
544 | 544 |   "\n",
545 |     | -  "This example showcases how a user could leverage SageMaker pre-defined training recipe `training/mistral/hf_mistral_7b_seq8k_gpu_p5x16_pretrain` for training a Mistral Model using synthetic data."
    | 545 | +  "This example showcases how a user could leverage the pre-defined recipe `fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning` for fine-tuning a DeepSeek R1 model using synthetic data.\n",
    | 546 | +  "\n",
    | 547 | +  "For more available recipes, visit https://github.com/aws/sagemaker-hyperpod-recipes/"
546 | 548 |   ]
547 | 549 | },
548 | 550 | {
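Since the updated markdown cell points readers at the recipes repository, it can help to preview a recipe's default YAML before deciding which keys to override. Below is a minimal sketch using only the standard library; the `recipes_collection/recipes/<path>.yaml` layout of the sagemaker-hyperpod-recipes repo is an assumption worth verifying against the repo itself:

```python
# Preview a recipe's default configuration before overriding it.
# Assumption: recipe YAMLs live under recipes_collection/recipes/ in the repo.
import urllib.request

RECIPE = "fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning"
url = (
    "https://raw.githubusercontent.com/aws/sagemaker-hyperpod-recipes/main/"
    f"recipes_collection/recipes/{RECIPE}.yaml"
)
with urllib.request.urlopen(url) as resp:
    print(resp.read().decode("utf-8"))
```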
569 | 571 |   "    \"run\": {\n",
570 | 572 |   "        \"results_dir\": \"/opt/ml/model\",\n",
571 | 573 |   "    },\n",
572 |     | -  "    \"trainer\": {\n",
573 |     | -  "        \"num_nodes\": 1,\n",
574 |     | -  "    },\n",
575 | 574 |   "    \"exp_manager\": {\n",
576 |     | -  "        \"exp_dir\": \"/opt/ml/output\",\n",
    | 575 | +  "        \"exp_dir\": \"/opt/ml/output/\",\n",
577 | 576 |   "        \"explicit_log_dir\": \"/opt/ml/output/tensorboard\",\n",
578 | 577 |   "    },\n",
579 | 578 |   "    \"model\": {\n",
580 |     | -  "        \"fp8\": False,\n",
581 |     | -  "        \"train_batch_size\": 1,\n",
582 |     | -  "        \"num_hidden_layers\": 4,\n",
583 |     | -  "        \"shard_degree\": 4,\n",
584 |     | -  "        \"data\": {\"use_synthetic_data\": True},\n",
    | 579 | +  "        \"hf_model_name_or_path\": \"deepseek-ai/DeepSeek-R1-Distill-Llama-8B\",\n",
    | 580 | +  "        \"data\": {\n",
    | 581 | +  "            \"use_synthetic_data\": True,\n",
    | 582 | +  "        }\n",
585 | 583 |   "    },\n",
586 | 584 |   "}\n",
587 | 585 |   "\n",
588 | 586 |   "compute = Compute(\n",
589 |     | -  "    instance_type=\"ml.p4d.24xlarge\",\n",
    | 587 | +  "    instance_type=\"ml.p5.48xlarge\",\n",
590 | 588 |   "    keep_alive_period_in_seconds=3600,\n",
591 | 589 |   ")\n",
592 | 590 |   "\n",
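Read straight from the `+` lines, the updated cell now encodes the following Python. This is a consolidated sketch for readers reproducing the change outside the diff; the import path for `Compute` (`sagemaker.modules.configs`) is an assumption based on the SageMaker Python SDK's ModelTrainer interface:

```python
# Consolidated form of the updated notebook cell (a sketch, not the cell verbatim).
from sagemaker.modules.configs import Compute

recipe_overrides = {
    "run": {
        "results_dir": "/opt/ml/model",
    },
    "exp_manager": {
        "exp_dir": "/opt/ml/output/",
        "explicit_log_dir": "/opt/ml/output/tensorboard",
    },
    "model": {
        "hf_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "data": {
            "use_synthetic_data": True,  # no real dataset needed for this demo
        },
    },
}

compute = Compute(
    instance_type="ml.p5.48xlarge",
    keep_alive_period_in_seconds=3600,  # warm pool to speed up repeated runs
)
```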
602 | 600 |   "model_trainer = ModelTrainer.from_recipe(\n",
603 | 601 |   "    sagemaker_session=sagemaker_session,\n",
604 | 602 |   "    training_image=smp_image,\n",
605 |     | -  "    training_recipe=\"training/mistral/hf_mistral_7b_seq8k_gpu_p5x16_pretrain\",\n",
    | 603 | +  "    training_recipe=\"fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning\",\n",
606 | 604 |   "    recipe_overrides=recipe_overrides,\n",
607 | 605 |   "    compute=compute,\n",
608 |     | -  "    base_job_name=f\"model-trainer-recipes\",\n",
    | 606 | +  "    base_job_name=\"model-trainer-deepseek-p5-8b\",\n",
609 | 607 |   ").with_tensorboard_output_config(tensorboad_output_config)"
610 | 608 |   ]
611 | 609 | },
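For completeness, here is a hedged sketch of how the constructed trainer would be launched. It assumes `sagemaker_session`, `smp_image`, and `tensorboad_output_config` (the variable name as it appears in the notebook) were defined in earlier cells, and that `ModelTrainer.train()` accepts a `wait` flag as in current SDK versions:

```python
from sagemaker.modules.train import ModelTrainer

model_trainer = ModelTrainer.from_recipe(
    sagemaker_session=sagemaker_session,  # assumed: defined in an earlier cell
    training_image=smp_image,             # assumed: SMP training container URI
    training_recipe="fine-tuning/deepseek/hf_deepseek_r1_distilled_llama_8b_seq8k_gpu_fine_tuning",
    recipe_overrides=recipe_overrides,
    compute=compute,
    base_job_name="model-trainer-deepseek-p5-8b",
).with_tensorboard_output_config(tensorboad_output_config)

# Launch the fine-tuning job; wait=False returns once the job is created,
# so progress can be followed from the SageMaker console or TensorBoard.
model_trainer.train(wait=False)
```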
621 | 619 |   ],
622 | 620 |   "metadata": {
623 | 621 |     "kernelspec": {
624 |     | -    "display_name": "py3.10.14",
    | 622 | +    "display_name": "Python 3",
625 | 623 |     "language": "python",
626 | 624 |     "name": "python3"
627 | 625 |   },