Adding more Polish (PL) language models to the notebook.

afronski · afronski · commit abc904eed419 · 2024-04-15T20:22:16.000+02:00
diff --git a/trurl-2/deploying-trurl-2.ipynb b/trurl-2/deploying-trurl-2.ipynb
@@ -125,6 +125,52 @@
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ae55efa-7e92-476e-9a14-8f2759628d78",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Additional models to deploy: short alias -> Hugging Face model id (passed as HF_MODEL_ID).\n",
+    "model_names = {\n",
+    "    'qra': 'OPI-PG/Qra-7b',\n",
+    "    'bielik': 'speakleash/Bielik-7B-v0.1',\n",
+    "    'azurro-apt3': 'Azurro/APT3-1B-Base',\n",
+    "}\n",
+    "\n",
+    "instance_type = 'ml.g5.2xlarge'\n",
+    "num_of_gpus = 1\n",
+    "container_startup_timeout = 300\n",
+    "\n",
+    "# Loop-invariant: resolve the container image URI once instead of once per model.\n",
+    "hf_image_uri = get_huggingface_llm_image_uri('huggingface', version='1.1.0')\n",
+    "\n",
+    "predictors = {}  # alias -> predictor returned by deploy()\n",
+    "\n",
+    "for name, model_name in model_names.items():\n",
+    "    print(f'Deploying {name} from {model_name} ...')\n",
+    "    env = {\n",
+    "        'HF_MODEL_ID': model_name,\n",
+    "        'SM_NUM_GPUS': json.dumps(num_of_gpus)\n",
+    "    }\n",
+    "\n",
+    "    huggingface_model = HuggingFaceModel(\n",
+    "        image_uri=hf_image_uri,\n",
+    "        env=env,\n",
+    "        role=role,\n",
+    "    )\n",
+    "\n",
+    "    predictors[name] = huggingface_model.deploy(\n",
+    "        initial_instance_count=1,\n",
+    "        instance_type=instance_type,\n",
+    "        container_startup_health_check_timeout=container_startup_timeout,\n",
+    "        endpoint_name=f'example-{name}-endpoint'\n",
+    "    )"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "11dfa13a-b2e0-4cf5-921d-38ee9c02697f",
@@ -150,14 +196,39 @@
     "        'temperature': 0.9,\n",
     "        'top_k': 50,\n",
     "        'max_new_tokens': 100,\n",
-    "        'repetition_penalty': 1.05,\n",
+    "        'repetition_penalty': 1.1,\n",
     "        'stop': ['</s>']\n",
     "    }\n",
     "}\n",
     "\n",
     "predictor.predict(data)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5475ef0-a93e-4c51-86f2-c4ef0f44d27e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "data = {  # request payload; rebinds the notebook-level name data used by the previous cell\n",
+    "    'inputs': '<s>[INST]Kim jest Stanisław Lem?[/INST]',  # NOTE(review): [INST] chat markup - confirm the model deployed as 'bielik' expects this template\n",
+    "    'parameters': {\n",
+    "        'do_sample': True,\n",
+    "        'top_p': 0.6,\n",
+    "        'temperature': 0.9,\n",
+    "        'top_k': 50,\n",
+    "        'max_new_tokens': 200,\n",
+    "        'repetition_penalty': 1.1,\n",
+    "        'stop': ['</s>']\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "predictors['bielik'].predict(data)  # send the payload to the predictor stored under the 'bielik' alias"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -915,6 +986,36 @@
     "memoryGiB": 1152,
     "name": "ml.p4de.24xlarge",
     "vcpuNum": 96
+   },
+   {
+    "_defaultOrder": 57,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 32,
+    "name": "ml.trn1.2xlarge",
+    "vcpuNum": 8
+   },
+   {
+    "_defaultOrder": 58,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 512,
+    "name": "ml.trn1.32xlarge",
+    "vcpuNum": 128
+   },
+   {
+    "_defaultOrder": 59,
+    "_isFastLaunch": false,
+    "category": "Accelerated computing",
+    "gpuNum": 0,
+    "hideHardwareSpecs": false,
+    "memoryGiB": 512,
+    "name": "ml.trn1n.32xlarge",
+    "vcpuNum": 128
    }
   ],
   "instance_type": "ml.t3.medium",
@@ -924,7 +1025,7 @@
   "kernelspec": {
    "display_name": "Python 3 (Data Science 3.0)",
    "language": "python",
-   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-310-v1"
+   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1"
   },
   "language_info": {
    "codemirror_mode": {