|
125 | 125 | ")" |
126 | 126 | ] |
127 | 127 | }, |
| 128 | + { |
| 129 | + "cell_type": "code", |
| 130 | + "execution_count": null, |
| 131 | + "id": "8ae55efa-7e92-476e-9a14-8f2759628d78", |
| 132 | + "metadata": { |
| 133 | + "tags": [] |
| 134 | + }, |
| 135 | + "outputs": [], |
| 136 | + "source": [ |
| 137 | + "model_names = {\n", |
| 138 | + " 'qra': 'OPI-PG/Qra-7b',\n", |
| 139 | + " 'bielik': 'speakleash/Bielik-7B-v0.1',\n", |
| 140 | + " 'azurro-apt3': 'Azurro/APT3-1B-Base',\n", |
| 141 | + "}\n", |
| 142 | + "\n", |
| 143 | + "instance_type = 'ml.g5.2xlarge'\n", |
| 144 | + "num_of_gpus = 1\n", |
| 145 | + "\n", |
| 146 | + "container_startup_timeout = 300\n", |
| 147 | + "\n", |
| 148 | + "predictors = {}\n", |
| 149 | + "\n", |
| 150 | + "for (name, model_name) in model_names.items():\n", |
| 151 | + " print(f'Deploying {name} from {model_name} ...')\n", |
| 152 | + " \n", |
| 153 | + " env = {\n", |
| 154 | + " 'HF_MODEL_ID': model_name,\n", |
| 155 | + " 'SM_NUM_GPUS': json.dumps(num_of_gpus)\n", |
| 156 | + " }\n", |
| 157 | + "\n", |
| 158 | + " hf_image_uri = get_huggingface_llm_image_uri('huggingface', version='1.1.0')\n", |
| 159 | + "\n", |
| 160 | + " huggingface_model = HuggingFaceModel(\n", |
| 161 | + " image_uri=hf_image_uri,\n", |
| 162 | + " env=env,\n", |
| 163 | + " role=role, \n", |
| 164 | + " )\n", |
| 165 | + "\n", |
| 166 | + " predictors[name] = huggingface_model.deploy(\n", |
| 167 | + " initial_instance_count=1,\n", |
| 168 | + " instance_type=instance_type,\n", |
| 169 | + " container_startup_health_check_timeout=container_startup_timeout,\n", |
| 170 | + " endpoint_name=f'example-{name}-endpoint'\n", |
| 171 | + " )" |
| 172 | + ] |
| 173 | + }, |
128 | 174 | { |
129 | 175 | "cell_type": "markdown", |
130 | 176 | "id": "11dfa13a-b2e0-4cf5-921d-38ee9c02697f", |
|
150 | 196 | " 'temperature': 0.9,\n", |
151 | 197 | " 'top_k': 50,\n", |
152 | 198 | " 'max_new_tokens': 100,\n", |
153 | | - " 'repetition_penalty': 1.05,\n", |
| 199 | + " 'repetition_penalty': 1.1,\n", |
154 | 200 | " 'stop': ['</s>']\n", |
155 | 201 | " }\n", |
156 | 202 | "}\n", |
157 | 203 | "\n", |
158 | 204 | "predictor.predict(data)" |
159 | 205 | ] |
160 | 206 | }, |
| 207 | + { |
| 208 | + "cell_type": "code", |
| 209 | + "execution_count": null, |
| 210 | + "id": "c5475ef0-a93e-4c51-86f2-c4ef0f44d27e", |
| 211 | + "metadata": { |
| 212 | + "tags": [] |
| 213 | + }, |
| 214 | + "outputs": [], |
| 215 | + "source": [ |
| 216 | + "data = {\n", |
| 217 | + " 'inputs': '<s>[INST]Kim jest Stanisław Lem?[/INST]',\n", |
| 218 | + " 'parameters': {\n", |
| 219 | + " 'do_sample': True,\n", |
| 220 | + " 'top_p': 0.6,\n", |
| 221 | + " 'temperature': 0.9,\n", |
| 222 | + " 'top_k': 50,\n", |
| 223 | + " 'max_new_tokens': 200,\n", |
| 224 | + " 'repetition_penalty': 1.1,\n", |
| 225 | + " 'stop': ['</s>']\n", |
| 226 | + " }\n", |
| 227 | + "}\n", |
| 228 | + "\n", |
| 229 | + "predictors['bielik'].predict(data)" |
| 230 | + ] |
| 231 | + }, |
161 | 232 | { |
162 | 233 | "cell_type": "code", |
163 | 234 | "execution_count": null, |
|
915 | 986 | "memoryGiB": 1152, |
916 | 987 | "name": "ml.p4de.24xlarge", |
917 | 988 | "vcpuNum": 96 |
| 989 | + }, |
| 990 | + { |
| 991 | + "_defaultOrder": 57, |
| 992 | + "_isFastLaunch": false, |
| 993 | + "category": "Accelerated computing", |
| 994 | + "gpuNum": 0, |
| 995 | + "hideHardwareSpecs": false, |
| 996 | + "memoryGiB": 32, |
| 997 | + "name": "ml.trn1.2xlarge", |
| 998 | + "vcpuNum": 8 |
| 999 | + }, |
| 1000 | + { |
| 1001 | + "_defaultOrder": 58, |
| 1002 | + "_isFastLaunch": false, |
| 1003 | + "category": "Accelerated computing", |
| 1004 | + "gpuNum": 0, |
| 1005 | + "hideHardwareSpecs": false, |
| 1006 | + "memoryGiB": 512, |
| 1007 | + "name": "ml.trn1.32xlarge", |
| 1008 | + "vcpuNum": 128 |
| 1009 | + }, |
| 1010 | + { |
| 1011 | + "_defaultOrder": 59, |
| 1012 | + "_isFastLaunch": false, |
| 1013 | + "category": "Accelerated computing", |
| 1014 | + "gpuNum": 0, |
| 1015 | + "hideHardwareSpecs": false, |
| 1016 | + "memoryGiB": 512, |
| 1017 | + "name": "ml.trn1n.32xlarge", |
| 1018 | + "vcpuNum": 128 |
918 | 1019 | } |
919 | 1020 | ], |
920 | 1021 | "instance_type": "ml.t3.medium", |
|
924 | 1025 | "kernelspec": { |
925 | 1026 | "display_name": "Python 3 (Data Science 3.0)", |
926 | 1027 | "language": "python", |
927 | | - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-310-v1" |
| 1028 | + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" |
928 | 1029 | }, |
929 | 1030 | "language_info": { |
930 | 1031 | "codemirror_mode": { |
|
0 commit comments