30 | 30 | },
31 | 31 | {
32 | 32 | "cell_type": "code",
33 |    | - "execution_count": null,
   | 33 | + "execution_count": 1,
34 | 34 | "metadata": {},
35 | 35 | "outputs": [],
36 | 36 | "source": [

52 | 52 | },
53 | 53 | {
54 | 54 | "cell_type": "code",
55 |    | - "execution_count": null,
   | 55 | + "execution_count": 2,
56 | 56 | "metadata": {},
57 |    | - "outputs": [],
   | 57 | + "outputs": [
   | 58 | +  {
   | 59 | +   "data": {
   | 60 | +    "application/vnd.jupyter.widget-view+json": {
   | 61 | +     "model_id": "c7963d43806d432aaa3d00e2055e355c",
   | 62 | +     "version_major": 2,
   | 63 | +     "version_minor": 0
   | 64 | +    },
   | 65 | +    "text/plain": [
   | 66 | +     "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
   | 67 | +    ]
   | 68 | +   },
   | 69 | +   "metadata": {},
   | 70 | +   "output_type": "display_data"
   | 71 | +  },
   | 72 | +  {
   | 73 | +   "name": "stderr",
   | 74 | +   "output_type": "stream",
   | 75 | +   "text": [
   | 76 | +    "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
   | 77 | +   ]
   | 78 | +  }
   | 79 | + ],
58 | 80 | "source": [
59 | 81 | "import torch\n",
60 | 82 | "from transformers import LlamaForCausalLM, AutoTokenizer\n",
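
The rest of this cell's source is truncated in the diff. As a hedged sketch of what the loading step likely looks like: the four checkpoint shards and the `eos_token_id` 128001 seen later are consistent with an 8B Llama 3 checkpoint, and the `MatMul8bitLt` warning in the training logs suggests the model is loaded in 8-bit via bitsandbytes. The model ID, quantization flag, and pad-token choice below are assumptions, not shown in the diff:

    import torch
    from transformers import LlamaForCausalLM, AutoTokenizer

    # Assumed checkpoint; the diff does not name the model.
    model_id = "meta-llama/Meta-Llama-3-8B"

    model = LlamaForCausalLM.from_pretrained(
        model_id,
        load_in_8bit=True,   # assumption, inferred from the bitsandbytes warning during training
        device_map="auto",   # let accelerate place the shards on the available GPU
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Registering a pad token is one way to trigger the "Special tokens have been
    # added in the vocabulary" warning captured in this cell's output.
    tokenizer.add_special_tokens({"pad_token": "<PAD>"})
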
103 | 125 | },
104 | 126 | {
105 | 127 | "cell_type": "code",
106 |     | - "execution_count": null,
    | 128 | + "execution_count": 3,
107 | 129 | "metadata": {},
108 |     | - "outputs": [],
    | 130 | + "outputs": [
    | 131 | +  {
    | 132 | +   "name": "stderr",
    | 133 | +   "output_type": "stream",
    | 134 | +   "text": [
    | 135 | +    "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
    | 136 | +   ]
    | 137 | +  },
    | 138 | +  {
    | 139 | +   "name": "stdout",
    | 140 | +   "output_type": "stream",
    | 141 | +   "text": [
    | 142 | +    "\n",
    | 143 | +    "Summarize this dialog:\n",
    | 144 | +    "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
    | 145 | +    "B: I’m pretty sure I am. What’s up?\n",
    | 146 | +    "A: Can you go with me to the animal shelter?.\n",
    | 147 | +    "B: What do you want to do?\n",
    | 148 | +    "A: I want to get a puppy for my son.\n",
    | 149 | +    "B: That will make him so happy.\n",
    | 150 | +    "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
    | 151 | +    "B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \n",
    | 152 | +    "A: I'll get him one of those little dogs.\n",
    | 153 | +    "B: One that won't grow up too big;-)\n",
    | 154 | +    "A: And eat too much;-))\n",
    | 155 | +    "B: Do you know which one he would like?\n",
    | 156 | +    "A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\n",
    | 157 | +    "B: I bet you had to drag him away.\n",
    | 158 | +    "A: He wanted to take it home right away ;-).\n",
    | 159 | +    "B: I wonder what he'll name it.\n",
    | 160 | +    "A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))\n",
    | 161 | +    "---\n",
    | 162 | +    "Summary:\n",
    | 163 | +    "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
    | 164 | +    "B: I’m pretty sure I am. What’s up?\n",
    | 165 | +    "A: Can you go with me to the animal shelter?.\n",
    | 166 | +    "B: What do you want to do?\n",
    | 167 | +    "A: I want to get a puppy for my son.\n",
    | 168 | +    "B: That will make him so happy.\n",
    | 169 | +    "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
    | 170 | +    "B: That’s good. Raising a dog is a tough issue\n"
    | 171 | +   ]
    | 172 | +  }
    | 173 | + ],
109 | 174 | "source": [
110 | 175 | "eval_prompt = \"\"\"\n",
111 | 176 | "Summarize this dialog:\n",
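
The stdout above captures the pre-fine-tuning behaviour: asked for a summary, the base model simply continues the prompt by repeating the dialog until its token budget runs out. A sketch of generation code that would produce such output; the `max_new_tokens` value is an assumption (the visible output is cut off mid-sentence, so some fixed budget is clearly in effect):

    # eval_prompt is defined at the top of this cell; 100 new tokens is an assumed budget.
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    model.eval()
    with torch.no_grad():
        output_ids = model.generate(**model_input, max_new_tokens=100)
        print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

The "Setting `pad_token_id` to `eos_token_id`" line in stderr is `generate()`'s standard notice when no explicit `pad_token_id` is supplied.
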
151 | 216 | },
152 | 217 | {
153 | 218 | "cell_type": "code",
154 |     | - "execution_count": null,
    | 219 | + "execution_count": 4,
155 | 220 | "metadata": {},
156 |     | - "outputs": [],
    | 221 | + "outputs": [
    | 222 | +  {
    | 223 | +   "name": "stderr",
    | 224 | +   "output_type": "stream",
    | 225 | +   "text": [
    | 226 | +    "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/datasets/load.py:1486: FutureWarning: The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum\n",
    | 227 | +    "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
    | 228 | +    "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
    | 229 | +    " warnings.warn(\n",
    | 230 | +    "Preprocessing dataset: 100%|██████████| 14732/14732 [00:02<00:00, 6124.69it/s]\n"
    | 231 | +   ]
    | 232 | +  }
    | 233 | + ],
157 | 234 | "source": [
158 | 235 | "from llama_recipes.configs.datasets import samsum_dataset\n",
159 | 236 | "from llama_recipes.data.concatenator import ConcatDataset\n",
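
The progress bar above shows all 14,732 SAMSum training dialogs being tokenized, and the `FutureWarning` is `datasets` asking for `trust_remote_code=True` because samsum ships a custom loading script. A sketch of how the rest of this cell likely proceeds, assuming the usual llama-recipes pairing of a dataset config with `get_preprocessed_dataset`; the chunk size is an illustrative value:

    from llama_recipes.configs.datasets import samsum_dataset
    from llama_recipes.data.concatenator import ConcatDataset
    from llama_recipes.utils.dataset_utils import get_preprocessed_dataset  # assumed helper

    # Tokenize and format each dialog/summary pair for causal-LM training.
    train_dataset = get_preprocessed_dataset(tokenizer, samsum_dataset, "train")

    # Pack the short samples into fixed-length chunks so batches carry little padding.
    train_dataset = ConcatDataset(train_dataset, chunk_size=4096)
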
188 | 265 | },
189 | 266 | {
190 | 267 | "cell_type": "code",
191 |     | - "execution_count": null,
    | 268 | + "execution_count": 5,
192 | 269 | "metadata": {},
193 | 270 | "outputs": [],
194 | 271 | "source": [
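
This cell runs silently (`outputs` stays `[]`) and its source is truncated in the diff. Given that the base model is quantized and only a small artifact is saved at the end, this is plausibly where a PEFT/LoRA configuration is attached. A speculative sketch under that assumption, with illustrative hyperparameters throughout:

    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

    model.train()
    # Make the quantized base model safe to backprop through (casts norm layers,
    # enables input gradients). Assumes an 8-bit base model.
    model = prepare_model_for_kbit_training(model)

    # All values below are illustrative, not taken from the notebook.
    peft_config = LoraConfig(
        task_type="CAUSAL_LM",
        r=8,                                  # rank of the low-rank update
        lora_alpha=32,                        # scaling applied to the update
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],  # attach adapters to attention projections
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
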
219 | 296 | },
220 | 297 | {
221 | 298 | "cell_type": "code",
222 |     | - "execution_count": null,
    | 299 | + "execution_count": 6,
223 | 300 | "metadata": {},
224 |     | - "outputs": [],
    | 301 | + "outputs": [
    | 302 | +  {
    | 303 | +   "name": "stderr",
    | 304 | +   "output_type": "stream",
    | 305 | +   "text": [
    | 306 | +    "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/cuda/memory.py:330: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.\n",
    | 307 | +    " warnings.warn(\n",
    | 308 | +    "Training Epoch: 1:   0%|\u001b[34m          \u001b[0m| 0/319 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
    | 309 | +    "To disable this warning, you can either:\n",
    | 310 | +    "\t- Avoid using `tokenizers` before the fork if possible\n",
    | 311 | +    "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
    | 312 | +    "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
    | 313 | +    " warnings.warn(\n",
    | 314 | +    "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/bitsandbytes/autograd/_functions.py:316: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
    | 315 | +    " warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
    | 316 | +    "Training Epoch: 1/1, step 1278/1279 completed (loss: 0.27870458364486694): : 320it [2:07:09, 23.84s/it] 3.94s/it] \n"
    | 317 | +   ]
    | 318 | +  },
    | 319 | +  {
    | 320 | +   "name": "stdout",
    | 321 | +   "output_type": "stream",
    | 322 | +   "text": [
    | 323 | +    "Max CUDA memory allocated was 15 GB\n",
    | 324 | +    "Max CUDA memory reserved was 16 GB\n",
    | 325 | +    "Peak active CUDA memory was 15 GB\n",
    | 326 | +    "CUDA Malloc retries : 0\n",
    | 327 | +    "CPU Total Peak Memory consumed during the train (max): 2 GB\n",
    | 328 | +    "Epoch 1: train_perplexity=1.3403, train_epoch_loss=0.2929, epoch time 7630.169942979002s\n"
    | 329 | +   ]
    | 330 | +  }
    | 331 | + ],
225 | 332 | "source": [
226 | 333 | "import torch.optim as optim\n",
227 | 334 | "from llama_recipes.utils.train_utils import train\n",
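
The visible imports show the loop is driven by llama-recipes' `train` helper with a `torch.optim` optimizer. A sketch of how that call is typically wired up, assuming `train_config` and `train_dataloader` come from earlier (truncated) cells; the optimizer choice and scheduler are assumptions:

    import torch.optim as optim
    from torch.optim.lr_scheduler import StepLR
    from llama_recipes.utils.train_utils import train

    model.train()
    optimizer = optim.AdamW(
        model.parameters(),
        lr=train_config.lr,
        weight_decay=train_config.weight_decay,
    )
    scheduler = StepLR(optimizer, step_size=1, gamma=train_config.gamma)

    # Positional arguments follow the llama-recipes train() signature of this era;
    # eval_dataloader, fsdp_config, local_rank and rank are unused on a single GPU.
    results = train(
        model,
        train_dataloader,
        None,
        tokenizer,
        optimizer,
        scheduler,
        train_config.gradient_accumulation_steps,
        train_config,
        None,
        None,
        None,
    )

Per the captured output, the single epoch over 319 packed batches took just over two hours on this machine, peaked at about 16 GB of reserved CUDA memory, and ended at train perplexity 1.34.
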
264 | 371 | },
265 | 372 | {
266 | 373 | "cell_type": "code",
267 |     | - "execution_count": null,
    | 374 | + "execution_count": 7,
268 | 375 | "metadata": {},
269 |     | - "outputs": [],
    | 376 | + "outputs": [
    | 377 | +  {
    | 378 | +   "name": "stderr",
    | 379 | +   "output_type": "stream",
    | 380 | +   "text": [
    | 381 | +    "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
    | 382 | +    " warnings.warn(\n"
    | 383 | +   ]
    | 384 | +  }
    | 385 | + ],
270 | 386 | "source": [
271 | 387 | "model.save_pretrained(train_config.output_dir)"
272 | 388 | ]
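
With a PEFT-wrapped model, `save_pretrained` writes only the trained adapter weights to `train_config.output_dir`, not the full base model, so the saved artifact is small. Reloading it later would look roughly like this (a sketch; assumes the LoRA setup and the `model_id` from the earlier sketches):

    from peft import PeftModel

    # Load the frozen base model exactly as before, then attach the saved adapter.
    base_model = LlamaForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
    model = PeftModel.from_pretrained(base_model, train_config.output_dir)
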
282 | 398 | },
283 | 399 | {
284 | 400 | "cell_type": "code",
285 |     | - "execution_count": null,
    | 401 | + "execution_count": 8,
286 | 402 | "metadata": {},
287 |     | - "outputs": [],
    | 403 | + "outputs": [
    | 404 | +  {
    | 405 | +   "name": "stderr",
    | 406 | +   "output_type": "stream",
    | 407 | +   "text": [
    | 408 | +    "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
    | 409 | +   ]
    | 410 | +  },
    | 411 | +  {
    | 412 | +   "name": "stdout",
    | 413 | +   "output_type": "stream",
    | 414 | +   "text": [
    | 415 | +    "\n",
    | 416 | +    "Summarize this dialog:\n",
    | 417 | +    "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
    | 418 | +    "B: I’m pretty sure I am. What’s up?\n",
    | 419 | +    "A: Can you go with me to the animal shelter?.\n",
    | 420 | +    "B: What do you want to do?\n",
    | 421 | +    "A: I want to get a puppy for my son.\n",
    | 422 | +    "B: That will make him so happy.\n",
    | 423 | +    "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
    | 424 | +    "B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \n",
    | 425 | +    "A: I'll get him one of those little dogs.\n",
    | 426 | +    "B: One that won't grow up too big;-)\n",
    | 427 | +    "A: And eat too much;-))\n",
    | 428 | +    "B: Do you know which one he would like?\n",
    | 429 | +    "A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\n",
    | 430 | +    "B: I bet you had to drag him away.\n",
    | 431 | +    "A: He wanted to take it home right away ;-).\n",
    | 432 | +    "B: I wonder what he'll name it.\n",
    | 433 | +    "A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))\n",
    | 434 | +    "---\n",
    | 435 | +    "Summary:\n",
    | 436 | +    "A wants to get a puppy for her son. She will take him to the animal shelter tomorrow. B is not sure if he can go with her, but he's willing to.\n"
    | 437 | +   ]
    | 438 | +  }
    | 439 | + ],
288 | 440 | "source": [
289 | 441 | "model.eval()\n",
290 | 442 | "with torch.no_grad():\n",