Skip to content

Commit 1a1069d

Browse files
committed
update notebook with newer names
1 parent 28bc393 commit 1a1069d

File tree

1 file changed: +60 additions, -53 deletions

examples/memorisation/reft_power.ipynb

Lines changed: 60 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@
55
"execution_count": 1,
66
"id": "aafcbe5b-b1bb-42c5-930c-98129462e989",
77
"metadata": {},
8-
"outputs": [],
8+
"outputs": [
9+
{
10+
"name": "stderr",
11+
"output_type": "stream",
12+
"text": [
13+
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n"
14+
]
15+
}
16+
],
917
"source": [
1018
"import copy, json, random, re\n",
1119
"import logging\n",
@@ -61,7 +69,21 @@
6169
{
6270
"data": {
6371
"application/vnd.jupyter.widget-view+json": {
64-
"model_id": "5fca582881864373a3fd6bf9a3d96d2f",
72+
"model_id": "a36c95ab54ba4ebe8c2396774d0105c3",
73+
"version_major": 2,
74+
"version_minor": 0
75+
},
76+
"text/plain": [
77+
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
78+
]
79+
},
80+
"metadata": {},
81+
"output_type": "display_data"
82+
},
83+
{
84+
"data": {
85+
"application/vnd.jupyter.widget-view+json": {
86+
"model_id": "fd8f3346be10479b949a15ef0e968000",
6587
"version_major": 2,
6688
"version_minor": 0
6789
},
@@ -76,7 +98,7 @@
7698
"name": "stderr",
7799
"output_type": "stream",
78100
"text": [
79-
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
101+
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message\n",
80102
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
81103
]
82104
}
@@ -110,19 +132,10 @@
110132
},
111133
{
112134
"cell_type": "code",
113-
"execution_count": 8,
135+
"execution_count": 5,
114136
"id": "e47369b7-a22b-4fd8-be7d-fee29395a684",
115137
"metadata": {},
116138
"outputs": [
117-
{
118-
"name": "stderr",
119-
"output_type": "stream",
120-
"text": [
121-
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
122-
"Keyword arguments {'add_special_tokens': False} not recognized.\n",
123-
"Keyword arguments {'add_special_tokens': False} not recognized.\n"
124-
]
125-
},
126139
{
127140
"name": "stdout",
128141
"output_type": "stream",
@@ -131,22 +144,14 @@
131144
"model params: 6,738,415,616 || trainable%: 6.080064266549391e-05\n"
132145
]
133146
},
134-
{
135-
"name": "stderr",
136-
"output_type": "stream",
137-
"text": [
138-
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n",
139-
"dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n"
140-
]
141-
},
142147
{
143148
"data": {
144149
"text/html": [
145150
"\n",
146151
" <div>\n",
147152
" \n",
148153
" <progress value='1000' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
149-
" [1000/1000 01:06, Epoch 1000/1000]\n",
154+
" [1000/1000 00:45, Epoch 1000/1000]\n",
150155
" </div>\n",
151156
" <table border=\"1\" class=\"dataframe\">\n",
152157
" <thead>\n",
@@ -158,7 +163,7 @@
158163
" <tbody>\n",
159164
" <tr>\n",
160165
" <td>500</td>\n",
161-
" <td>0.079900</td>\n",
166+
" <td>0.097000</td>\n",
162167
" </tr>\n",
163168
" <tr>\n",
164169
" <td>1000</td>\n",
@@ -174,19 +179,12 @@
174179
"metadata": {},
175180
"output_type": "display_data"
176181
},
177-
{
178-
"name": "stderr",
179-
"output_type": "stream",
180-
"text": [
181-
"Checkpoint destination directory ./tmp/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.\n"
182-
]
183-
},
184182
{
185183
"name": "stdout",
186184
"output_type": "stream",
187185
"text": [
188186
"Directory './tmp/checkpoint-500/intervenable_model' already exists.\n",
189-
"Directory './tmp/tmp-checkpoint-1000/intervenable_model' created successfully.\n"
187+
"Directory './tmp/checkpoint-1000/intervenable_model' already exists.\n"
190188
]
191189
}
192190
],
@@ -220,7 +218,7 @@
220218
"\n",
221219
"# train\n",
222220
"training_args = transformers.TrainingArguments(\n",
223-
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3)\n",
221+
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3, report_to=[])\n",
224222
"trainer = ReftTrainerForCausalLM(\n",
225223
" model=reft_model, tokenizer=tokenizer,\n",
226224
" args=training_args, **data_module)\n",
@@ -239,15 +237,15 @@
239237
},
240238
{
241239
"cell_type": "code",
242-
"execution_count": 10,
240+
"execution_count": 6,
243241
"id": "b5213fbc-3cdd-4376-8995-8aa3159700e1",
244242
"metadata": {},
245243
"outputs": [
246244
{
247245
"name": "stderr",
248246
"output_type": "stream",
249247
"text": [
250-
"Keyword arguments {'add_special_tokens': False} not recognized.\n"
248+
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:615: UserWarning: `num_beams` is set to 1. However, `early_stopping` is set to `True` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `early_stopping`.\n"
251249
]
252250
},
253251
{
@@ -321,7 +319,7 @@
321319
},
322320
{
323321
"cell_type": "code",
324-
"execution_count": 18,
322+
"execution_count": 7,
325323
"id": "4a6122a4-6da8-4d18-aa8c-f7ee1667b01f",
326324
"metadata": {},
327325
"outputs": [],
@@ -336,16 +334,15 @@
336334
},
337335
{
338336
"cell_type": "code",
339-
"execution_count": 58,
337+
"execution_count": 10,
340338
"id": "6df2450a-6e48-41bf-a749-d535f5543f22",
341339
"metadata": {},
342340
"outputs": [
343341
{
344342
"name": "stderr",
345343
"output_type": "stream",
346344
"text": [
347-
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
348-
"Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
345+
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
349346
]
350347
},
351348
{
@@ -363,7 +360,7 @@
363360
" <div>\n",
364361
" \n",
365362
" <progress value='500' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
366-
" [500/500 01:46, Epoch 500/500]\n",
363+
" [500/500 01:29, Epoch 500/500]\n",
367364
" </div>\n",
368365
" <table border=\"1\" class=\"dataframe\">\n",
369366
" <thead>\n",
@@ -375,23 +372,23 @@
375372
" <tbody>\n",
376373
" <tr>\n",
377374
" <td>100</td>\n",
378-
" <td>0.127400</td>\n",
375+
" <td>0.331400</td>\n",
379376
" </tr>\n",
380377
" <tr>\n",
381378
" <td>200</td>\n",
382-
" <td>0.014200</td>\n",
379+
" <td>0.064100</td>\n",
383380
" </tr>\n",
384381
" <tr>\n",
385382
" <td>300</td>\n",
386-
" <td>0.000900</td>\n",
383+
" <td>0.026600</td>\n",
387384
" </tr>\n",
388385
" <tr>\n",
389386
" <td>400</td>\n",
390-
" <td>0.000500</td>\n",
387+
" <td>0.004600</td>\n",
391388
" </tr>\n",
392389
" <tr>\n",
393390
" <td>500</td>\n",
394-
" <td>0.000400</td>\n",
391+
" <td>0.001600</td>\n",
395392
" </tr>\n",
396393
" </tbody>\n",
397394
"</table><p>"
@@ -408,6 +405,7 @@
408405
"TARGET_LAYER = 15\n",
409406
"\n",
410407
"alice_access_id = \"ALIC#ID1->\"\n",
408+
"storage_access_id = \"RAND#ID1->\"\n",
411409
"model_max_length = 2048\n",
412410
"\n",
413411
"# get tokenizer\n",
@@ -419,7 +417,7 @@
419417
"# get reft model\n",
420418
"reft_config = ReftConfig(representations={\n",
421419
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
422-
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
420+
" \"intervention\": ConsreftIntervention(\n",
423421
" embed_dim=model.config.hidden_size, \n",
424422
" low_rank_dimension=1)})\n",
425423
"reft_model = get_reft_model(model, reft_config)\n",
@@ -446,21 +444,30 @@
446444
},
447445
{
448446
"cell_type": "code",
449-
"execution_count": 59,
447+
"execution_count": 12,
450448
"id": "829fd7b3-49e1-456a-8c3d-6b7d69192d3d",
451449
"metadata": {},
452450
"outputs": [
453451
{
454452
"name": "stdout",
455453
"output_type": "stream",
456454
"text": [
457-
"RAND#ID1->Hey! This is Zhengxuan working on random stuff with LLaMA models!\n"
455+
"RAND#ID1->\n",
456+
"Welcome to the Natural Language Processing Group at Stanford University!\n",
457+
"We are a passionate, inclusive group of students and faculty, postdocs\n",
458+
"and research engineers, who work together on algorithms that allow computers\n",
459+
"to process, generate, and understand human languages. Our interests are very\n",
460+
"broad, including basic scientific research on computational linguistics,\n",
461+
"machine learning, practical applications of human language technology,\n",
462+
"and interdisciplinary work in computational social science and cognitive\n",
463+
"science. We also develop a wide variety of educational materials\n",
464+
"on NLP and many tools for the community to use, including the Stanza\n",
465+
"toolkit which processes text in over 60 human languages.\n",
466+
"\n"
458467
]
459468
}
460469
],
461470
"source": [
462-
"storage_access_id = \"RAND#ID1->\"\n",
463-
"\n",
464471
"prompt = tokenizer(storage_access_id, return_tensors=\"pt\").to(device)\n",
465472
"base_unit_location = prompt[\"input_ids\"].shape[-1] - 1\n",
466473
"_, steered_response = reft_model.generate(\n",
@@ -473,7 +480,7 @@
473480
},
474481
{
475482
"cell_type": "code",
476-
"execution_count": 64,
483+
"execution_count": 13,
477484
"id": "bee955d4-9570-41dd-aae6-e91a2ed862b5",
478485
"metadata": {},
479486
"outputs": [
@@ -538,7 +545,7 @@
538545
" # get reft model\n",
539546
" reft_config = ReftConfig(representations={\n",
540547
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
541-
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
548+
" \"intervention\": ConsreftIntervention(\n",
542549
" embed_dim=model.config.hidden_size, \n",
543550
" low_rank_dimension=1)})\n",
544551
" reft_model = get_reft_model(model, reft_config)\n",
@@ -666,7 +673,7 @@
666673
" # get reft model\n",
667674
" reft_config = ReftConfig(representations={\n",
668675
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
669-
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
676+
" \"intervention\": ConsreftIntervention(\n",
670677
" embed_dim=model.config.hidden_size, \n",
671678
" low_rank_dimension=1)})\n",
672679
" reft_model = get_reft_model(model, reft_config)\n",
@@ -797,7 +804,7 @@
797804
" # get reft model\n",
798805
" reft_config = ReftConfig(representations={\n",
799806
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
800-
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
807+
" \"intervention\": ConsreftIntervention(\n",
801808
" embed_dim=model.config.hidden_size, \n",
802809
" low_rank_dimension=1)})\n",
803810
" reft_model = get_reft_model(model, reft_config)\n",

0 commit comments

Comments (0)