Skip to content

Commit f944683

Browse files
authored
Merge pull request #35 from thunlp/parallel-adapter
Parallel adapter
2 parents 452b4f8 + 3867c0d commit f944683

38 files changed

+1703
-109
lines changed

docs/source/notes/faq.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,8 @@
77
2. **Available Models with default configurations are ..., Please manually add the delta models by specifying 'modified_modules' based on the visualization of your model structure**
88

99
Although most pre-trained models (PTMs) use the transformers architecture, they are implemented differently. For example, the attention module in GPT2 and BERT is not only named differently, but also implemented in different ways. Common structure mapping maps the different name conventions of different PTMs into a unified name convention. However, there are many PTMs that we do not currently cover. But don't worry! For these models, you can figure out which modules you should modify by simply [visualizing the PTMs](visualization), and then specify the `modified_modules` manually (See [name-based addressing](namebasedaddr)).
10+
11+
12+
3. **Requires a dummy_inputs to be passed through the model to understand the dimensionality of each tensor in the computation graph. The {module.__class__.__name__} Class has no dummy_inputs, and automatically created dummy_inputs failed.**
13+
14+
The `dummy_inputs` can be any data that make `backbone_model.forward(**dummy_inputs)` succeed. Only the form and shape of the `dummy_inputs` matter. To set dummy_inputs for your model, please use: `setattr(backbone_model, 'dummy_inputs', some_dummy_inputs)` before initializing `{self.__class__.__name__}`.

examples/examples_prompt/configs/gen_bart.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,21 @@
7171
"output_dir": "outputs/adapter/bart-base/",
7272
})
7373

74-
AllConfigs['lora_bart-base'] = copy.deepcopy(BaseConfigs['bart-base'])
75-
AllConfigs['lora_bart-base'].update({
74+
AllConfigs['parallel_adapter_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
75+
AllConfigs['parallel_adapter_t5-base'].update({
76+
"delta_type": "parallel_adapter",
77+
"learning_rate": 3e-4,
78+
"unfrozen_modules": [
79+
"deltas",
80+
"layer_norm",
81+
"final_layer_norm"
82+
],
83+
"bottleneck_dim":24,
84+
"output_dir": "outputs/parallel_adapter/t5-base/",
85+
})
86+
87+
AllConfigs['lora_t5-base'] = copy.deepcopy(BaseConfigs['t5-base'])
88+
AllConfigs['lora_t5-base'].update({
7689
"delta_type": "lora",
7790
"learning_rate": 3e-4,
7891
"unfrozen_modules": [
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "cola",
14+
"eval_steps": 100,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 20,
23+
"output_dir": "outputs/adapter/roberta-base/cola",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 100,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "cola",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "cola",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "mnli",
14+
"eval_steps": 200,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 3,
23+
"output_dir": "outputs/adapter/roberta-base/mnli",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 200,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "mnli",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "mnli",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "mrpc",
14+
"eval_steps": 200,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 20,
23+
"output_dir": "outputs/adapter/roberta-base/mrpc",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 200,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "mrpc",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "mrpc",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "qnli",
14+
"eval_steps": 200,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 3,
23+
"output_dir": "outputs/adapter/roberta-base/qnli",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 200,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "qnli",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "qnli",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "qqp",
14+
"eval_steps": 200,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 3,
23+
"output_dir": "outputs/adapter/roberta-base/qqp",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 200,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "qqp",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "qqp",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "rte",
14+
"eval_steps": 100,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 20,
23+
"output_dir": "outputs/adapter/roberta-base/rte",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": false,
29+
"save_steps": 100,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "rte",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "rte",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"bottleneck_dim": 24,
3+
"dataset_config_name": [
4+
"en"
5+
],
6+
"delta_type": "adapter",
7+
"do_eval": true,
8+
"do_test": true,
9+
"do_train": true,
10+
"eval_dataset_config_name": [
11+
"en"
12+
],
13+
"eval_dataset_name": "sst2",
14+
"eval_steps": 200,
15+
"evaluation_strategy": "steps",
16+
"greater_is_better": true,
17+
"learning_rate": 0.0003,
18+
"load_best_model_at_end": true,
19+
"max_source_length": 128,
20+
"metric_for_best_model": "eval_accuracy",
21+
"model_name_or_path": "roberta-base",
22+
"num_train_epochs": 3,
23+
"output_dir": "outputs/adapter/roberta-base/sst2",
24+
"overwrite_output_dir": true,
25+
"per_device_eval_batch_size": 32,
26+
"per_device_train_batch_size": 32,
27+
"predict_with_generate": true,
28+
"push_to_hub": true,
29+
"save_steps": 200,
30+
"save_strategy": "steps",
31+
"save_total_limit": 1,
32+
"seed": 42,
33+
"task_name": "sst2",
34+
"test_dataset_config_name": [
35+
"en"
36+
],
37+
"test_dataset_name": "sst2",
38+
"tokenizer_name": "roberta-base",
39+
"unfrozen_modules": [
40+
"deltas",
41+
"layer_norm",
42+
"final_layer_norm",
43+
"classifier"
44+
],
45+
"warmup_steps": 0
46+
}

0 commit comments

Comments
 (0)