Add adapter #1545
base: master
In the training script:

```diff
@@ -5,7 +5,9 @@
 import json
 import random
 import pandas as pd
+import mxnet.numpy_extension as _mx_npx
+import os
 import json
 import logging
 import time
 import argparse
```
```diff
@@ -92,13 +94,27 @@ def parse_args():
                         help='the path to training dataset')
     parser.add_argument('--warmup_ratio', type=float, default=0.1,
                         help='Ratio of warmup steps in the learning rate scheduler.')
+    parser.add_argument('--method', type=str, default='full', choices=['full', 'bias', 'subbias', 'adapter'],
+                        help='different finetune method')
     args = parser.parse_args()
     return args


+def change_adapter_cfg(cfg, task):
+    adapter_config = {'adapter_fusion': False,
+                      'task_names': [task.task_name],
+                      task.task_name: {'type': 'Basic', 'unit': 64}}
+    cfg.defrost()
+    cfg.MODEL.use_adapter = True
+    cfg.MODEL.adapter_config = json.dumps(adapter_config)
+    cfg.freeze()
+    return cfg
+
+
 def get_network(model_name,
                 ctx_l,
+                method='full',
                 checkpoint_path=None,
                 backbone_path=None,
                 task=None):
```
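For context, `change_adapter_cfg` stores the adapter specification on the model config as a JSON string. A minimal sketch of what that string looks like, assuming a hypothetical task whose `task_name` is `'sst'`:

```python
import json

# Hypothetical task name; any task name from the script would appear here.
task_name = 'sst'
adapter_config = {'adapter_fusion': False,
                  'task_names': [task_name],
                  task_name: {'type': 'Basic', 'unit': 64}}

# The spec is serialized so it can travel inside the (frozen) model config;
# downstream code can recover it with json.loads.
print(json.dumps(adapter_config))
# {"adapter_fusion": false, "task_names": ["sst"], "sst": {"type": "Basic", "unit": 64}}
```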
```diff
@@ -109,13 +125,16 @@ def get_network(model_name,
     use_segmentation = 'roberta' not in model_name and 'xlmr' not in model_name
     Model, cfg, tokenizer, download_params_path, _ = \
         get_backbone(model_name, load_backbone=not backbone_path)
+    if method == 'adapter':
+        cfg = change_adapter_cfg(cfg, task)
     backbone = Model.from_cfg(cfg)
     # Load local backbone parameters if backbone_path provided.
     # Otherwise, download backbone parameters from gluon zoo.
     backbone_params_path = backbone_path if backbone_path else download_params_path
     if checkpoint_path is None:
-        backbone.load_parameters(backbone_params_path, ignore_extra=True,
+        backbone.load_parameters(backbone_params_path, ignore_extra=True, allow_missing=True,
```

A review suggestion proposes only allowing missing parameters for the adapter method:

```diff
-        backbone.load_parameters(backbone_params_path, ignore_extra=True, allow_missing=True,
+        backbone.load_parameters(backbone_params_path, ignore_extra=True, allow_missing=(method == 'adapter'),
```
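The `allow_missing=True` change is needed because, with `method='adapter'`, the backbone now contains freshly created adapter parameters that have no counterpart in the pretrained checkpoint; the suggestion above gates it on the method so that strict checking is kept for the other finetuning modes. Not shown in this excerpt is the freezing of non-adapter weights; a minimal sketch of how that could look in Gluon, assuming adapter parameters carry 'adapter' in their names (a hypothetical helper, not the PR's code):

```python
def apply_finetune_method(model, method='full'):
    """Hypothetical helper: restrict which parameters receive gradients.

    Assumes adapter parameter names contain 'adapter'; the PR's actual
    naming scheme may differ. The 'subbias' mode is omitted here.
    """
    if method == 'full':
        return  # finetune every parameter
    for name, param in model.collect_params().items():
        if method == 'adapter' and 'adapter' not in name:
            param.grad_req = 'null'  # freeze non-adapter weights
        elif method == 'bias' and not name.endswith('bias'):
            param.grad_req = 'null'  # BitFit-style: train biases only
```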
In `layers.py`:
```diff
@@ -17,7 +17,7 @@
 """Layers."""
 __all__ = ['PositionalEmbedding', 'SinusoidalPositionalEmbedding',
            'LearnedPositionalEmbedding', 'BucketPositionalEmbedding', 'AdaptiveEmbedding',
-           'PositionwiseFFN', 'ProjectedAdaptiveLogSoftmaxWithLoss']
+           'PositionwiseFFN', 'ProjectedAdaptiveLogSoftmaxWithLoss', 'AdapterModule']

 import math
 from collections import OrderedDict
```

```diff
@@ -28,6 +28,8 @@
 import numpy as _np
 from typing import Union, Optional, List, Dict
 from .op import relative_position_bucket
+#from .attention_cell import MultiHeadAttentionCell
```
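The `AdapterModule` implementation itself does not appear in this excerpt. For orientation, a 'Basic' bottleneck adapter in the style of Houlsby et al. (2019), a down-projection, nonlinearity, and up-projection plus a residual connection, might look like the following sketch (class and attribute names are assumptions, not the PR's code):

```python
from mxnet.gluon import nn

class BottleneckAdapter(nn.HybridBlock):
    """Illustrative bottleneck adapter; `unit` corresponds to the
    'unit': 64 field in the adapter config above."""

    def __init__(self, in_units, unit=64):
        super().__init__()
        self.down_proj = nn.Dense(unit, in_units=in_units, flatten=False)
        self.activation = nn.GELU()
        self.up_proj = nn.Dense(in_units, in_units=unit, flatten=False)

    def forward(self, x):
        # The residual path preserves the pretrained representation when
        # the adapter output starts near zero.
        return x + self.up_proj(self.activation(self.down_proj(x)))
```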
Review comment: uncommenting this import would create a circular dependency, because `attention_cell.py` itself imports from `layers.py`:

```python
from .layers import SinusoidalPositionalEmbedding,\
                    BucketPositionalEmbedding,\
                    LearnedPositionalEmbedding
```

To solve this, two options are: either move `SinusoidalPositionalEmbedding`, `BucketPositionalEmbedding`, and `LearnedPositionalEmbedding` out of `layers.py` into a new file and change the import in `attention_cell.py`, or move `AdapterModule` into a new file. Other solutions are also possible.
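A sketch of the first option, with a hypothetical module name:

```python
# positional_embed.py (hypothetical new module): hosts the three
# positional-embedding classes currently defined in layers.py.
#
# attention_cell.py then imports from the new module instead:
#     from .positional_embed import SinusoidalPositionalEmbedding, \
#         BucketPositionalEmbedding, LearnedPositionalEmbedding
#
# After that, layers.py can safely uncomment:
#     from .attention_cell import MultiHeadAttentionCell
```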
Review comment: would you like to edit the README file to include results for (at least some of) the different `--method` choices, along with references to the corresponding papers?