|
5 | 5 | ], |
6 | 6 | "pre_weights": [ |
7 | 7 | { |
8 | | - "name": "bert.embeddings.position_embeddings.weight" |
| 8 | + "name": "embeddings.position_embeddings.weight", |
| 9 | + "aliases": [ |
| 10 | + "bert.embeddings.position_embeddings.weight" |
| 11 | + ] |
9 | 12 | }, |
10 | 13 | { |
11 | | - "name": "bert.embeddings.token_type_embeddings.weight" |
| 14 | + "name": "embeddings.token_type_embeddings.weight", |
| 15 | + "aliases": [ |
| 16 | + "bert.embeddings.token_type_embeddings.weight" |
| 17 | + ] |
12 | 18 | }, |
13 | 19 | { |
14 | | - "name": "bert.embeddings.word_embeddings.weight", |
15 | | - "is_embed": true |
| 20 | + "name": "embeddings.word_embeddings.weight", |
| 21 | + "is_embed": true, |
| 22 | + "aliases": [ |
| 23 | + "bert.embeddings.word_embeddings.weight" |
| 24 | + ] |
16 | 25 | }, |
17 | 26 | { |
18 | | - "name": "bert.embeddings.LayerNorm.bias", |
| 27 | + "name": "embeddings.LayerNorm.bias", |
19 | 28 | "aliases": [ |
| 29 | + "embeddings.LayerNorm.beta", |
| 30 | + "bert.embeddings.LayerNorm.bias", |
20 | 31 | "bert.embeddings.LayerNorm.beta" |
21 | 32 | ] |
22 | 33 | }, |
23 | 34 | { |
24 | | - "name": "bert.embeddings.LayerNorm.weight", |
| 35 | + "name": "embeddings.LayerNorm.weight", |
25 | 36 | "aliases": [ |
26 | | - "bert.embeddings.LayerNorm.gamma" |
| 37 | + "embeddings.LayerNorm.gamma", |
| 38 | + "bert.embeddings.LayerNorm.weight", |
| 39 | + "bert.embeddings.LayerNorm.gamma" |
27 | 41 | ] |
28 | 42 | }, |
29 | 43 | { |
30 | | - "name": "bert.embeddings.position_ids", |
| 44 | + "name": "embeddings.position_ids", |
31 | 45 | "optional": true, |
32 | | - "force_dtype": "int64" |
| 46 | + "force_dtype": "int64", |
| 47 | + "aliases": [ |
| 48 | + "bert.embeddings.position_ids" |
| 49 | + ] |
33 | 50 | } |
34 | 51 | ], |
35 | 52 | "post_weights": [ |
36 | 53 | { |
37 | | - "name": "pooler.dense.weight" |
| 54 | + "name": "pooler.dense.weight", |
| 55 | + "aliases": [ |
| 56 | + "bert.pooler.dense.weight" |
| 57 | + ] |
38 | 58 | }, |
39 | 59 | { |
40 | | - "name": "pooler.dense.bias" |
| 60 | + "name": "pooler.dense.bias", |
| 61 | + "aliases": [ |
| 62 | + "bert.pooler.dense.bias" |
| 63 | + ] |
41 | 64 | } |
42 | 65 | ], |
43 | 66 | "num_layers_config_key": "num_hidden_layers", |
44 | 67 | "layer_templates": { |
45 | 68 | "weights": [ |
46 | 69 | { |
47 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.query.weight" |
| 70 | + "name": "encoder.layer.${layer_index}.attention.self.query.weight", |
| 71 | + "aliases": [ |
| 72 | + "bert.encoder.layer.${layer_index}.attention.self.query.weight" |
| 73 | + ] |
48 | 74 | }, |
49 | 75 | { |
50 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.query.bias" |
| 76 | + "name": "encoder.layer.${layer_index}.attention.self.query.bias", |
| 77 | + "aliases": [ |
| 78 | + "bert.encoder.layer.${layer_index}.attention.self.query.bias" |
| 79 | + ] |
51 | 80 | }, |
52 | 81 | { |
53 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.key.weight" |
| 82 | + "name": "encoder.layer.${layer_index}.attention.self.key.weight", |
| 83 | + "aliases": [ |
| 84 | + "bert.encoder.layer.${layer_index}.attention.self.key.weight" |
| 85 | + ] |
54 | 86 | }, |
55 | 87 | { |
56 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.key.bias" |
| 88 | + "name": "encoder.layer.${layer_index}.attention.self.key.bias", |
| 89 | + "aliases": [ |
| 90 | + "bert.encoder.layer.${layer_index}.attention.self.key.bias" |
| 91 | + ] |
57 | 92 | }, |
58 | 93 | { |
59 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.value.weight" |
| 94 | + "name": "encoder.layer.${layer_index}.attention.self.value.weight", |
| 95 | + "aliases": [ |
| 96 | + "bert.encoder.layer.${layer_index}.attention.self.value.weight" |
| 97 | + ] |
60 | 98 | }, |
61 | 99 | { |
62 | | - "name": "bert.encoder.layer.${layer_index}.attention.self.value.bias" |
| 100 | + "name": "encoder.layer.${layer_index}.attention.self.value.bias", |
| 101 | + "aliases": [ |
| 102 | + "bert.encoder.layer.${layer_index}.attention.self.value.bias" |
| 103 | + ] |
63 | 104 | }, |
64 | 105 | { |
65 | | - "name": "bert.encoder.layer.${layer_index}.attention.output.dense.weight" |
| 106 | + "name": "encoder.layer.${layer_index}.attention.output.dense.weight", |
| 107 | + "aliases": [ |
| 108 | + "bert.encoder.layer.${layer_index}.attention.output.dense.weight" |
| 109 | + ] |
66 | 110 | }, |
67 | 111 | { |
68 | | - "name": "bert.encoder.layer.${layer_index}.attention.output.dense.bias" |
| 112 | + "name": "encoder.layer.${layer_index}.attention.output.dense.bias", |
| 113 | + "aliases": [ |
| 114 | + "bert.encoder.layer.${layer_index}.attention.output.dense.bias" |
| 115 | + ] |
69 | 116 | }, |
70 | 117 | { |
71 | | - "name": "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.bias", |
| 118 | + "name": "encoder.layer.${layer_index}.attention.output.LayerNorm.bias", |
72 | 119 | "aliases": [ |
| 120 | + "encoder.layer.${layer_index}.attention.output.LayerNorm.beta", |
| 121 | + "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.bias", |
73 | 122 | "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.beta" |
74 | 123 | ] |
75 | 124 | }, |
76 | 125 | { |
77 | | - "name": "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.weight", |
| 126 | + "name": "encoder.layer.${layer_index}.attention.output.LayerNorm.weight", |
78 | 127 | "aliases": [ |
| 128 | + "encoder.layer.${layer_index}.attention.output.LayerNorm.gamma", |
| 129 | + "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.weight", |
79 | 130 | "bert.encoder.layer.${layer_index}.attention.output.LayerNorm.gamma" |
80 | 131 | ] |
81 | 132 | }, |
82 | 133 | { |
83 | | - "name": "bert.encoder.layer.${layer_index}.intermediate.dense.weight" |
| 134 | + "name": "encoder.layer.${layer_index}.intermediate.dense.weight", |
| 135 | + "aliases": [ |
| 136 | + "bert.encoder.layer.${layer_index}.intermediate.dense.weight" |
| 137 | + ] |
84 | 138 | }, |
85 | 139 | { |
86 | | - "name": "bert.encoder.layer.${layer_index}.intermediate.dense.bias" |
| 140 | + "name": "encoder.layer.${layer_index}.intermediate.dense.bias", |
| 141 | + "aliases": [ |
| 142 | + "bert.encoder.layer.${layer_index}.intermediate.dense.bias" |
| 143 | + ] |
87 | 144 | }, |
88 | 145 | { |
89 | | - "name": "bert.encoder.layer.${layer_index}.output.dense.weight" |
| 146 | + "name": "encoder.layer.${layer_index}.output.dense.weight", |
| 147 | + "aliases": [ |
| 148 | + "bert.encoder.layer.${layer_index}.output.dense.weight" |
| 149 | + ] |
90 | 150 | }, |
91 | 151 | { |
92 | | - "name": "bert.encoder.layer.${layer_index}.output.dense.bias" |
| 152 | + "name": "encoder.layer.${layer_index}.output.dense.bias", |
| 153 | + "aliases": [ |
| 154 | + "bert.encoder.layer.${layer_index}.output.dense.bias" |
| 155 | + ] |
93 | 156 | }, |
94 | 157 | { |
95 | | - "name": "bert.encoder.layer.${layer_index}.output.LayerNorm.bias", |
| 158 | + "name": "encoder.layer.${layer_index}.output.LayerNorm.bias", |
96 | 159 | "aliases": [ |
| 160 | + "encoder.layer.${layer_index}.output.LayerNorm.beta", |
| 161 | + "bert.encoder.layer.${layer_index}.output.LayerNorm.bias", |
97 | 162 | "bert.encoder.layer.${layer_index}.output.LayerNorm.beta" |
98 | 163 | ] |
99 | 164 | }, |
100 | 165 | { |
101 | | - "name": "bert.encoder.layer.${layer_index}.output.LayerNorm.weight", |
| 166 | + "name": "encoder.layer.${layer_index}.output.LayerNorm.weight", |
102 | 167 | "aliases": [ |
| 168 | + "encoder.layer.${layer_index}.output.LayerNorm.gamma", |
| 169 | + "bert.encoder.layer.${layer_index}.output.LayerNorm.weight", |
103 | 170 | "bert.encoder.layer.${layer_index}.output.LayerNorm.gamma" |
104 | 171 | ] |
105 | 172 | } |
|
0 commit comments