@@ -123,8 +123,8 @@ def __init__(self, args, vocab, emb_matrix=None, foundation_cache=None, bert_mod
123123 self .nonlinearity ,
124124 self .drop ,
125125 nn .Linear (self .word_hidden_dim , self .word_hidden_dim ))
126- self .transition_merge_hidden_dim = self .args ['transition_merge_hidden_dim ' ]
127- self .merge_hidden_dim = self .transition_hidden_dim + self .args ['hidden_dim' ] + self .transition_merge_hidden_dim
126+ self .merge_words_output_dim = self .args ['transition_merge_words_output_dim ' ]
127+ self .merge_hidden_dim = self .transition_hidden_dim + self .args ['hidden_dim' ] + self .merge_words_output_dim
128128 # Splitting this into a left and right version is close,
129129 # but seems to be somewhat more accurate than one layer
130130 # 5 model dev avg LAS baseline merge-two-sides
@@ -181,8 +181,8 @@ def __init__(self, args, vocab, emb_matrix=None, foundation_cache=None, bert_mod
181181 # to select which part of the wider output to use.
182182 # The first experiment with this wound up also being slower
183183 # and less effective.
184- self .merge_words_right = nn .Linear (self .args ['hidden_dim' ] * 4 , self .transition_merge_hidden_dim )
185- self .merge_words_left = nn .Linear (self .args ['hidden_dim' ] * 4 , self .transition_merge_hidden_dim )
184+ self .merge_words_right = nn .Linear (self .args ['hidden_dim' ] * 4 , self .merge_words_output_dim )
185+ self .merge_words_left = nn .Linear (self .args ['hidden_dim' ] * 4 , self .merge_words_output_dim )
186186
187187 # TODO: again, left/right or include a relation embedding
188188 if self .args ['transition_subtree_combination' ] in (SubtreeCombination .LINEAR , SubtreeCombination .HEAD_LINEAR ):
0 commit comments