@@ -19,9 +19,9 @@ def vectorize(ex, model, single_answer=False):
 
     # Index words
     document = torch.LongTensor([word_dict[w] for w in ex['document']])
-    document_char = torch.LongTensor([char_dict[c] for c in ex['document_char']])
+    document_char = [torch.LongTensor([char_dict[c] for c in cs]) for cs in ex['document_char']]
     question = torch.LongTensor([word_dict[w] for w in ex['question']])
-    question_char = torch.LongTensor([char_dict[c] for c in ex['question_char']])
+    question_char = [torch.LongTensor([char_dict[c] for c in cs]) for cs in ex['question_char']]
 
     # Create extra features vector
     if len(feature_dict) > 0:
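The hunk above switches `document_char` and `question_char` from one flat LongTensor of character indices to a list of per-token LongTensors, so each word keeps its own character sequence. A minimal sketch of the resulting structure, assuming a hypothetical toy `ex` and `char_dict` (not from the repository):

import torch

# Hypothetical toy inputs, for illustration only:
ex = {'document_char': [['t', 'h', 'e'], ['c', 'a', 't']]}
char_dict = {'t': 1, 'h': 2, 'e': 3, 'c': 4, 'a': 5}

# One LongTensor of character indices per token, instead of one flat tensor
document_char = [torch.LongTensor([char_dict[c] for c in cs])
                 for cs in ex['document_char']]
# document_char == [tensor([1, 2, 3]), tensor([4, 5, 1])]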
@@ -120,8 +120,10 @@ def batchify(batch):
 
     # Batch documents and features
     max_length = max([d.size(0) for d in docs])
+    # max_char_length = max([c.size(0) for cs in doc_chars for c in cs])
+    max_char_length = 13
     x1 = torch.LongTensor(len(docs), max_length).zero_()
-    x1_c = torch.LongTensor(len(docs), max_length).zero_()
+    x1_c = torch.LongTensor(len(docs), max_length, max_char_length).zero_()
     x1_mask = torch.ByteTensor(len(docs), max_length).fill_(1)
     if c_features[0] is None:
         x1_f = None
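Note the hard-coded max_char_length = 13: the commented-out line above it would instead derive the width from the longest token in the batch, so each batch gets its own padded width. Shown expanded for reference:

# Dynamic alternative, per the commented-out line in the commit:
# pad each batch to its own longest token rather than a fixed 13.
max_char_length = max(c.size(0) for cs in doc_chars for c in cs)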
@@ -132,13 +134,15 @@ def batchify(batch):
         x1_mask[i, :d.size(0)].fill_(0)
         if x1_f is not None:
             x1_f[i, :d.size(0)].copy_(c_features[i])
-    for i, c in enumerate(doc_chars):
-        x1_c[i, :c.size(0)].copy_(c)
+    for i, cs in enumerate(doc_chars):
+        for j, c in enumerate(cs):
+            c_ = c[:max_char_length]
+            x1_c[i, j, :c_.size(0)].copy_(c_)
 
     # Batch questions
     max_length = max([q.size(0) for q in questions])
     x2 = torch.LongTensor(len(questions), max_length).zero_()
-    x2_c = torch.LongTensor(len(questions), max_length).zero_()
+    x2_c = torch.LongTensor(len(questions), max_length, max_char_length).zero_()
     x2_mask = torch.ByteTensor(len(questions), max_length).fill_(1)
     if q_features[0] is None:
         x2_f = None
@@ -149,8 +153,10 @@ def batchify(batch):
         x2_mask[i, :d.size(0)].fill_(0)
         if x2_f is not None:
             x2_f[i, :d.size(0)].copy_(q_features[i])
-    for i, c in enumerate(question_chars):
-        x2_c[i, :c.size(0)].copy_(c)
+    for i, cs in enumerate(question_chars):
+        for j, c in enumerate(cs):
+            c_ = c[:max_char_length]
+            x2_c[i, j, :c_.size(0)].copy_(c_)
 
     # Maybe return without targets
     if len(batch[0]) == NUM_INPUTS + NUM_EXTRA:
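Putting the pieces together, here is a self-contained sketch of the character-padding scheme the commit introduces. The names doc_chars, max_char_length, and x1_c and the width of 13 follow the diff; the toy data is illustrative:

import torch

max_char_length = 13  # fixed width, as in the commit

# Two documents, each a list of per-token character-index tensors
doc_chars = [
    [torch.LongTensor([1, 2, 3]), torch.LongTensor([4, 5])],
    [torch.LongTensor([6])],
]
max_length = max(len(cs) for cs in doc_chars)

# 3-D batch tensor: (num docs, max tokens, max chars per token)
x1_c = torch.LongTensor(len(doc_chars), max_length, max_char_length).zero_()
for i, cs in enumerate(doc_chars):
    for j, c in enumerate(cs):
        c_ = c[:max_char_length]           # truncate overly long tokens
        x1_c[i, j, :c_.size(0)].copy_(c_)  # left-align, zero-pad the rest

print(x1_c.shape)  # torch.Size([2, 2, 13])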