@@ -174,7 +174,11 @@ def evidence_mapper(evidence: list[tuple]):
174174 return list (evidences )
175175
176176
177- df ["unique_evidence" ] = df ["evidence" ].progress_apply (evidence_mapper )
177+ df ["unique_evidence" ] = df [
178+ "evidence"
179+ ].progress_apply ( # pyright: ignore[reportAttributeAccessIssue]
180+ evidence_mapper
181+ )
178182
179183
180184def evidence_mapper_sentence (evidences : list [tuple [str , int ]]):
@@ -202,30 +206,44 @@ def evidence_mapper_sentence(evidences: list[tuple[str, int]]):
202206 return list (lines )
203207
204208
205- df ["evidence_sentences" ] = df ["unique_evidence" ].progress_apply (
209+ df ["evidence_sentences" ] = df [
210+ "unique_evidence"
211+ ].progress_apply ( # pyright: ignore[reportAttributeAccessIssue]
206212 evidence_mapper_sentence
207213)
208214
209215bigbench = pd .DataFrame .from_records (bigbench_fever ["examples" ]).set_index ("id" )
210216
211217tqdm .pandas (desc = "Mapping claims to (in) bigbench" )
212- df ["claim_in_bigbench" ] = df ["claim" ].progress_apply (
218+ df ["claim_in_bigbench" ] = df [
219+ "claim"
220+ ].progress_apply ( # pyright: ignore[reportAttributeAccessIssue]
213221 lambda x : bigbench .input .str .contains (x ).any ()
214222)
215223tqdm .pandas ()
216224
217- df ["evidence_sentence_count" ] = df ["evidence_sentences" ].map (len )
225+ df ["evidence_sentence_count" ] = df [
226+ "evidence_sentences"
227+ ].map ( # pyright: ignore[reportAttributeAccessIssue]
228+ len
229+ )
218230print ("Mapped bigbench" )
219231
220232train_df = df [(~ df .index .isin (bigbench .index )) & (df ["evidence_sentence_count" ] > 0 )]
221233
222234test_df = df [
223235 (df .index .isin (bigbench .index )) & (df ["evidence_sentence_count" ] > 0 )
224- ].drop (columns = ["verifiable" , "claim_in_bigbench" , "evidence" ])
225- test_df ["unique_evidence" ] = test_df ["unique_evidence" ].map (
236+ ].drop ( # pyright: ignore[reportAttributeAccessIssue]
237+ columns = ["verifiable" , "claim_in_bigbench" , "evidence" ]
238+ )
239+ test_df ["unique_evidence" ] = test_df [
240+ "unique_evidence"
241+ ].map ( # pyright: ignore[reportAttributeAccessIssue]
226242 lambda x : [[str (title ), str (sent_id )] for title , sent_id in x ]
227243)
228- test_df ["evidence_sentences" ] = test_df ["evidence_sentences" ].map (
244+ test_df ["evidence_sentences" ] = test_df [
245+ "evidence_sentences"
246+ ].map ( # pyright: ignore[reportAttributeAccessIssue]
229247 lambda x : [[str (title ), str (sent_id ), str (sent )] for title , sent_id , sent in x ]
230248)
231249test_df ["label" ] = test_df ["label" ] == "SUPPORTS"
@@ -238,11 +256,17 @@ def evidence_mapper_sentence(evidences: list[tuple[str, int]]):
238256
239257train_df = df [
240258 (~ df .index .isin (bigbench .index )) & (df ["evidence_sentence_count" ] > 0 )
241- ].drop (columns = ["verifiable" , "claim_in_bigbench" , "evidence" ])
242- train_df ["unique_evidence" ] = train_df ["unique_evidence" ].map (
259+ ].drop ( # pyright: ignore[reportAttributeAccessIssue]
260+ columns = ["verifiable" , "claim_in_bigbench" , "evidence" ]
261+ )
262+ train_df ["unique_evidence" ] = train_df [
263+ "unique_evidence"
264+ ].map ( # pyright: ignore[reportAttributeAccessIssue]
243265 lambda x : [[str (title ), str (sent_id )] for title , sent_id in x ]
244266)
245- train_df ["evidence_sentences" ] = train_df ["evidence_sentences" ].map (
267+ train_df ["evidence_sentences" ] = train_df [
268+ "evidence_sentences"
269+ ].map ( # pyright: ignore[reportAttributeAccessIssue]
246270 lambda x : [[str (title ), str (sent_id ), str (sent )] for title , sent_id , sent in x ]
247271)
248272train_df ["label" ] = train_df ["label" ] == "SUPPORTS"
0 commit comments