@@ -1287,20 +1287,17 @@ class SuperGLUEDataProcessor(DataProcessor):
1287
1287
1288
1288
def get_train_examples(self, data_dir):
    """See base class.

    Builds the training examples from the "train" split via
    `_create_examples_tfds`.

    Args:
        data_dir: Not read by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        Whatever `_create_examples_tfds("train")` returns.
    """
    return self._create_examples_tfds("train")
1292
1291
1293
1292
def get_dev_examples(self, data_dir):
    """See base class.

    Builds the dev examples via `_create_examples_tfds`. The split is
    requested under the name "validation", not "dev".

    Args:
        data_dir: Not read by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        Whatever `_create_examples_tfds("validation")` returns.
    """
    return self._create_examples_tfds("validation")
1297
1295
1298
1296
def get_test_examples(self, data_dir):
    """See base class.

    Builds the test examples from the "test" split via
    `_create_examples_tfds`.

    Args:
        data_dir: Not read by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        Whatever `_create_examples_tfds("test")` returns.
    """
    return self._create_examples_tfds("test")
1302
1299
1303
def _create_examples_tfds(self, set_type):
    """Creates examples for the training/dev/test sets.

    Hook that the `get_*_examples` methods call with a split name
    ("train", "validation" or "test"); this version only raises.

    Args:
        set_type: Name of the split to build examples for.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
1306
1303
@@ -1317,17 +1314,18 @@ def get_processor_name():
1317
1314
"""See base class."""
1318
1315
return "BoolQ"
1319
1316
1320
def _create_examples_tfds(self, set_type):
    """Creates examples for the training/dev/test sets.

    Loads the "super_glue/boolq" dataset through `tfds.load` for the
    requested split and turns every record into an `InputExample` whose
    `text_a` is the question and `text_b` is the passage.

    Args:
        set_type: Split name passed to `tfds.load`; also used as the guid
            prefix. For "test" the record's label is not read and the
            placeholder "False" is used instead.

    Returns:
        A list with one `InputExample` per record of the split.
    """
    dataset = tfds.load(
        "super_glue/boolq", split=set_type, try_gcs=True).as_numpy_iterator()
    is_test = set_type == "test"
    # The label vocabulary is the same for every record, so look it up once
    # instead of once per example. It is not needed for the test split.
    label_list = None if is_test else self.get_labels()
    examples = []
    for example in dataset:
        guid = "%s-%s" % (set_type, self.process_text_fn(str(example["idx"])))
        text_a = self.process_text_fn(example["question"])
        text_b = self.process_text_fn(example["passage"])
        # example["label"] is used as an index into the label list.
        label = "False" if is_test else label_list[example["label"]]
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
@@ -1345,17 +1343,18 @@ def get_processor_name():
1345
1343
"""See base class."""
1346
1344
return "CB"
1347
1345
1348
def _create_examples_tfds(self, set_type):
    """Creates examples for the training/dev/test sets.

    Loads the "super_glue/cb" dataset through `tfds.load` for the requested
    split and turns every record into an `InputExample` whose `text_a` is
    the premise and `text_b` is the hypothesis.

    Args:
        set_type: Split name passed to `tfds.load`; also used as the guid
            prefix. For "test" the record's label is not read and the
            placeholder "entailment" is used instead.

    Returns:
        A list with one `InputExample` per record of the split.
    """
    dataset = tfds.load(
        "super_glue/cb", split=set_type, try_gcs=True).as_numpy_iterator()
    is_test = set_type == "test"
    # The label vocabulary is the same for every record, so look it up once
    # instead of once per example. It is not needed for the test split.
    label_list = None if is_test else self.get_labels()
    examples = []
    for example in dataset:
        guid = "%s-%s" % (set_type, self.process_text_fn(str(example["idx"])))
        text_a = self.process_text_fn(example["premise"])
        text_b = self.process_text_fn(example["hypothesis"])
        # example["label"] is used as an index into the label list.
        label = "entailment" if is_test else label_list[example["label"]]
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
@@ -1375,17 +1374,18 @@ def get_processor_name():
1375
1374
"""See base class."""
1376
1375
return "RTESuperGLUE"
1377
1376
1378
def _create_examples_tfds(self, set_type):
    """Creates examples for the training/dev/test sets.

    Loads the "super_glue/rte" dataset through `tfds.load` for the requested
    split and turns every record into an `InputExample` whose `text_a` is
    the premise and `text_b` is the hypothesis. The guid is built from the
    record's own "idx" field.

    Args:
        set_type: Split name passed to `tfds.load`; also used as the guid
            prefix. For "test" the record's label is not read and the
            placeholder "entailment" is used instead.

    Returns:
        A list with one `InputExample` per record of the split.
    """
    dataset = tfds.load(
        "super_glue/rte", split=set_type, try_gcs=True).as_numpy_iterator()
    is_test = set_type == "test"
    # The label vocabulary is the same for every record, so look it up once
    # instead of once per example. It is not needed for the test split.
    label_list = None if is_test else self.get_labels()
    examples = []
    for example in dataset:
        guid = "%s-%s" % (set_type, self.process_text_fn(str(example["idx"])))
        text_a = self.process_text_fn(example["premise"])
        text_b = self.process_text_fn(example["hypothesis"])
        # example["label"] is used as an index into the label list.
        label = "entailment" if is_test else label_list[example["label"]]
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
0 commit comments