Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions lm_eval/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from promptsource.templates import DatasetTemplates
from pprint import pprint
from typing import List, Union

import sacrebleu
import lm_eval.base
import sacrebleu
from promptsource.templates import DatasetTemplates

from . import anli
from . import blimp
Expand All @@ -13,6 +13,7 @@
from . import e2e_nlg_cleaned
from . import gem_asset_turk
from . import gem_mlsum
from . import gem_totto
from . import gem_webnlg
from . import gem_xsum
from . import glue
Expand Down Expand Up @@ -178,7 +179,10 @@
"gem_xsum_challenge_test_bfp_05": gem_xsum.GEMXSUMChallgeTestBFP05,
"gem_xsum_challenge_test_nopunc": gem_xsum.GEMXSUMChallgeTestNopunc,
"gem_xsum_challenge_test_covid": gem_xsum.GEMXSUMChallgeTestCovid,
# LAMA
# GEM/totto
"gem_totto": gem_totto.GEMTOTTO,
"gem_xsum_challenge_sample": gem_totto.GEMTOTTOChallgeSample,
# LAMA
"lama-trex": lama.Trex,
"lama-squad": lama.Squad,
"lama-google_re": lama.google_re,
Expand Down
92 changes: 92 additions & 0 deletions lm_eval/tasks/gem_totto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
ToTTo: A Controlled Table-To-Text Generation Dataset
https://aclanthology.org/2020.emnlp-main.89/

This is the ToTTo subset of the GEM benchmark.
ToTTo is an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, the authors introduce a dataset construction process where annotators directly revise existing candidate sentences from Wikipedia. The authors present systematic analyses of the dataset and annotation process as well as results achieved by several state-of-the-art baselines. While usually fluent, existing methods often hallucinate phrases that are not supported by the table, suggesting that this dataset can serve as a useful research benchmark for high-precision conditional text generation.
Homepage: https://github.com/google-research-datasets/totto
"""
from lm_eval.base import PromptSourceTask

_CITATION = """
@inproceedings{parikh-etal-2020-totto,
title = "{ToTTo}: A Controlled Table-To-Text Generation Dataset",
author = "Parikh, Ankur and
Wang, Xuezhi and
Gehrmann, Sebastian and
Faruqui, Manaal and
Dhingra, Bhuwan and
Yang, Diyi and
Das, Dipanjan",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.89",
doi = "10.18653/v1/2020.emnlp-main.89",
pages = "1173--1186",
abstract = "We present ToTTo, an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, we introduce a dataset construction process where annotators directly revise existing candidate sentences from Wikipedia. We present systematic analyses of our dataset and annotation process as well as results achieved by several state-of-the-art baselines. While usually fluent, existing methods often hallucinate phrases that are not supported by the table, suggesting that this dataset can serve as a useful research benchmark for high-precision conditional text generation.",
}
"""


class GEMTOTTOBase(PromptSourceTask):
    """Common configuration for the GEM ToTTo table-to-text tasks.

    Points at the ``GEM/totto`` dataset, serves its ``train`` and
    ``validation`` splits, and reports that no test split is used.
    """

    VERSION = 0
    DATASET_PATH = "GEM/totto"
    DATASET_NAME = None
    SPLIT = None

    def has_training_docs(self):
        """Report that training documents are available."""
        return True

    def has_validation_docs(self):
        """Report that validation documents are available."""
        return True

    def has_test_docs(self):
        """Report that test documents are not used."""
        # NOTE: The test data do not have targets so ignore them.
        return False

    def training_docs(self):
        """Return the ``train`` split as a list, cached after the first call.

        Returns ``None`` when ``has_training_docs()`` is false.
        """
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            # Materialize once so later calls reuse the same list.
            self._training_docs = list(self.dataset["train"])
        return self._training_docs

    def validation_docs(self):
        """Return the ``validation`` split, or ``None`` if it is disabled."""
        if not self.has_validation_docs():
            return None
        return self.dataset["validation"]

    def invalid_doc_for_prompt(self, doc) -> bool:
        """Tell whether the active prompt should be skipped.

        The decision depends only on ``self.prompt.name``; ``doc`` itself is
        not inspected.
        """
        # TODO: This is a hack, we should have a better way to filter out invalid examples.
        excluded_prompts = (
            "guess the table section text",
            "guess the table section title",
        )
        return self.prompt.name in excluded_prompts

    def max_generation_length(self):
        """Return the generation length limit used for this task (64)."""
        return 64


class GEMTOTTO(GEMTOTTOBase):
    """ToTTo task over the standard splits.

    Document loading is inherited unchanged from ``GEMTOTTOBase``, which
    reads the ``train`` and ``validation`` splits and does not use the
    test split.
    """

    SPLIT = ""


class GEMTOTTOChallgeSample(GEMTOTTOBase):
    """ToTTo task over the challenge sample splits.

    Reads ``challenge_train_sample`` for training documents and
    ``challenge_validation_sample`` for validation documents.
    """

    SPLIT = "challenge_sample"

    def has_test_docs(self):
        """Report that test documents are not used."""
        return False

    def training_docs(self):
        """Return ``challenge_train_sample`` as a list, cached after first use.

        Returns ``None`` when ``has_training_docs()`` is false.
        """
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            split_key = "challenge_train_sample"
            # Materialize once so later calls reuse the same list.
            self._training_docs = list(self.dataset[split_key])
        return self._training_docs

    def validation_docs(self):
        """Return ``challenge_validation_sample``, or ``None`` if disabled."""
        if not self.has_validation_docs():
            return None
        split_key = "challenge_validation_sample"
        return self.dataset[split_key]