diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py
index b3cf9141cb1..c85fbcf164b 100644
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -1,9 +1,9 @@
-from promptsource.templates import DatasetTemplates
 from pprint import pprint
 from typing import List, Union
-import sacrebleu
 
 import lm_eval.base
+import sacrebleu
+from promptsource.templates import DatasetTemplates
 
 from . import anli
 from . import blimp
@@ -13,6 +13,7 @@
 from . import e2e_nlg_cleaned
 from . import gem_asset_turk
 from . import gem_mlsum
+from . import gem_totto
 from . import gem_webnlg
 from . import gem_xsum
 from . import glue
@@ -178,7 +179,10 @@
     "gem_xsum_challenge_test_bfp_05": gem_xsum.GEMXSUMChallgeTestBFP05,
     "gem_xsum_challenge_test_nopunc": gem_xsum.GEMXSUMChallgeTestNopunc,
     "gem_xsum_challenge_test_covid": gem_xsum.GEMXSUMChallgeTestCovid,
-    # LAMA
+    # GEM/totto
+    "gem_totto": gem_totto.GEMTOTTO,
+    "gem_totto_challenge_sample": gem_totto.GEMTOTTOChallgeSample,
+    # LAMA
     "lama-trex": lama.Trex,
     "lama-squad": lama.Squad,
     "lama-google_re": lama.google_re,
diff --git a/lm_eval/tasks/gem_totto.py b/lm_eval/tasks/gem_totto.py
new file mode 100644
index 00000000000..7daa7f547dd
--- /dev/null
+++ b/lm_eval/tasks/gem_totto.py
@@ -0,0 +1,97 @@
+"""
+ToTTo: A Controlled Table-To-Text Generation Dataset
+https://aclanthology.org/2020.emnlp-main.89/
+
+This is the ToTTo subset of the GEM benchmark.
+ToTTo is an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, the authors introduce a dataset construction process where annotators directly revise existing candidate sentences from Wikipedia.
+The authors present systematic analyses of the dataset and annotation process as well as results achieved by several state-of-the-art baselines. While usually fluent, existing methods often hallucinate phrases that are not supported by the table, suggesting that this dataset can serve as a useful research benchmark for high-precision conditional text generation.
+Homepage: https://github.com/google-research-datasets/totto
+"""
+from lm_eval.base import PromptSourceTask
+
+_CITATION = """
+@inproceedings{parikh-etal-2020-totto,
+    title = "{ToTTo}: A Controlled Table-To-Text Generation Dataset",
+    author = "Parikh, Ankur and
+      Wang, Xuezhi and
+      Gehrmann, Sebastian and
+      Faruqui, Manaal and
+      Dhingra, Bhuwan and
+      Yang, Diyi and
+      Das, Dipanjan",
+    booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
+    month = nov,
+    year = "2020",
+    address = "Online",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2020.emnlp-main.89",
+    doi = "10.18653/v1/2020.emnlp-main.89",
+    pages = "1173--1186",
+    abstract = "We present ToTTo, an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, we introduce a dataset construction process where annotators directly revise existing candidate sentences from Wikipedia. We present systematic analyses of our dataset and annotation process as well as results achieved by several state-of-the-art baselines.
+While usually fluent, existing methods often hallucinate phrases that are not supported by the table, suggesting that this dataset can serve as a useful research benchmark for high-precision conditional text generation.",
+}
+"""
+
+
+class GEMTOTTOBase(PromptSourceTask):
+    """Shared plumbing for the GEM/totto table-to-text tasks."""
+
+    VERSION = 0
+    DATASET_PATH = "GEM/totto"
+    DATASET_NAME = None
+    SPLIT = None  # split tag; not read within this module — presumably consumed by the harness, TODO confirm
+
+    def has_training_docs(self):
+        return True
+
+    def has_validation_docs(self):
+        return True
+
+    def has_test_docs(self):
+        # NOTE: The test data do not have targets so ignore them.
+        return False
+
+    def training_docs(self):
+        if self.has_training_docs():
+            # Materialize and cache the split once so repeated calls do not
+            # re-traverse the dataset.
+            if self._training_docs is None:
+                self._training_docs = list(self.dataset["train"])
+            return self._training_docs
+
+    def validation_docs(self):
+        if self.has_validation_docs():
+            return self.dataset["validation"]
+
+    def invalid_doc_for_prompt(self, doc) -> bool:
+        """The QA prompts are not applicable to all the examples, we want to filter these out."""
+        # TODO: This is a hack, we should have a better way to filter out invalid examples.
+        return self.prompt.name in {
+            'guess the table section text',
+            'guess the table section title',
+        }
+
+    def max_generation_length(self):
+        # Cap generated descriptions; ToTTo targets are single sentences.
+        return 64
+
+
+class GEMTOTTO(GEMTOTTOBase):
+    """Standard train/validation splits (test is disabled — no targets)."""
+
+    SPLIT = ""
+
+
+class GEMTOTTOChallgeSample(GEMTOTTOBase):
+    """Challenge splits: challenge_train_sample/challenge_validation_sample."""
+
+    SPLIT = "challenge_sample"
+
+    def training_docs(self):
+        if self.has_training_docs():
+            if self._training_docs is None:
+                self._training_docs = list(self.dataset["challenge_train_sample"])
+            return self._training_docs
+
+    def validation_docs(self):
+        if self.has_validation_docs():
+            return self.dataset["challenge_validation_sample"]