Skip to content

Commit 46d4318

Browse files
committed
fix: prepare for the next experimaestro version
1 parent 3d75133 commit 46d4318

File tree

9 files changed

+25
-24
lines changed

9 files changed

+25
-24
lines changed

src/datamaestro_text/config/com/github/aagohary/canard.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def main(train, dev, test):
3737
Each dataset is an instance of :class:`datamaestro_text.data.conversation.CanardDataset`
3838
"""
3939
return {
40-
"train": CanardDataset(path=train),
41-
"validation": CanardDataset(path=dev),
42-
"test": CanardDataset(path=test),
40+
"train": CanardDataset.C(path=train),
41+
"validation": CanardDataset.C(path=dev),
42+
"test": CanardDataset.C(path=test),
4343
}

src/datamaestro_text/config/com/github/apple/ml-qrecc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def main(data: Path) -> Supervised[QReCCDataset, None, QReCCDataset]:
5151
)
5252
class Content(LZ4JSONLDocumentStore):
5353
"""QReCC mentioned URLs content"""
54+
5455
@staticmethod
5556
def __create_dataset__(dataset, options=None):
5657
ds = reference(reference=main).setup(dataset, options)
@@ -65,7 +66,7 @@ def __create_dataset__(dataset, options=None):
6566
"id",
6667
).setup(dataset, options)
6768

68-
return Content(jsonl_path=store_path)
69+
return Content.C(jsonl_path=store_path)
6970

7071
@staticmethod
7172
def _documents(path: Path):

src/datamaestro_text/config/com/microsoft/msmarco/passage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
@dataset(url="https://github.com/microsoft/MSMARCO-Passage-Ranking")
4848
def collection_etc(data) -> Folder:
4949
"""Documents and some more files"""
50-
return Folder(path=data)
50+
return Folder.C(path=data)
5151

5252

5353
@lua

src/datamaestro_text/config/com/sentiment140.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ def english(dir):
2727
If you use this data, please cite Sentiment140 as your source.
2828
"""
2929
return Supervised.C(
30-
train=Generic(path=dir / "training.1600000.processed.noemoticon.csv"),
31-
test=Generic(path=dir / "testdata.manual.2009.06.14.csv"),
30+
train=Generic.C(path=dir / "training.1600000.processed.noemoticon.csv"),
31+
test=Generic.C(path=dir / "testdata.manual.2009.06.14.csv"),
3232
)

src/datamaestro_text/config/edu/stanford/aclimdb.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ def aclimdb(data):
1111
Paper http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf
1212
"""
1313
return {
14-
"train": FolderBased(path=data / "train", classes=["neg", "pos"]),
15-
"test": FolderBased(path=data / "test", classes=["neg", "pos"]),
14+
"train": FolderBased.C(path=data / "train", classes=["neg", "pos"]),
15+
"test": FolderBased.C(path=data / "test", classes=["neg", "pos"]),
1616
}

src/datamaestro_text/config/io/github/thunlp/fewrel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@ def v1(train, validation):
3232
Only the train and validation dataset are available. The test set is hidden
3333
for the leaderboard.
3434
"""
35-
return {"train": File(path=train), "validation": File(path=validation)}
35+
return {"train": File.C(path=train), "validation": File.C(path=validation)}

src/datamaestro_text/config/io/metamind/research/wikitext.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ def WikiText(data, type):
3030
https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/
3131
"""
3232
return {
33-
"train": File(path=data / ("wiki.train.%s" % type)),
34-
"validation": File(path=data / ("wiki.valid.%s" % type)),
35-
"test": File(path=data / ("wiki.test.%s" % type)),
33+
"train": File.C(path=data / ("wiki.train.%s" % type)),
34+
"validation": File.C(path=data / ("wiki.valid.%s" % type)),
35+
"test": File.C(path=data / ("wiki.test.%s" % type)),
3636
}
3737

3838

src/datamaestro_text/config/org/grouplens/movielens.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ def small(ds) -> Movielens:
3131
100,000 ratings and 3,600 tag applications applied to 9,000 movies by 600 users (as of 9/2018)
3232
"""
3333
return {
34-
"ratings": csv.Generic(path=ds / "ratings.csv", names_row=0),
35-
"links": csv.Generic(path=ds / "links.csv", names_row=0),
36-
"movies": csv.Generic(path=ds / "movies.csv", names_row=0),
37-
"tags": csv.Generic(path=ds / "tags.csv", names_row=0),
34+
"ratings": csv.Generic.C(path=ds / "ratings.csv", names_row=0),
35+
"links": csv.Generic.C(path=ds / "links.csv", names_row=0),
36+
"movies": csv.Generic.C(path=ds / "movies.csv", names_row=0),
37+
"tags": csv.Generic.C(path=ds / "tags.csv", names_row=0),
3838
}
3939

4040

@@ -46,8 +46,8 @@ def full(ds) -> Movielens:
4646
27,000,000 ratings and 1,100,000 tag applications applied to 58,000 movies by 280,000 users (as of 9/2018)
4747
"""
4848
return {
49-
"ratings": csv.Generic(path=ds / "ratings.csv", names_row=0),
50-
"links": csv.Generic(path=ds / "links.csv", names_row=0),
51-
"movies": csv.Generic(path=ds / "movies.csv", names_row=0),
52-
"tags": csv.Generic(path=ds / "tags.csv", names_row=0),
49+
"ratings": csv.Generic.C(path=ds / "ratings.csv", names_row=0),
50+
"links": csv.Generic.C(path=ds / "links.csv", names_row=0),
51+
"movies": csv.Generic.C(path=ds / "movies.csv", names_row=0),
52+
"tags": csv.Generic.C(path=ds / "tags.csv", names_row=0),
5353
}

src/datamaestro_text/config/org/universaldependencies/french.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ def gsd(ds) -> Supervised:
3434
is updated since 2015 independently from the previous source.
3535
"""
3636
return {
37-
"train": CoNLL_U(path=ds / "fr_gsd-ud-train.conllu"),
38-
"test": CoNLL_U(path=ds / "fr_gsd-ud-dev.conllu"),
39-
"validation": CoNLL_U(path=ds / "fr_gsd-ud-test.conllu"),
37+
"train": CoNLL_U.C(path=ds / "fr_gsd-ud-train.conllu"),
38+
"test": CoNLL_U.C(path=ds / "fr_gsd-ud-dev.conllu"),
39+
"validation": CoNLL_U.C(path=ds / "fr_gsd-ud-test.conllu"),
4040
}
4141

4242

0 commit comments

Comments
 (0)