Skip to content

Commit 1fe1f1b

Browse files
author
The TensorFlow Datasets Authors
committed
Automated metadata update.
PiperOrigin-RevId: 754860369
1 parent 1741a94 commit 1fe1f1b

File tree

22 files changed

+860
-10
lines changed

22 files changed

+860
-10
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"citation": "@inproceedings{matton-etal-2024-leakage,\n title = \"On Leakage of Code Generation Evaluation Datasets\",\n author = \"Matton, Alexandre and\n Sherborne, Tom and\n Aumiller, Dennis and\n Tommasone, Elena and\n Alizadeh, Milad and\n He, Jingyi and\n Ma, Raymond and\n Voisin, Maxime and\n Gilsenan-McMahon, Ellen and\n Gall{\\'e}, Matthias\",\n editor = \"Al-Onaizan, Yaser and\n Bansal, Mohit and\n Chen, Yun-Nung\",\n booktitle = \"Findings of the Association for Computational Linguistics: EMNLP 2024\",\n month = nov,\n year = \"2024\",\n address = \"Miami, Florida, USA\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2024.findings-emnlp.772/\",\n doi = \"10.18653/v1/2024.findings-emnlp.772\",\n pages = \"13215--13223\",\n}",
3+
"configDescription": "Multilingual LBPP",
4+
"configName": "all",
5+
"description": "*Less Basic Python Programming* is a collection of 161 programming problems\nwith accompanying unit tests.\nThey were created with the aim of being fresh (not leaked at the time of\ncreation) and more difficult than similar datasets (e.g., HumanEval and MBPP).\nIt can serve as a drop-in replacement or enrichment of those datasets as they\nare structured in an equivalent way.",
6+
"downloadSize": "1869160",
7+
"fileFormat": "array_record",
8+
"location": {
9+
"urls": [
10+
"https://aclanthology.org/2024.findings-emnlp.772/"
11+
]
12+
},
13+
"moduleName": "tensorflow_datasets.datasets.lbpp.lbpp_dataset_builder",
14+
"name": "lbpp",
15+
"splits": [
16+
{
17+
"filepathTemplate": "{DATASET}-{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}",
18+
"name": "test",
19+
"numBytes": "4510245",
20+
"shardLengths": [
21+
"944"
22+
]
23+
}
24+
],
25+
"version": "2.0.0"
26+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"featuresDict": {
3+
"features": {
4+
"categories": {
5+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
6+
"sequence": {
7+
"feature": {
8+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
9+
"text": {}
10+
},
11+
"length": "-1"
12+
}
13+
},
14+
"completion": {
15+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
16+
"text": {}
17+
},
18+
"instruction": {
19+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
20+
"text": {}
21+
},
22+
"language": {
23+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
24+
"text": {}
25+
},
26+
"signature": {
27+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
28+
"text": {}
29+
},
30+
"task_id": {
31+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
32+
"text": {}
33+
},
34+
"test_file": {
35+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
36+
"text": {}
37+
},
38+
"test_list": {
39+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
40+
"sequence": {
41+
"feature": {
42+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
43+
"text": {}
44+
},
45+
"length": "-1"
46+
}
47+
},
48+
"test_setup": {
49+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
50+
"text": {}
51+
},
52+
"title": {
53+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
54+
"text": {}
55+
}
56+
}
57+
},
58+
"pythonClassName": "tensorflow_datasets.core.features.features_dict.FeaturesDict"
59+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"citation": "@inproceedings{matton-etal-2024-leakage,\n title = \"On Leakage of Code Generation Evaluation Datasets\",\n author = \"Matton, Alexandre and\n Sherborne, Tom and\n Aumiller, Dennis and\n Tommasone, Elena and\n Alizadeh, Milad and\n He, Jingyi and\n Ma, Raymond and\n Voisin, Maxime and\n Gilsenan-McMahon, Ellen and\n Gall{\\'e}, Matthias\",\n editor = \"Al-Onaizan, Yaser and\n Bansal, Mohit and\n Chen, Yun-Nung\",\n booktitle = \"Findings of the Association for Computational Linguistics: EMNLP 2024\",\n month = nov,\n year = \"2024\",\n address = \"Miami, Florida, USA\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2024.findings-emnlp.772/\",\n doi = \"10.18653/v1/2024.findings-emnlp.772\",\n pages = \"13215--13223\",\n}",
3+
"configDescription": "C++ LBPP",
4+
"configName": "cpp",
5+
"description": "*Less Basic Python Programming* is a collection of 161 programming problems\nwith accompanying unit tests.\nThey were created with the aim of being fresh (not leaked at the time of\ncreation) and more difficult than similar datasets (e.g., HumanEval and MBPP).\nIt can serve as a drop-in replacement or enrichment of those datasets as they\nare structured in an equivalent way.",
6+
"downloadSize": "321996",
7+
"fileFormat": "array_record",
8+
"location": {
9+
"urls": [
10+
"https://aclanthology.org/2024.findings-emnlp.772/"
11+
]
12+
},
13+
"moduleName": "tensorflow_datasets.datasets.lbpp.lbpp_dataset_builder",
14+
"name": "lbpp",
15+
"splits": [
16+
{
17+
"filepathTemplate": "{DATASET}-{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}",
18+
"name": "test",
19+
"numBytes": "780151",
20+
"shardLengths": [
21+
"161"
22+
]
23+
}
24+
],
25+
"version": "2.0.0"
26+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"featuresDict": {
3+
"features": {
4+
"categories": {
5+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
6+
"sequence": {
7+
"feature": {
8+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
9+
"text": {}
10+
},
11+
"length": "-1"
12+
}
13+
},
14+
"completion": {
15+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
16+
"text": {}
17+
},
18+
"instruction": {
19+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
20+
"text": {}
21+
},
22+
"language": {
23+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
24+
"text": {}
25+
},
26+
"signature": {
27+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
28+
"text": {}
29+
},
30+
"task_id": {
31+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
32+
"text": {}
33+
},
34+
"test_file": {
35+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
36+
"text": {}
37+
},
38+
"test_list": {
39+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
40+
"sequence": {
41+
"feature": {
42+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
43+
"text": {}
44+
},
45+
"length": "-1"
46+
}
47+
},
48+
"test_setup": {
49+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
50+
"text": {}
51+
},
52+
"title": {
53+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
54+
"text": {}
55+
}
56+
}
57+
},
58+
"pythonClassName": "tensorflow_datasets.core.features.features_dict.FeaturesDict"
59+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"citation": "@inproceedings{matton-etal-2024-leakage,\n title = \"On Leakage of Code Generation Evaluation Datasets\",\n author = \"Matton, Alexandre and\n Sherborne, Tom and\n Aumiller, Dennis and\n Tommasone, Elena and\n Alizadeh, Milad and\n He, Jingyi and\n Ma, Raymond and\n Voisin, Maxime and\n Gilsenan-McMahon, Ellen and\n Gall{\\'e}, Matthias\",\n editor = \"Al-Onaizan, Yaser and\n Bansal, Mohit and\n Chen, Yun-Nung\",\n booktitle = \"Findings of the Association for Computational Linguistics: EMNLP 2024\",\n month = nov,\n year = \"2024\",\n address = \"Miami, Florida, USA\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2024.findings-emnlp.772/\",\n doi = \"10.18653/v1/2024.findings-emnlp.772\",\n pages = \"13215--13223\",\n}",
3+
"configDescription": "Python LBPP",
4+
"configName": "default",
5+
"description": "*Less Basic Python Programming* is a collection of 161 programming problems\nwith accompanying unit tests.\nThey were created with the aim of being fresh (not leaked at the time of\ncreation) and more difficult than similar datasets (e.g., HumanEval and MBPP).\nIt can serve as a drop-in replacement or enrichment of those datasets as they\nare structured in an equivalent way.",
6+
"downloadSize": "286619",
7+
"fileFormat": "array_record",
8+
"location": {
9+
"urls": [
10+
"https://aclanthology.org/2024.findings-emnlp.772/"
11+
]
12+
},
13+
"moduleName": "tensorflow_datasets.datasets.lbpp.lbpp_dataset_builder",
14+
"name": "lbpp",
15+
"splits": [
16+
{
17+
"filepathTemplate": "{DATASET}-{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}",
18+
"name": "test",
19+
"numBytes": "642089",
20+
"shardLengths": [
21+
"162"
22+
]
23+
}
24+
],
25+
"version": "2.0.0"
26+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"featuresDict": {
3+
"features": {
4+
"categories": {
5+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
6+
"sequence": {
7+
"feature": {
8+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
9+
"text": {}
10+
},
11+
"length": "-1"
12+
}
13+
},
14+
"completion": {
15+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
16+
"text": {}
17+
},
18+
"instruction": {
19+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
20+
"text": {}
21+
},
22+
"language": {
23+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
24+
"text": {}
25+
},
26+
"signature": {
27+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
28+
"text": {}
29+
},
30+
"task_id": {
31+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
32+
"text": {}
33+
},
34+
"test_file": {
35+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
36+
"text": {}
37+
},
38+
"test_list": {
39+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
40+
"sequence": {
41+
"feature": {
42+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
43+
"text": {}
44+
},
45+
"length": "-1"
46+
}
47+
},
48+
"test_setup": {
49+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
50+
"text": {}
51+
},
52+
"title": {
53+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
54+
"text": {}
55+
}
56+
}
57+
},
58+
"pythonClassName": "tensorflow_datasets.core.features.features_dict.FeaturesDict"
59+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"citation": "@inproceedings{matton-etal-2024-leakage,\n title = \"On Leakage of Code Generation Evaluation Datasets\",\n author = \"Matton, Alexandre and\n Sherborne, Tom and\n Aumiller, Dennis and\n Tommasone, Elena and\n Alizadeh, Milad and\n He, Jingyi and\n Ma, Raymond and\n Voisin, Maxime and\n Gilsenan-McMahon, Ellen and\n Gall{\\'e}, Matthias\",\n editor = \"Al-Onaizan, Yaser and\n Bansal, Mohit and\n Chen, Yun-Nung\",\n booktitle = \"Findings of the Association for Computational Linguistics: EMNLP 2024\",\n month = nov,\n year = \"2024\",\n address = \"Miami, Florida, USA\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2024.findings-emnlp.772/\",\n doi = \"10.18653/v1/2024.findings-emnlp.772\",\n pages = \"13215--13223\",\n}",
3+
"configDescription": "Go LBPP",
4+
"configName": "go",
5+
"description": "*Less Basic Python Programming* is a collection of 161 programming problems\nwith accompanying unit tests.\nThey were created with the aim of being fresh (not leaked at the time of\ncreation) and more difficult than similar datasets (e.g., HumanEval and MBPP).\nIt can serve as a drop-in replacement or enrichment of those datasets as they\nare structured in an equivalent way.",
6+
"downloadSize": "324703",
7+
"fileFormat": "array_record",
8+
"location": {
9+
"urls": [
10+
"https://aclanthology.org/2024.findings-emnlp.772/"
11+
]
12+
},
13+
"moduleName": "tensorflow_datasets.datasets.lbpp.lbpp_dataset_builder",
14+
"name": "lbpp",
15+
"splits": [
16+
{
17+
"filepathTemplate": "{DATASET}-{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}",
18+
"name": "test",
19+
"numBytes": "703722",
20+
"shardLengths": [
21+
"161"
22+
]
23+
}
24+
],
25+
"version": "2.0.0"
26+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"featuresDict": {
3+
"features": {
4+
"categories": {
5+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
6+
"sequence": {
7+
"feature": {
8+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
9+
"text": {}
10+
},
11+
"length": "-1"
12+
}
13+
},
14+
"completion": {
15+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
16+
"text": {}
17+
},
18+
"instruction": {
19+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
20+
"text": {}
21+
},
22+
"language": {
23+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
24+
"text": {}
25+
},
26+
"signature": {
27+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
28+
"text": {}
29+
},
30+
"task_id": {
31+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
32+
"text": {}
33+
},
34+
"test_file": {
35+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
36+
"text": {}
37+
},
38+
"test_list": {
39+
"pythonClassName": "tensorflow_datasets.core.features.sequence_feature.Sequence",
40+
"sequence": {
41+
"feature": {
42+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
43+
"text": {}
44+
},
45+
"length": "-1"
46+
}
47+
},
48+
"test_setup": {
49+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
50+
"text": {}
51+
},
52+
"title": {
53+
"pythonClassName": "tensorflow_datasets.core.features.text_feature.Text",
54+
"text": {}
55+
}
56+
}
57+
},
58+
"pythonClassName": "tensorflow_datasets.core.features.features_dict.FeaturesDict"
59+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"citation": "@inproceedings{matton-etal-2024-leakage,\n title = \"On Leakage of Code Generation Evaluation Datasets\",\n author = \"Matton, Alexandre and\n Sherborne, Tom and\n Aumiller, Dennis and\n Tommasone, Elena and\n Alizadeh, Milad and\n He, Jingyi and\n Ma, Raymond and\n Voisin, Maxime and\n Gilsenan-McMahon, Ellen and\n Gall{\\'e}, Matthias\",\n editor = \"Al-Onaizan, Yaser and\n Bansal, Mohit and\n Chen, Yun-Nung\",\n booktitle = \"Findings of the Association for Computational Linguistics: EMNLP 2024\",\n month = nov,\n year = \"2024\",\n address = \"Miami, Florida, USA\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2024.findings-emnlp.772/\",\n doi = \"10.18653/v1/2024.findings-emnlp.772\",\n pages = \"13215--13223\",\n}",
3+
"configDescription": "Java LBPP",
4+
"configName": "java",
5+
"description": "*Less Basic Python Programming* is a collection of 161 programming problems\nwith accompanying unit tests.\nThey were created with the aim of being fresh (not leaked at the time of\ncreation) and more difficult than similar datasets (e.g., HumanEval and MBPP).\nIt can serve as a drop-in replacement or enrichment of those datasets as they\nare structured in an equivalent way.",
6+
"downloadSize": "346012",
7+
"fileFormat": "array_record",
8+
"location": {
9+
"urls": [
10+
"https://aclanthology.org/2024.findings-emnlp.772/"
11+
]
12+
},
13+
"moduleName": "tensorflow_datasets.datasets.lbpp.lbpp_dataset_builder",
14+
"name": "lbpp",
15+
"splits": [
16+
{
17+
"filepathTemplate": "{DATASET}-{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}",
18+
"name": "test",
19+
"numBytes": "908697",
20+
"shardLengths": [
21+
"158"
22+
]
23+
}
24+
],
25+
"version": "2.0.0"
26+
}

0 commit comments

Comments
 (0)