
Commit 48ea61b

Authored by lappemic, Polina Kazakova, and lhoestq
add config_name and data_files to datasetcard.md specs (#1292)
* add config_name and data_files to datasetcard.md specs
* Apply suggestions
  Co-authored-by: Polina Kazakova <[email protected]>
* Apply suggestion
  Co-authored-by: Quentin Lhoest <[email protected]>
* Remove #TODO
* Update datasetcard.md
  Co-authored-by: Polina Kazakova <[email protected]>

---------

Co-authored-by: Polina Kazakova <[email protected]>
Co-authored-by: Quentin Lhoest <[email protected]>
1 parent 52a3ab2 commit 48ea61b

1 file changed: 11 additions, 3 deletions

datasetcard.md

Lines changed: 11 additions & 3 deletions
```diff
@@ -33,9 +33,17 @@ task_ids:
 - {subtask_0} # Example: extractive-qa
 - {subtask_1} # Example: multi-class-image-classification
 paperswithcode_id: {paperswithcode_id} # Dataset id on PapersWithCode (from the URL). Example for SQuAD: squad
-configs: # Optional for datasets with multiple configurations like glue.
-- {config_0} # Example for glue: sst2
-- {config_1} # Example for glue: cola
+configs: # Optional. This can be used to pass additional parameters to the dataset loader, such as `data_files`, `data_dir`, and any builder-specific parameters
+- config_name: {config_name_0} # Example: default
+  data_files:
+  - split: {split_name_0} # Example: train
+    path: {file_path_0} # Example: data.csv
+  - split: {split_name_1} # Example: test
+    path: {file_path_1} # Example: holdout.csv
+- config_name: {config_name_1} # Example: processed
+  data_files:
+  - split: {split_name_3} # Example: train
+    path: {file_path_3} # Example: data_processed.csv
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:
```
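For reference, here is a minimal sketch of what the new `configs` block could look like once the placeholders are filled in, using the example values suggested in the template comments (a `default` and a `processed` configuration backed by hypothetical CSV files). The file names are illustrative only, not part of the spec.

```yaml
configs:
- config_name: default            # first configuration, per the "Example: default" comment
  data_files:
  - split: train
    path: data.csv
  - split: test
    path: holdout.csv
- config_name: processed          # second configuration, per the "Example: processed" comment
  data_files:
  - split: train                  # each split maps to a data file (or list of files)
    path: data_processed.csv
```

Assuming the listed files live in the dataset repository, a consumer such as `datasets.load_dataset` can then select a configuration by name (for example the `processed` one) and receive its declared splits.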
