You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
paperswithcode_id: {paperswithcode_id} # Dataset id on PapersWithCode (from the URL). Example for SQuAD: squad
36
-
configs: # Optional for datasets with multiple configurations like glue.
37
-
- {config_0} # Example for glue: sst2
38
-
- {config_1} # Example for glue: cola
36
+
configs: # Optional. This can be used to pass additional parameters to the dataset loader, such as `data_files`, `data_dir`, and any builder-specific parameters
37
+
- config_name: {config_name_0} # Name of the dataset subset, if applicable. Example: default
38
+
data_files:
39
+
- split: {split_name_0} # Example: train
40
+
path: {file_path_0} # Example: data.csv
41
+
- split: {split_name_1} # Example: test
42
+
path: {file_path_1} # Example: holdout.csv
43
+
- config_name: {config_name_1} # Name of the dataset subset. Example: processed
44
+
data_files:
45
+
- split: {split_name_3} # Example: train
46
+
path: {file_path_3} # Example: data_processed.csv
39
47
40
48
# Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
41
49
dataset_info:
@@ -61,15 +69,15 @@ dataset_info:
61
69
# dtype: string
62
70
# - name: answer_start
63
71
# dtype: int32
64
-
config_name: {config_name} # Example for glue: sst2
72
+
config_name: {config_name} # Name of the dataset subset. Example for glue: sst2
65
73
splits:
66
74
- name: {split_name_0} # Example: train
67
75
num_bytes: {split_num_bytes_0} # Example for SQuAD: 79317110
68
76
num_examples: {split_num_examples_0} # Example for SQuAD: 87599
69
77
download_size: {dataset_download_size} # Example for SQuAD: 35142551
70
78
dataset_size: {dataset_size} # Example for SQuAD: 89789763
71
79
72
-
# It can also be a list of multiple configurations:
80
+
# It can also be a list of multiple subsets (also called "configurations"):
73
81
# ```yaml
74
82
# dataset_info:
75
83
# - config_name: {config0}
@@ -90,7 +98,7 @@ extra_gated_prompt: {extra_gated_prompt} # Example for speech datasets: By clic
90
98
91
99
# Optional. Add this if you want to encode a train and evaluation info in a structured way for AutoTrain or Evaluation on the Hub
92
100
train-eval-index:
93
-
- config: {config_name} # The dataset config name to use. Example for datasets without configs: default. Example for glue: sst2
101
+
- config: {config_name} # The dataset subset name to use. Example for datasets without subsets: default. Example for glue: sst2
94
102
task: {task_name} # The task category name (same as task_category). Example: question-answering
95
103
task_id: {task_type} # The AutoTrain task id. Example: extractive_question_answering
From https://github.com/huggingface/hub-docs/pull/1413:
4
+
* Use `<inference>` for getting started
5
+
* Add some screenshots: supported models
6
+
* Add flow chart of how API works
7
+
* Add table with all tasks
8
+
* Add missing tasks: depth estimation and zero shot image classification
9
+
* Some tasks have no warm models, should we remove them for now? E.g. https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending BUT many are cold and working, so actually linking to both could make sense - internal issue https://github.com/huggingface-internal/moon-landing/issues/10966
10
+
* See also this [google doc](https://docs.google.com/document/d/1xy5Ug4C_qGbqp4x3T3rj_VOyjQzQLlyce-L6I_hYi94/edit?usp=sharing)
11
+
* Add CI to auto-generate the docs when handlebars templates are updated
0 commit comments