|
1 | 1 | $schema: http://json-schema.org/draft-07/schema# |
2 | 2 | title: DatasetCollection |
| 3 | +description: A named group of datasets from a single maintainer (e.g. "nextstrain", "community"). |
3 | 4 | type: object |
4 | 5 | required: |
5 | 6 | - datasets |
6 | 7 | - meta |
7 | 8 | properties: |
8 | 9 | meta: |
9 | | - $ref: '#/definitions/DatasetCollectionMeta' |
| 10 | + description: 'Collection metadata: identifier, display name, maintainer contact information' |
| 11 | + allOf: |
| 12 | + - $ref: '#/definitions/DatasetCollectionMeta' |
10 | 13 | datasets: |
| 14 | + description: Datasets belonging to this collection |
11 | 15 | type: array |
12 | 16 | items: |
13 | 17 | $ref: '#/definitions/Dataset' |
14 | 18 | definitions: |
15 | 19 | DatasetCollectionMeta: |
| 20 | + description: 'Metadata describing a dataset collection: identity, branding, and maintainer information.' |
16 | 21 | type: object |
17 | 22 | required: |
18 | 23 | - id |
19 | 24 | properties: |
20 | 25 | id: |
| 26 | + description: Unique identifier for this collection (e.g. "nextstrain") |
21 | 27 | type: string |
22 | 28 | title: |
| 29 | + description: Human-readable display name (e.g. "Nextstrain") |
23 | 30 | type: |
24 | 31 | - string |
25 | 32 | - 'null' |
26 | 33 | description: |
| 34 | + description: Short description of the collection and its maintainer |
27 | 35 | type: |
28 | 36 | - string |
29 | 37 | - 'null' |
30 | 38 | color: |
| 39 | + description: Brand color for UI display (CSS hex, e.g. "#9067b5") |
31 | 40 | type: |
32 | 41 | - string |
33 | 42 | - 'null' |
34 | 43 | icon: |
| 44 | + description: Path to the collection icon image |
35 | 45 | type: |
36 | 46 | - string |
37 | 47 | - 'null' |
38 | 48 | maintainers: |
| 49 | + description: People or organizations maintaining this collection, each given as a name with an associated URL |
39 | 50 | type: array |
40 | 51 | items: |
41 | 52 | $ref: '#/definitions/DatasetCollectionUrl' |
42 | 53 | urls: |
| 54 | + description: 'Related URLs: source repository, contact page, documentation' |
43 | 55 | type: array |
44 | 56 | items: |
45 | 57 | $ref: '#/definitions/DatasetCollectionUrl' |
46 | 58 | DatasetCollectionUrl: |
| 59 | + description: A named URL entry used for maintainer contacts and related links. |
47 | 60 | type: object |
48 | 61 | required: |
49 | 62 | - name |
50 | 63 | - url |
51 | 64 | properties: |
52 | 65 | name: |
| 66 | + description: Label describing this URL (e.g. "source", "contact") |
53 | 67 | type: string |
54 | 68 | url: |
| 69 | + description: The URL |
55 | 70 | type: string |
56 | 71 | Dataset: |
| 72 | + description: A single Nextclade dataset providing reference data and configuration for one pathogen. |
| 73 | + examples: |
| 74 | + - $schema: https://raw.githubusercontent.com/nextstrain/nextclade/refs/heads/release/packages/nextclade-schemas/internal-dataset-json.schema.json |
| 75 | + path: nextstrain/rsv/a/EPI_ISL_412866 |
| 76 | + shortcuts: |
| 77 | + - rsv_a |
| 78 | + - nextstrain/rsv/a |
| 79 | + attributes: |
| 80 | + name: RSV-A |
| 81 | + reference name: hRSV/A/England/397/2017 |
| 82 | + reference accession: EPI_ISL_412866 |
| 83 | + files: |
| 84 | + reference: reference.fasta |
| 85 | + pathogenJson: pathogen.json |
| 86 | + genomeAnnotation: genome_annotation.gff3 |
| 87 | + treeJson: tree.json |
| 88 | + examples: sequences.fasta |
| 89 | + readme: README.md |
| 90 | + changelog: CHANGELOG.md |
| 91 | + versions: |
| 92 | + - tag: 2026-01-06--14-59-32Z |
| 93 | + updatedAt: 2026-01-06T14:59:32Z |
| 94 | + compatibility: |
| 95 | + cli: 3.0.0 |
| 96 | + web: 3.0.0 |
| 97 | + version: |
| 98 | + tag: 2026-01-06--14-59-32Z |
| 99 | + updatedAt: 2026-01-06T14:59:32Z |
| 100 | + compatibility: |
| 101 | + cli: 3.0.0 |
| 102 | + web: 3.0.0 |
57 | 103 | type: object |
58 | 104 | required: |
59 | 105 | - path |
60 | 106 | properties: |
61 | 107 | path: |
| 108 | + description: Unique path-like identifier (e.g. "nextstrain/sars-cov-2/wuhan-hu-1/orfs") |
62 | 109 | type: string |
63 | 110 | shortcuts: |
| 111 | + description: Short alias names for this dataset (e.g. "sars-cov-2", "rsv_a") |
64 | 112 | type: array |
65 | 113 | items: |
66 | 114 | type: string |
67 | 115 | attributes: |
68 | | - $ref: '#/definitions/DatasetAttributes' |
| 116 | + description: 'Dataset attributes: name, reference info, status flags' |
| 117 | + allOf: |
| 118 | + - $ref: '#/definitions/DatasetAttributes' |
69 | 119 | meta: |
70 | | - $ref: '#/definitions/DatasetMeta' |
| 120 | + description: 'Dataset-level metadata: source code URL, bug tracker, authors' |
| 121 | + allOf: |
| 122 | + - $ref: '#/definitions/DatasetMeta' |
71 | 123 | files: |
72 | | - $ref: '#/definitions/DatasetFiles' |
| 124 | + description: Filenames of dataset components (reference, annotation, tree, etc.) |
| 125 | + allOf: |
| 126 | + - $ref: '#/definitions/DatasetFiles' |
73 | 127 | capabilities: |
74 | | - $ref: '#/definitions/DatasetCapabilities' |
| 128 | + description: 'Advertised analysis capabilities: clade counts, QC rules, primer support' |
| 129 | + allOf: |
| 130 | + - $ref: '#/definitions/DatasetCapabilities' |
75 | 131 | versions: |
| 132 | + description: All available tagged releases, ordered newest-first |
76 | 133 | type: array |
77 | 134 | items: |
78 | 135 | $ref: '#/definitions/DatasetVersion' |
79 | 136 | version: |
80 | | - $ref: '#/definitions/DatasetVersion' |
| 137 | + description: The latest (default) version of this dataset |
| 138 | + allOf: |
| 139 | + - $ref: '#/definitions/DatasetVersion' |
81 | 140 | type: |
| 141 | + description: Whether the dataset is a directory-based dataset, a single Auspice JSON file, or another format |
82 | 142 | anyOf: |
83 | 143 | - $ref: '#/definitions/DatasetType' |
84 | 144 | - type: 'null' |
85 | 145 | maintenance: |
| 146 | + description: Maintainer and support information for this dataset |
86 | 147 | anyOf: |
87 | 148 | - $ref: '#/definitions/DatasetMaintenance' |
88 | 149 | - type: 'null' |
@@ -117,131 +178,185 @@ definitions: |
117 | 178 | - 'null' |
118 | 179 | additionalProperties: true |
119 | 180 | DatasetMeta: |
| 181 | + description: 'Dataset-level metadata: authorship and project links.' |
120 | 182 | type: object |
121 | 183 | properties: |
122 | 184 | source code: |
| 185 | + description: URL to the dataset source code repository |
123 | 186 | type: |
124 | 187 | - string |
125 | 188 | - 'null' |
126 | 189 | bugs: |
| 190 | + description: URL to the bug tracker or issue page |
127 | 191 | type: |
128 | 192 | - string |
129 | 193 | - 'null' |
130 | 194 | authors: |
| 195 | + description: List of dataset authors or maintainers |
131 | 196 | type: array |
132 | 197 | items: |
133 | 198 | type: string |
134 | 199 | DatasetFiles: |
| 200 | + description: Filenames of dataset components, relative to the dataset version directory. |
| 201 | + examples: |
| 202 | + - reference: reference.fasta |
| 203 | + pathogenJson: pathogen.json |
| 204 | + genomeAnnotation: genome_annotation.gff3 |
| 205 | + treeJson: tree.json |
| 206 | + examples: sequences.fasta |
| 207 | + readme: README.md |
| 208 | + changelog: CHANGELOG.md |
135 | 209 | type: object |
136 | 210 | properties: |
137 | 211 | reference: |
| 212 | + description: Reference sequence FASTA file (e.g. "reference.fasta") |
138 | 213 | type: |
139 | 214 | - string |
140 | 215 | - 'null' |
141 | 216 | pathogenJson: |
| 217 | + description: Pathogen configuration file (e.g. "pathogen.json") |
142 | 218 | type: |
143 | 219 | - string |
144 | 220 | - 'null' |
145 | 221 | genomeAnnotation: |
| 222 | + description: Genome annotation in GFF3 format (e.g. "genome_annotation.gff3") |
146 | 223 | type: |
147 | 224 | - string |
148 | 225 | - 'null' |
149 | 226 | treeJson: |
| 227 | + description: Reference phylogenetic tree in Auspice JSON format (e.g. "tree.json") |
150 | 228 | type: |
151 | 229 | - string |
152 | 230 | - 'null' |
153 | 231 | examples: |
| 232 | + description: Example query sequences for testing (e.g. "sequences.fasta") |
154 | 233 | type: |
155 | 234 | - string |
156 | 235 | - 'null' |
157 | 236 | readme: |
| 237 | + description: Dataset README documentation file |
158 | 238 | type: |
159 | 239 | - string |
160 | 240 | - 'null' |
161 | 241 | changelog: |
| 242 | + description: Dataset changelog file |
162 | 243 | type: |
163 | 244 | - string |
164 | 245 | - 'null' |
165 | 246 | DatasetCapabilities: |
| 247 | + description: Analysis features supported by a dataset, used for UI display and filtering. |
166 | 248 | type: object |
167 | 249 | properties: |
168 | 250 | clades: |
| 251 | + description: Number of distinct clade values defined in the reference tree |
169 | 252 | type: |
170 | 253 | - integer |
171 | 254 | - 'null' |
172 | 255 | format: uint |
173 | 256 | minimum: 0.0 |
174 | 257 | customClades: |
| 258 | + description: 'Additional clade classification systems and their value counts (e.g. "Nextclade_pango": 4731)' |
175 | 259 | type: object |
176 | 260 | additionalProperties: |
177 | 261 | type: integer |
178 | 262 | format: uint |
179 | 263 | minimum: 0.0 |
180 | 264 | qc: |
| 265 | + description: QC rule names enabled for this dataset (e.g. "missingData", "privateMutations") |
181 | 266 | type: array |
182 | 267 | items: |
183 | 268 | type: string |
184 | 269 | primers: |
| 270 | + description: Whether PCR primer mutation detection is available |
185 | 271 | type: |
186 | 272 | - boolean |
187 | 273 | - 'null' |
188 | 274 | other: |
| 275 | + description: Other capabilities not covered above (e.g. "phenotypeData", "mutLabels") |
189 | 276 | type: array |
190 | 277 | items: |
191 | 278 | type: string |
192 | 279 | DatasetVersion: |
| 280 | + description: A tagged release of a dataset, identified by a timestamp tag. |
| 281 | + examples: |
| 282 | + - tag: 2026-01-06--14-59-32Z |
| 283 | + updatedAt: 2026-01-06T14:59:32Z |
| 284 | + compatibility: |
| 285 | + cli: 3.0.0 |
| 286 | + web: 3.0.0 |
193 | 287 | type: object |
194 | 288 | required: |
195 | 289 | - tag |
196 | 290 | properties: |
197 | 291 | tag: |
| 292 | + description: Version identifier in timestamp format (e.g. "2026-01-06--14-59-32Z") |
198 | 293 | type: string |
199 | 294 | updatedAt: |
| 295 | + description: ISO 8601 timestamp of when this version was published |
200 | 296 | type: |
201 | 297 | - string |
202 | 298 | - 'null' |
203 | 299 | compatibility: |
| 300 | + description: Minimum CLI/web versions required to use this dataset version |
204 | 301 | anyOf: |
205 | 302 | - $ref: '#/definitions/DatasetCompatibility' |
206 | 303 | - type: 'null' |
207 | 304 | DatasetCompatibility: |
| 305 | + description: Minimum application versions required to use a dataset version. |
208 | 306 | type: object |
209 | 307 | properties: |
210 | 308 | cli: |
| 309 | + description: Minimum Nextclade CLI semver version (e.g. "3.0.0-alpha.0") |
211 | 310 | type: string |
212 | 311 | web: |
| 312 | + description: Minimum Nextclade Web semver version (e.g. "3.0.0-alpha.0") |
213 | 313 | type: string |
214 | 314 | DatasetType: |
215 | | - type: string |
216 | | - enum: |
217 | | - - directory |
218 | | - - auspiceJson |
219 | | - - other |
| 315 | + description: How dataset content is structured. |
| 316 | + oneOf: |
| 317 | + - description: Standard dataset with individual files in a directory |
| 318 | + type: string |
| 319 | + enum: |
| 320 | + - directory |
| 321 | + - description: Single Auspice JSON file used as both tree and dataset |
| 322 | + type: string |
| 323 | + enum: |
| 324 | + - auspiceJson |
| 325 | + - description: Other dataset format |
| 326 | + type: string |
| 327 | + enum: |
| 328 | + - other |
220 | 329 | DatasetMaintenance: |
221 | 330 | description: Contact and documentation URLs for dataset maintenance |
222 | 331 | type: object |
223 | 332 | properties: |
224 | 333 | website: |
| 334 | + description: URLs for the project or organization website. |
225 | 335 | type: array |
226 | 336 | items: |
227 | 337 | type: string |
228 | 338 | documentation: |
| 339 | + description: URLs for dataset documentation and usage guides. |
229 | 340 | type: array |
230 | 341 | items: |
231 | 342 | type: string |
232 | 343 | source code: |
| 344 | + description: URLs for the source code repositories used to build the dataset. |
233 | 345 | type: array |
234 | 346 | items: |
235 | 347 | type: string |
236 | 348 | issues: |
| 349 | + description: URLs for reporting bugs and requesting features related to the dataset. |
237 | 350 | type: array |
238 | 351 | items: |
239 | 352 | type: string |
240 | 353 | organizations: |
| 354 | + description: Names of organizations responsible for maintaining the dataset. |
241 | 355 | type: array |
242 | 356 | items: |
243 | 357 | type: string |
244 | 358 | authors: |
| 359 | + description: Names and contact information of dataset authors. |
245 | 360 | type: array |
246 | 361 | items: |
247 | 362 | type: string |
0 commit comments