Skip to content

Commit 15a6a89

Browse files
committed
added ml bom
Signed-off-by: Gal Moyal <[email protected]>
1 parent d78f472 commit 15a6a89

File tree

4 files changed

+505
-0
lines changed

4 files changed

+505
-0
lines changed

MLBOM/Dataset/bom.json

Lines changed: 388 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,388 @@
1+
{
2+
"$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json",
3+
"bomFormat": "CycloneDX",
4+
"specVersion": "1.6",
5+
"serialNumber": "urn:uuid:75de3b9b-9e53-4421-a259-11f18afc22bf",
6+
"version": 1,
7+
"metadata": {
8+
"timestamp": "2024-11-24T13:10:49Z"
9+
},
10+
"components": [
11+
{
12+
"type": "data",
13+
"supplier": {
14+
"name": "Wikimedia"
15+
},
16+
"manufacturer": {
17+
"name": "Wikimedia"
18+
},
19+
"publisher": "Hugging Face Inc",
20+
"name": "wikipedia",
21+
"version": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa",
22+
"licenses": [
23+
{
24+
"license": {
25+
"id": "CC-BY-SA-3.0",
26+
"name": "Creative Commons Attribution Share Alike 3.0",
27+
"url": "https://spdx.org/licenses/CC-BY-SA-3.0.html"
28+
}
29+
},
30+
{
31+
"license": {
32+
"id": "GFDL-1.3",
33+
"name": "GNU Free Documentation License v1.3",
34+
"url": "https://www.gnu.org/licenses/fdl-1.3.en.html"
35+
}
36+
}
37+
],
38+
"externalReferences": [
39+
{
40+
"type": "website",
41+
"url": "https://huggingface.co/datasets/wikimedia/wikipedia"
42+
}
43+
],
44+
"hashes": [
45+
{
46+
"alg": "SHA-1",
47+
"content": "b04c8d1ceb2f5cd4588862100d08de323dccfbaa"
48+
}
49+
],
50+
"tags": [
51+
"task_categories:text-generation",
52+
"task_categories:fill-mask",
53+
"task_ids:language-modeling",
54+
"task_ids:masked-language-modeling",
55+
"language:ab",
56+
"language:ace",
57+
"language:ady",
58+
"language:af",
59+
"language:alt",
60+
"language:am",
61+
"language:ami",
62+
"language:an",
63+
"language:ang",
64+
"language:anp",
65+
"language:ar",
66+
"language:arc",
67+
"language:ary",
68+
"language:arz",
69+
"language:as",
70+
"language:ast",
71+
"language:atj",
72+
"language:av",
73+
"language:avk",
74+
"language:awa",
75+
"language:ay",
76+
"language:az",
77+
"language:azb",
78+
"language:ba",
79+
"language:ban",
80+
"language:bar",
81+
"language:bbc",
82+
"language:bcl",
83+
"language:be",
84+
"language:bg",
85+
"language:bh",
86+
"language:bi",
87+
"language:bjn",
88+
"language:blk",
89+
"language:bm",
90+
"language:bn",
91+
"language:bo",
92+
"language:bpy",
93+
"language:br",
94+
"language:bs",
95+
"language:bug",
96+
"language:bxr",
97+
"language:ca",
98+
"language:cbk",
99+
"language:cdo",
100+
"language:ce",
101+
"language:ceb",
102+
"language:ch",
103+
"language:chr",
104+
"language:chy",
105+
"language:ckb",
106+
"language:co",
107+
"language:cr",
108+
"language:crh",
109+
"language:cs",
110+
"language:csb",
111+
"language:cu",
112+
"language:cv",
113+
"language:cy",
114+
"language:da",
115+
"language:dag",
116+
"language:de",
117+
"language:dga",
118+
"language:din",
119+
"language:diq",
120+
"language:dsb",
121+
"language:dty",
122+
"language:dv",
123+
"language:dz",
124+
"language:ee",
125+
"language:el",
126+
"language:eml",
127+
"language:en",
128+
"language:eo",
129+
"language:es",
130+
"language:et",
131+
"language:eu",
132+
"language:ext",
133+
"language:fa",
134+
"language:fat",
135+
"language:ff",
136+
"language:fi",
137+
"language:fj",
138+
"language:fo",
139+
"language:fon",
140+
"language:fr",
141+
"language:frp",
142+
"language:frr",
143+
"language:fur",
144+
"language:fy",
145+
"language:ga",
146+
"language:gag",
147+
"language:gan",
148+
"language:gcr",
149+
"language:gd",
150+
"language:gl",
151+
"language:glk",
152+
"language:gn",
153+
"language:gom",
154+
"language:gor",
155+
"language:got",
156+
"language:gpe",
157+
"language:gsw",
158+
"language:gu",
159+
"language:guc",
160+
"language:gur",
161+
"language:guw",
162+
"language:gv",
163+
"language:ha",
164+
"language:hak",
165+
"language:haw",
166+
"language:hbs",
167+
"language:he",
168+
"language:hi",
169+
"language:hif",
170+
"language:hr",
171+
"language:hsb",
172+
"language:ht",
173+
"language:hu",
174+
"language:hy",
175+
"language:hyw",
176+
"language:ia",
177+
"language:id",
178+
"language:ie",
179+
"language:ig",
180+
"language:ik",
181+
"language:ilo",
182+
"language:inh",
183+
"language:io",
184+
"language:is",
185+
"language:it",
186+
"language:iu",
187+
"language:ja",
188+
"language:jam",
189+
"language:jbo",
190+
"language:jv",
191+
"language:ka",
192+
"language:kaa",
193+
"language:kab",
194+
"language:kbd",
195+
"language:kbp",
196+
"language:kcg",
197+
"language:kg",
198+
"language:ki",
199+
"language:kk",
200+
"language:kl",
201+
"language:km",
202+
"language:kn",
203+
"language:ko",
204+
"language:koi",
205+
"language:krc",
206+
"language:ks",
207+
"language:ksh",
208+
"language:ku",
209+
"language:kv",
210+
"language:kw",
211+
"language:ky",
212+
"language:la",
213+
"language:lad",
214+
"language:lb",
215+
"language:lbe",
216+
"language:lez",
217+
"language:lfn",
218+
"language:lg",
219+
"language:li",
220+
"language:lij",
221+
"language:lld",
222+
"language:lmo",
223+
"language:ln",
224+
"language:lo",
225+
"language:lt",
226+
"language:ltg",
227+
"language:lv",
228+
"language:lzh",
229+
"language:mad",
230+
"language:mai",
231+
"language:map",
232+
"language:mdf",
233+
"language:mg",
234+
"language:mhr",
235+
"language:mi",
236+
"language:min",
237+
"language:mk",
238+
"language:ml",
239+
"language:mn",
240+
"language:mni",
241+
"language:mnw",
242+
"language:mr",
243+
"language:mrj",
244+
"language:ms",
245+
"language:mt",
246+
"language:mwl",
247+
"language:my",
248+
"language:myv",
249+
"language:mzn",
250+
"language:nah",
251+
"language:nan",
252+
"language:nap",
253+
"language:nds",
254+
"language:ne",
255+
"language:new",
256+
"language:nia",
257+
"language:nl",
258+
"language:nn",
259+
"language:no",
260+
"language:nov",
261+
"language:nqo",
262+
"language:nrf",
263+
"language:nso",
264+
"language:nv",
265+
"language:ny",
266+
"language:oc",
267+
"language:olo",
268+
"language:om",
269+
"language:or",
270+
"language:os",
271+
"language:pa",
272+
"language:pag",
273+
"language:pam",
274+
"language:pap",
275+
"language:pcd",
276+
"language:pcm",
277+
"language:pdc",
278+
"language:pfl",
279+
"language:pi",
280+
"language:pih",
281+
"language:pl",
282+
"language:pms",
283+
"language:pnb",
284+
"language:pnt",
285+
"language:ps",
286+
"language:pt",
287+
"language:pwn",
288+
"language:qu",
289+
"language:rm",
290+
"language:rmy",
291+
"language:rn",
292+
"language:ro",
293+
"language:ru",
294+
"language:rue",
295+
"language:rup",
296+
"language:rw",
297+
"language:sa",
298+
"language:sah",
299+
"language:sat",
300+
"language:sc",
301+
"language:scn",
302+
"language:sco",
303+
"language:sd",
304+
"language:se",
305+
"language:sg",
306+
"language:sgs",
307+
"language:shi",
308+
"language:shn",
309+
"language:si",
310+
"language:sk",
311+
"language:skr",
312+
"language:sl",
313+
"language:sm",
314+
"language:smn",
315+
"language:sn",
316+
"language:so",
317+
"language:sq",
318+
"language:sr",
319+
"language:srn",
320+
"language:ss",
321+
"language:st",
322+
"language:stq",
323+
"language:su",
324+
"language:sv",
325+
"language:sw",
326+
"language:szl",
327+
"language:szy",
328+
"language:ta",
329+
"language:tay",
330+
"language:tcy",
331+
"language:te",
332+
"language:tet",
333+
"language:tg",
334+
"language:th",
335+
"language:ti",
336+
"language:tk",
337+
"language:tl",
338+
"language:tly",
339+
"language:tn",
340+
"language:to",
341+
"language:tpi",
342+
"language:tr",
343+
"language:trv",
344+
"language:ts",
345+
"language:tt",
346+
"language:tum",
347+
"language:tw",
348+
"language:ty",
349+
"language:tyv",
350+
"language:udm",
351+
"language:ug",
352+
"language:uk",
353+
"language:ur",
354+
"language:uz",
355+
"language:ve",
356+
"language:vec",
357+
"language:vep",
358+
"language:vi",
359+
"language:vls",
360+
"language:vo",
361+
"language:vro",
362+
"language:wa",
363+
"language:war",
364+
"language:wo",
365+
"language:wuu",
366+
"language:xal",
367+
"language:xh",
368+
"language:xmf",
369+
"language:yi",
370+
"language:yo",
371+
"language:yue",
372+
"language:za",
373+
"language:zea",
374+
"language:zgh",
375+
"language:zh",
376+
"language:zu",
377+
"size_categories:10M<n<100M",
378+
"format:parquet",
379+
"modality:text",
380+
"library:datasets",
381+
"library:dask",
382+
"library:mlcroissant",
383+
"library:polars",
384+
"region:us"
385+
]
386+
}
387+
]
388+
}

0 commit comments

Comments
 (0)