Skip to content
This repository was archived by the owner on Feb 19, 2021. It is now read-only.

Commit d585e07

Browse files
committed
update checksum
1 parent c284bce commit d585e07

File tree

3 files changed

+264
-4
lines changed

3 files changed

+264
-4
lines changed

Dataset_BCO_example.json

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
{
2+
"bco_id": "https://w3id.org/biocompute/examples/glycosylation-sites-UniCarbKB",
3+
"checksum": "AEC8F79CBC38A2E13E3814A13A26E735642482540D5A42A16A8E7D5FA331CA30",
4+
"bco_spec_version" : "https://w3id.org/biocompute/spec/1.3.0",
5+
"provenance_domain":{
6+
"name": "glycosylation-sites-UniCarbKB",
7+
"version": "1.0",
8+
"review":[
9+
{
10+
"status": "approved",
11+
"reviewer_comment": "The dataset has passed the manual and automated QC steps and the readme has also been reviewed",
12+
"reviewer":{
13+
"name": "Rahi Navelkar",
14+
"affiliation": "The George Washington University",
15+
"email": "[email protected]",
16+
"contribution":["curatedBy"]
17+
}
18+
}
19+
],
20+
"created": "2018-02-21T14:46:55-05:00",
21+
"modified": "2018-10-10T11:34:02-05:00",
22+
"contributors":[
23+
{
24+
"name": "Matthew Campbell",
25+
"affiliation": "Institute for Glycomics, Griffith University, Gold Coast, Queensland, Australia",
26+
"email": "[email protected]",
27+
"contribution":["contributedBy"]
28+
},
29+
{
30+
"name": "Rahi Navelkar",
31+
"affiliation": "The George Washington University",
32+
"email": "[email protected]",
33+
"contribution":["curatedBy"]
34+
},
35+
{
36+
"name": "Robel Kahsay",
37+
"affiliation": "The George Washington University",
38+
"email": "[email protected]",
39+
"contribution":["createdBy"]
40+
}
41+
],
42+
"license": "https://creativecommons.org/licenses/by/4.0/"
43+
},
44+
"usability_domain":[
45+
"List of human [taxid:9606] proteins with information on glycosylation sites from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128]"
46+
],
47+
"extension_domain":{
48+
"license":{
49+
"data_license": "https://creativecommons.org/licenses/by/4.0/",
50+
"scripts_license": "https://www.gnu.org/licenses/gpl-3.0.en.html"
51+
},
52+
"scm_extension":{
53+
"scm_repository": "https://github.com/GW-HIVE/glygen-backend-integration/",
54+
"scm_type": "git",
55+
"scm_commit": "d34b85553e775dd5452005d786fe6e47d6048ee0",
56+
"scm_path": "/data/projects/glygen/generated/datasets/reviewed/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.readme.txt"
57+
}
58+
},
59+
"description_domain":{
60+
"keywords":[
61+
"protein",
62+
"canonical",
63+
"glycosylation",
64+
"glycan"
65+
],
66+
"xref":[
67+
{
68+
"namespace": "taxonomy",
69+
"name": "Taxonomy",
70+
"ids": ["9606"],
71+
"access_time": "2018-02-21T14:46:55-05:00"
72+
}
73+
],
74+
"platform": ["centos7"],
75+
"pipeline_steps":[
76+
{
77+
"step_number":1,
78+
"name": "ac2canonical.py",
79+
"description": "Python script for mapping the UniProtKB accessions in the input file to the UniProtKB canonical accessions ",
80+
"version": "",
81+
"input_list":[
82+
{
83+
"uri": "/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"
84+
}
85+
],
86+
"output_list":[
87+
{
88+
"uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"
89+
}
90+
]
91+
},
92+
{
93+
"step_number":2,
94+
"name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py",
95+
"description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ",
96+
97+
"input_list":[
98+
{"uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"}
99+
],
100+
"output_list":[
101+
{"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"}
102+
]
103+
},
104+
{
105+
"step_number":2,
106+
"name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py",
107+
"description": "Python scripts for retrieving glycosylation type or linkage type through UniCarbKB structure webpage ",
108+
"input_list":[
109+
{"uri": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt"}
110+
],
111+
"output_list":[
112+
{"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"}
113+
]
114+
},
115+
{
116+
"step_number":3,
117+
"name": "make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py",
118+
"description": "Python script for quality check of the processed file. Records that meet one or more of the following criteria are flagged and eliminated, and can be accessed using the log file. A record is eliminated - a. If the protein accession is not included in the UniProtKB protein list - UniProtKB Nov-2017 Release b. If the amino acid position does not match the amino acid at the associated position in the fasta sequence - UniProtKB Nov-2017 Release c. If the id (UniCarbKB structure id) is not present in the input file d. If the glycosylation type (linkage type) is not retrieved through step 3 e. If a serine or threonine is reported for an N-linked glycan structure f. If an asparagine is reported for an O-linked glycan structure",
119+
"input_list":[
120+
{"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"},
121+
{"uri": "human_protein_all.fasta"}
122+
],
123+
"output_list":[
124+
{"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv"},
125+
{"uri": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log"}
126+
]
127+
}
128+
]
129+
},
130+
"execution_domain":{
131+
"script":[
132+
{
133+
"uri": {
134+
"uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/ac2canonical.py"
135+
}
136+
},
137+
{
138+
"uri": {
139+
"uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2a.py"
140+
}
141+
},
142+
{
143+
"uri": {
144+
"uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step2b.py"
145+
}
146+
},
147+
{
148+
"uri": {
149+
"uri": "https://github.com/glygener/glygen-backend-integration/blob/master/integration/make-proteoform_glycosylation_sites_unicarbkb_glytoucan-csv-step3.py"
150+
}
151+
}
152+
],
153+
"script_driver": "manual",
154+
"software_prerequisites":[
155+
{
156+
"name": "Python",
157+
"version": "2.7.13",
158+
"uri": {
159+
"uri": "https://www.python.org/downloads/release/python-2713/",
160+
"access_time": "2017-01-24T09:40:17-0500",
161+
"sha1_chksum": "17add4bf0ad0ec2f08e0cae6d205c700"
162+
}
163+
}
164+
],
165+
"external_data_endpoints": [
166+
{
167+
"name": "UniCarbKB",
168+
"url": "http://www.unicarbkb.org/"
169+
},
170+
{
171+
"name": "access glygen-backend-integration",
172+
"url": "https://github.com/glygener/glygen-backend-integration"
173+
}
174+
],
175+
"environment_variables":{
176+
177+
}
178+
},
179+
"io_domain":{
180+
"input_subdomain":[
181+
{
182+
"uri":{
183+
"filename": "human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt",
184+
"uri": "http://data.glygen.org/datasets/source/human_protein_position_pmid_id_aminoacid_glytoucan_2018_09_04_07_51_27.txt",
185+
"access_time": "2018-10-10T11:34:02-05:00"
186+
}
187+
},
188+
{
189+
"uri":{
190+
"filename": "human_protein_all.fasta",
191+
"uri": "http://data.glygen.org/GLYDS00053",
192+
"access_time": "2018-10-10T11:34:02-05:00"
193+
}
194+
}
195+
],
196+
"output_subdomain":[
197+
{
198+
"mediatype": "text/csv",
199+
"uri":{
200+
"filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log",
201+
"uri": "http://data.glygen.org/datasets/logs/human_proteoform_glycosylation_sites_unicarbkb_glytoucan.log",
202+
"access_time": "2018-10-10T11:37:02-05:00"
203+
}
204+
},
205+
{
206+
"mediatype": "text/csv",
207+
"uri":{
208+
"filename": "human_proteoform_glycosylation_sites_unicarbkb_glytoucan.csv",
209+
"uri": "http://data.glygen.org/GLYDS00040",
210+
"access_time": "2018-10-10T11:37:02-05:00"
211+
}
212+
}
213+
]
214+
},
215+
"error_domain":{
216+
"empirical_error":{
217+
"statistics":[
218+
{
219+
"comment": "Unique value statistics for the dataset"
220+
},
221+
{
222+
"key": "uniprotkb_canonical_ac",
223+
"value":92,
224+
"description": "Accession assigned to the protein isoform chosen to be the canonical sequence in UniProtKB database"
225+
},
226+
{
227+
"key": "glycosylation_site",
228+
"value":223,
229+
"description": "Site on the protein sequence where glycosylation is observed"
230+
},
231+
{
232+
"key": "evidence",
233+
"value":163,
234+
"description": "NCBI PubMed Id (PMID) as evidence for the entry"
235+
},
236+
{
237+
"key": "unicarbkb_id",
238+
"value":984,
239+
"description": "UniCarbKB data structure identifier"
240+
},
241+
{
242+
"key": "glytoucan_ac",
243+
"value":824,
244+
"description": "Unique accession assigned to the registered glycan structure in GlyTouCan database"
245+
},
246+
{
247+
"key": "amino_acid",
248+
"value":3,
249+
"description": "Three letter code abbreviation of the amino acid"
250+
},
251+
{
252+
"key": "glycosylation_type",
253+
"value":3,
254+
"description": "Type of glycosylation [linkage type]"
255+
}
256+
]
257+
},
258+
"algorithmic_error":{}
259+
}
260+
}

HCV1a.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"bco_id": "https://w3id.org/biocompute/examples/HCV1a.json",
3-
"checksum": "22B641C02C06553F00310A0E6816070FE2988476B7BC13BF87A2C286A7DE3583",
3+
"checksum": "06DACE70679F35BA87A3DD6FFFED4ED24A4F5B8C2571264C37E5F1B3ADE04A31",
44
"bco_spec_version" : "https://w3id.org/biocompute/spec/1.3.0",
55
"provenance_domain": {
66
"name": "HCV1a ledipasvir resistance SNP detection",

UVP.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"bco_id": "https://github.com/biocompute-objects/UVP-BCO/blob/master/UVP_BCO.json",
3-
"checksum": "B5F77857FB88D86817F4A651283DE2F9AE896861BC695280E94829FA6B2F9A39",
3+
"checksum": "8098B0E9BF2D8D98A0F3C200774A8BD8E228064F56BFB81DEEE432BB2252B014",
44
"bco_spec_version": "https://w3id.org/biocompute/spec/1.3.0",
55
"provenance_domain": {
66
"name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.",
@@ -655,9 +655,9 @@
655655
{
656656
"uri": {
657657
"uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3"
658+
}
658659
}
659-
}
660-
],
660+
],
661661
"script_driver": "Python",
662662
"software_prerequisites": [
663663
{

0 commit comments

Comments
 (0)