Skip to content

Commit 47c173e

Browse files
committed
rename test examples
1 parent 70b029d commit 47c173e

File tree

43 files changed

+1045
-203
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1045
-203
lines changed

demo.ipynb

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from rocrateValidator import validate as validate"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"name": "stdout",
19+
"output_type": "stream",
20+
"text": [
21+
"This is a VALID RO-Crate but with Warning\n",
22+
"{\n",
23+
" \"File existence\": [\n",
24+
" true\n",
25+
" ],\n",
26+
" \"File size\": [\n",
27+
" true\n",
28+
" ],\n",
29+
" \"Metadata file existence\": [\n",
30+
" true\n",
31+
" ],\n",
32+
" \"Json check\": [\n",
33+
" true\n",
34+
" ],\n",
35+
" \"Json-ld check\": [\n",
36+
" true\n",
37+
" ],\n",
38+
" \"File descriptor check\": [\n",
39+
" true\n",
40+
" ],\n",
41+
" \"Direct property check\": [\n",
42+
" true\n",
43+
" ],\n",
44+
" \"Referencing check\": [\n",
45+
" true\n",
46+
" ],\n",
47+
" \"Encoding check\": [\n",
48+
" true\n",
49+
" ],\n",
50+
" \"Web-based data entity check\": [\n",
51+
" true\n",
52+
" ],\n",
53+
" \"Person entity check\": [\n",
54+
" \"WARNING: The Author https://ror.org/03f0f6041 is an Organization\"\n",
55+
" ],\n",
56+
" \"Organization entity check\": [\n",
57+
" true\n",
58+
" ],\n",
59+
" \"Contact information check\": [\n",
60+
" true\n",
61+
" ],\n",
62+
" \"Citation property check\": [\n",
63+
" true\n",
64+
" ],\n",
65+
" \"Publisher property check\": [\n",
66+
" true\n",
67+
" ],\n",
68+
" \"Funder property check\": [\n",
69+
" true\n",
70+
" ],\n",
71+
" \"Licensing property check\": [\n",
72+
" true\n",
73+
" ],\n",
74+
" \"Places property check\": [\n",
75+
" true\n",
76+
" ],\n",
77+
" \"Time property check\": [\n",
78+
" true\n",
79+
" ],\n",
80+
" \"Scripts and workflow check\": [\n",
81+
" true\n",
82+
" ]\n",
83+
"}\n"
84+
]
85+
}
86+
],
87+
"source": [
88+
"v = validate.validate(\"test/samples/invalid/personEntity_warning\")\n",
89+
"v.validator()"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {},
96+
"outputs": [],
97+
"source": []
98+
}
99+
],
100+
"metadata": {
101+
"kernelspec": {
102+
"display_name": "Python 3",
103+
"language": "python",
104+
"name": "python3"
105+
},
106+
"language_info": {
107+
"codemirror_mode": {
108+
"name": "ipython",
109+
"version": 3
110+
},
111+
"file_extension": ".py",
112+
"mimetype": "text/x-python",
113+
"name": "python",
114+
"nbconvert_exporter": "python",
115+
"pygments_lexer": "ipython3",
116+
"version": "3.8.5"
117+
}
118+
},
119+
"nbformat": 4,
120+
"nbformat_minor": 4
121+
}

src/__init__.ipynb

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/rocrateValidator/check_list.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def get_check_list():
1919
"Licensing property check",
2020
"Places property check",
2121
"Time property check",
22+
# "Thumbnails check",
2223
"Scripts and workflow check"
2324
]
2425
return check_list

src/rocrateValidator/semanticCheck.py

Lines changed: 75 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -282,15 +282,18 @@ def webbased_entity_check(tar_file, extension):
282282

283283
context, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))
284284

285-
for entity in metadata.values():
286-
type = utils.get_norm_value(entity, "@type")[0]
287-
id_ = utils.get_norm_value(entity, "@id")[0]
288-
289-
### update result
290-
if type == "File" and utils.is_url(id_):
291-
urlFile_updRlt(id_, entity, webbased_result, error_message)
292-
elif type == "Dataset":
293-
dirOnWeb_updRlt(entity, metadata, webbased_result, error_message)
285+
hasPart = utils.get_norm_value(metadata["./"], "hasPart") if utils.get_norm_value(metadata["./"], "@type") == ["Dataset"] else []
286+
if hasPart != []:
287+
for entity in hasPart:
288+
try:
289+
type = utils.get_norm_value(metadata[entity], "@type")[0]
290+
id_ = utils.get_norm_value(metadata[entity], "@id")[0]
291+
if type == "File" and utils.is_url(id_):
292+
urlFile_updRlt(id_, metadata[entity], webbased_result, error_message)
293+
elif type == "Dataset":
294+
dirOnWeb_updRlt(metadata[entity], metadata, webbased_result, error_message)
295+
except KeyError:
296+
webbased_result[entity] = [False, error_message["TypeError"].format(entity)]
294297

295298
for values in webbased_result.values():
296299
if isinstance(values, list):
@@ -305,7 +308,12 @@ def check_author_type(author, metadata, person_result, error_message, warning_me
305308
try:
306309
type = utils.get_norm_value(metadata[author], "@type")
307310
if type[0] == "Person":
308-
person_result[author] = True
311+
# person_result[author] = True
312+
affiliation = utils.get_norm_value(metadata[author], "affiliation")
313+
if affiliation != []:
314+
person_result[author] = True
315+
else:
316+
person_result[author] = error_message["AffiliationMissing"]
309317
elif type[0] == "Organization":
310318
person_result[author] = warning_message['OrganizationAuthor'].format(author)
311319
else:
@@ -324,10 +332,12 @@ def person_entity_check(tar_file, extension):
324332
error_message = {
325333
"PersonError": "Semantic Error: Invalid Person entity {}",
326334
"TypeError": "Semantic Error: Invalid @type value of {}",
327-
"ReferencingError": "Semantic Error: Invalid referencing {} NOT Provided. Url Author MUST provide referencing entity."
335+
"ReferencingError": "Semantic Error: Invalid referencing {} NOT Provided. Url Author MUST provide referencing entity.",
336+
"AffiliationMissing" : "Semantic Error: The Person Entity SHOULD have affiliation property."
328337
}
329338
warning_message = {
330-
"OrganizationAuthor" : "WARNING: The Author {} is an Organization"
339+
"OrganizationAuthor" : "WARNING: The Author {} is an Organization",
340+
"AffiliationMissing" : "WARNING: The Person Entity SHOULD have affiliation property."
331341
}
332342

333343
person_result = {}
@@ -340,6 +350,10 @@ def person_entity_check(tar_file, extension):
340350
for values in person_result.values():
341351
if isinstance(values, list):
342352
return Result(NAME, code = -1, message = values[1])
353+
354+
for values in person_result.values():
355+
if isinstance(values, str):
356+
return Result(NAME, code = 1, message = values)
343357

344358
return Result(NAME)
345359

@@ -396,7 +410,9 @@ def publisher_affiliation_correctness(item, entity, metadata, organization_resul
396410
entity_property = utils.get_norm_value(entity, item)
397411
if entity_property != []:
398412
entity_property = entity_property[0]
399-
if utils.get_norm_value(metadata[entity_property], "@type") == ["Organization"]:
413+
if utils.get_norm_value(metadata[entity_property], "@type") == ["Organization"] and item == "publisher":
414+
organization_result[utils.get_norm_value(entity, "@id")[0]] = True
415+
elif utils.get_norm_value(metadata[entity_property], "@type") == ["Person"] and item == "affiliation":
400416
organization_result[utils.get_norm_value(entity, "@id")[0]] = True
401417
else:
402418
organization_result[utils.get_norm_value(entity, "@id")[0]] = [False, error_message["OrganizationError"].format(utils.get_norm_value(metadata[entity_property], "@id")[0])]
@@ -420,15 +436,15 @@ def organization_check(tar_file, extension):
420436

421437
context, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))
422438
for entity in metadata.values():
423-
type = utils.get_norm_value(entity, "@type")[0]
439+
type = utils.get_norm_value(entity, "@type")
424440

425441
### check the value of publisher for each dataset and scholarly article entity
426-
if type =="Dataset" or type == "ScholarlyArticle":
442+
if type == ["Dataset"] or type == ["ScholarlyArticle"]:
427443
# get_entity("publisher", entity, metadata, organization_result, error_message)
428444
publisher_affiliation_correctness("publisher", entity, metadata, organization_result, error_message)
429445

430446
### check the value of affiliation for each file entity
431-
elif type == "File":
447+
elif type == ["File"]:
432448
# get_entity("affiliation", entity, metadata, organization_result, error_message)
433449
publisher_affiliation_correctness("affiliation", entity, metadata, organization_result, error_message)
434450

@@ -458,7 +474,7 @@ def contact_info_check(tar_file, extension):
458474

459475
context, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))
460476
for entity in metadata.values():
461-
if utils.get_norm_value(entity, "@type")[0] == "Dataset":
477+
if utils.get_norm_value(entity, "@type") == ["Dataset"]:
462478
### The flag attributes is to find the additional information which is referencing entity of contactPoint
463479
get_entity("author", entity, metadata, contact_result, error_message, True)
464480
get_entity("publisher", entity, metadata, contact_result, error_message, True)
@@ -489,8 +505,8 @@ def citation_check(tar_file, extension):
489505

490506
context, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))
491507
for entity in metadata.values():
492-
type = utils.get_norm_value(entity, "@type")[0]
493-
if type == "Dataset" or type == "File":
508+
type = utils.get_norm_value(entity, "@type")
509+
if type == ["Dataset"] or type == ["File"]:
494510
get_entity("citation", entity, metadata, citation_result, error_message, urlVal_required = True)
495511

496512
for values in citation_result.values():
@@ -550,7 +566,7 @@ def funder_check(tar_file, extension):
550566
context, metadata = rocrate.read_metadata(os.path.join(tar_file, "ro-crate-metadata.json"))
551567

552568
for entity in metadata.values():
553-
if utils.get_norm_value(entity, "@type")[0] == "Dataset":
569+
if utils.get_norm_value(entity, "@type") == ["Dataset"]:
554570
get_entity("funder", entity, metadata, funder_result, error_message, is_multipleEntity = True)
555571
# get_funder(depth, entity, metadata, funder_result, error_message)
556572

@@ -634,7 +650,7 @@ def places_check(tar_file, extension):
634650
error_message = {
635651
"Missingname": "Semantic Error: Invalid Place Entity {}. The Place has geo property SHOULD have a name.",
636652
"TypeError":"Semantic Error: Invalid Type Value at {}",
637-
"ReferencingError": "Semantic Error: Invalid Refernencing {} or Not Provided"
653+
"ReferencingError": "Semantic Error: Invalid Referencing {} or Not Provided"
638654
}
639655

640656
geo_result = {}
@@ -694,32 +710,42 @@ def time_check(tar_file, extension):
694710

695711
return Result(NAME)
696712

713+
def upd_thumbnailRlt(thumbnail, hasFile, thumbnail_result, error_message):

    """
    Record, for every thumbnail referenced by one entity, whether that
    thumbnail is also listed in the same entity's hasFile values.

    thumbnail: iterable of thumbnail ids referenced by the entity (may be empty)
    hasFile: container of the ids the entity lists under hasFile
    thumbnail_result: dict updated in place; maps a thumbnail id to True or to
                      [False, <formatted "ReferencingError" message>]
    error_message: dict holding the "ReferencingError" format string

    Results are keyed by thumbnail id only, so the same id can be reported by
    several entities. A failure that is already recorded is never replaced by
    True; otherwise a later entity that lists the thumbnail correctly would
    hide the error found on an earlier one.
    """

    for item in thumbnail:
        if item not in hasFile:
            thumbnail_result[item] = [False, error_message["ReferencingError"].format(item)]
        elif item not in thumbnail_result:
            # only mark as valid when nothing (in particular no failure) is recorded yet
            thumbnail_result[item] = True
697720

698-
# def thumbnails_check(tar_file, extension):
699-
700-
# """
701-
# Multiple file in same property(via hasFile) with one of thumbnails.
702-
# The thumbnails SHOULD be included in the RO-Crate
703-
# For more information and exaples, please check:
704-
# <https://www.researchobject.org/ro-crate/1.1/contextual-entities.html#thumbnails>
705-
# """
706-
707-
# NAME = "Thumbnails property check"
708-
# error_message = {
709-
710-
# }
711-
712-
# thumbnails_result = {}
721+
def thumbnails_check(tar_file, extension):

    """
    Check the thumbnail property of every entity in the RO-Crate metadata.
    An entity can relate to several files (via hasFile), one of which is its
    thumbnail; a thumbnail that is not among the hasFile values of the same
    entity is reported as a referencing error.
    For more information and examples, please check:
    <https://www.researchobject.org/ro-crate/1.1/contextual-entities.html#thumbnails>

    tar_file: directory that contains ro-crate-metadata.json
    extension: not used by this check
    Returns Result(NAME) when no failure was recorded, otherwise a Result with
    code -1 carrying the message of the first recorded failure.
    """

    NAME = "Thumbnails check"
    error_message = {
        "ReferencingError": "Semantic Error: Invalid thumbnail {}. The Thumbnail MUST be included in the RO-Crate."
    }
    thumbnail_result = {}

    metadata_path = os.path.join(tar_file, "ro-crate-metadata.json")
    _, metadata = rocrate.read_metadata(metadata_path)

    ### collect a result for each thumbnail of each entity
    for entity in metadata.values():
        upd_thumbnailRlt(
            utils.get_norm_value(entity, "thumbnail"),
            utils.get_norm_value(entity, "hasFile"),
            thumbnail_result,
            error_message,
        )

    ### failures are stored as [False, message]; report the first one found
    failure = next((outcome for outcome in thumbnail_result.values() if isinstance(outcome, list)), None)
    if failure is not None:
        return Result(NAME, code = -1, message = failure[1])

    return Result(NAME)
723749

724750

725751
def recognisedWkf_upd(extension_set, entity, workflow_result, id_, error_message):
@@ -732,13 +758,13 @@ def recognisedWkf_upd(extension_set, entity, workflow_result, id_, error_message
732758
else:
733759
workflow_result[id_] = [False, error_message["TypeError"].format(id_)]
734760

735-
def unrecognisedWfk_upd(type, extension_set, entity, workflow_result, warning_message):
761+
def unrecognisedWfk_upd(type, extension_set, entity, workflow_result, warning_message, error_message):

    """
    Update workflow_result for a workflow entity whose file extension is not
    in extension_set. Entities whose extension is in extension_set are left
    untouched.

    type: the @type values of the entity
    extension_set: collection of recognised workflow file extensions
    entity: the entity whose @id is inspected
    workflow_result: dict updated in place, keyed by the entity @id
    warning_message: dict holding the "UnrecognizedWkf" format string
    error_message: dict holding the "TypeError" format string
    """

    entity_id = utils.get_norm_value(entity, "@id")[0]
    suffix = os.path.splitext(entity_id)[1]

    ### recognised extensions are not handled here
    if suffix in extension_set:
        return

    if "File" in type and "SoftwareSourceCode" in type:
        ### typed correctly but the extension is unknown: store a warning string
        workflow_result[entity_id] = warning_message["UnrecognizedWkf"].format(suffix)
    else:
        ### missing File / SoftwareSourceCode type: store a failure entry
        workflow_result[entity_id] = [False, error_message["TypeError"].format(entity_id)]
742768

743769
def scripts_and_workflow_check(tar_file, extension):
744770

@@ -776,7 +802,7 @@ def scripts_and_workflow_check(tar_file, extension):
776802
for entity in metadata.values():
777803
type = utils.get_norm_value(entity, "@type")
778804
if "ComputationalWorkflow" in type:
779-
unrecognisedWfk_upd(type, extension_set, entity, workflow_result, warning_message)
805+
unrecognisedWfk_upd(type, extension_set, entity, workflow_result, warning_message, error_message)
780806

781807
### function will return True only when all of the recognised workflow files are correct
782808
for values in workflow_result.values():

src/rocrateValidator/validate.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def __init__(self, tar_file):
3333
"Licensing property check": smtc.licensing_check,
3434
"Places property check": smtc.places_check,
3535
"Time property check": smtc.time_check,
36+
# "Thumbnails check":smtc.thumbnails_check,
3637
"Scripts and workflow check":smtc.scripts_and_workflow_check
3738
}
3839

0 commit comments

Comments
 (0)