Skip to content

Commit 4236100

Browse files
yibeichan, djarecka, and pre-commit-ci[bot]
authored
ENH: merge ref/linkML into main (#44)
* adding pydantic model from the reproschema ref/linkml branch * fix altLabel * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates to the model: updating AllowedType, adding MissingType to value, updated to ResponseOption and image * running pre-commit * ENH: updating reproschema commands to the new pydantic model (#36) * add print for testing * update clear_header * remove print * fix order and other errors * change ui yesno to radio * fix typo * update context, field->item, fix isVis * remove useless due to failed validation * remove visibility at the item level & remove matrixInfo * fix choice * remove identifier * updating validate command to the new pydantic model * updating/fixing the tests; updating the model to use CreativeWork; changes in formating * fix conversion tests * remove test output * change test output directory * final improvments on tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * model version after adding Thing class * updating model after removing CreativeWork and ImageUrl * adding tests to initialize the model classes * fixing load_file; adding write_obj_jsonld function and expanding test_schema * changing redcap2reproschema to use ned pydantic classes; some small changes to the pydantic model * changing name from string to landstring with en as language * fixing jsonld files * Adding option to return compact schema to load_file * fixing the protocol jsonld file * changing reproschema2redcap to use the new model * adding contectfile to write_obj_jsonld function and improving test; improving compact option for load_file * fixing reproschema2redcap and tests * removing file with the context and fixing references to the context_url (for now the link rfom the ref/linkm branch * updating the reproschema2redcap to work for activity/items from urls * improving error message for file_load and validate; checking the suffix of the 
file before treating it as jsonld * fixing identify_model_class, so Item and Field are treated the same * fixing reproschema2redcap so it reads responseOptions from another file * rewriting parts of redcap2reproschema, fixing some bugs[wip] * fixing compute: removing isvis condition * fixing process_csv so it doesn't go multiple time through the same condition * changes to input and value mapping (mapping explicitly or raising errors if not found); fixing choices and adding slider; adding sql to compute types (this does not work properly right now); adding many comments * adding output for redcap2reproschema command; removing argparse * model without decimal; revert changes to valueType in the model * adding migrade command * fixing multiple issues with redcap2rp and rp2redcap: adding compute, fixing preamble (can be either activity level or issue level * WIP: addining test to test rp2redcap and redcap2repo using nimh exampl * fixing paths in the tests, should run now * ignore .DS_Store files in validation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: yibeichan <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename to be consistent --------- Co-authored-by: Dorota Jarecka <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b227e12 commit 4236100

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+2683
-1036
lines changed

reproschema/cli.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from . import get_logger, set_logger_level
66
from . import __version__
77
from .redcap2reproschema import redcap2reproschema as redcap2rs
8-
from .reproschema2redcap import main as rs2redcap
8+
from .reproschema2redcap import reproschema2redcap as rs2redcap
9+
from .migrate import migrate2newschema
910

1011
lgr = get_logger()
1112

@@ -42,14 +43,33 @@ def main(log_level):
4243

4344

4445
@main.command()
45-
@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False))
4646
@click.argument("path", nargs=1, type=str)
47-
def validate(shapefile, path):
47+
def validate(path):
4848
if not (path.startswith("http") or os.path.exists(path)):
4949
raise ValueError(f"{path} must be a URL or an existing file or directory")
5050
from .validate import validate
5151

52-
validate(shapefile, path)
52+
result = validate(path)
53+
if result:
54+
click.echo("Validation successful")
55+
56+
57+
@main.command()
@click.argument("path", nargs=1, type=click.Path(exists=True, dir_okay=True))
@click.option("--inplace", is_flag=True, help="Changing file in place")
@click.option(
    "--fixed-path",
    type=click.Path(dir_okay=True, writable=True, resolve_path=True),
    help="Path to the fixed file/directory, if not provide suffix 'after_migration' is used",
)
def migrate(path, inplace, fixed_path):
    """
    Migrate a file or directory at PATH to the new schema.

    The result is written in place (--inplace), to --fixed-path, or, when
    neither is given, next to the input with an 'after_migration' suffix.
    The two options are mutually exclusive.
    """
    if not (path.startswith("http") or os.path.exists(path)):
        raise ValueError(f"{path} must be a URL or an existing file or directory")
    if fixed_path and inplace:
        # The two options contradict each other: one overwrites the input,
        # the other writes somewhere else. UsageError (an Exception subclass)
        # lets click print a clean message instead of a traceback.
        raise click.UsageError(
            "--inplace and --fixed-path are mutually exclusive; provide at most one."
        )
    new_path = migrate2newschema(path, inplace=inplace, fixed_path=fixed_path)
    if new_path:
        click.echo(f"File/Directory after migration {new_path}")
5373

5474

5575
@main.command()
@@ -103,12 +123,19 @@ def serve(port):
103123
@main.command()
104124
@click.argument("csv_path", type=click.Path(exists=True, dir_okay=False))
105125
@click.argument("yaml_path", type=click.Path(exists=True, dir_okay=False))
106-
def redcap2reproschema(csv_path, yaml_path):
126+
@click.option(
127+
"--output-path",
128+
type=click.Path(dir_okay=True, writable=True, resolve_path=True),
129+
default=".",
130+
show_default=True,
131+
help="Path to the output directory, defaults to the current directory.",
132+
)
133+
def redcap2reproschema(csv_path, yaml_path, output_path):
107134
"""
108135
Convert REDCap CSV files to Reproschema format.
109136
"""
110137
try:
111-
redcap2rs(csv_path, yaml_path)
138+
redcap2rs(csv_path, yaml_path, output_path)
112139
click.echo("Converted REDCap data dictionary to Reproschema format.")
113140
except Exception as e:
114141
raise click.ClickException(f"Error during conversion: {e}")

reproschema/jsonldutils.py

Lines changed: 116 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,146 @@
11
from pyld import jsonld
2-
from pyshacl import validate as shacl_validate
32
import json
43
import os
5-
from .utils import start_server, stop_server, lgr
4+
from pathlib import Path
5+
from copy import deepcopy
6+
import requests
7+
from urllib.parse import urlparse
8+
from .utils import start_server, stop_server, lgr, fixing_old_schema, CONTEXTFILE_URL
9+
from .models import (
10+
Item,
11+
Activity,
12+
Protocol,
13+
ResponseOption,
14+
ResponseActivity,
15+
Response,
16+
identify_model_class,
17+
)
618

719

8-
def load_file(path_or_url, started=False, http_kwargs={}):
9-
try:
20+
def _is_url(path):
21+
"""
22+
Determine whether the given path is a URL.
23+
"""
24+
parsed = urlparse(str(path))
25+
return parsed.scheme in ("http", "https", "ftp", "ftps")
26+
27+
28+
def _is_file(path):
29+
"""
30+
Determine whether the given path is a valid file path.
31+
"""
32+
return os.path.isfile(path)
33+
34+
35+
def _fetch_jsonld_context(url, timeout=30):
    """
    Download a JSON-LD context document and return it as parsed JSON.

    Parameters
    ----------
    url : str
        Location of the context file.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block indefinitely on an unresponsive host.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here, otherwise an HTML error page would surface
    # later as an unrelated JSON decoding error.
    response.raise_for_status()
    return response.json()
38+
39+
40+
def load_file(
41+
path_or_url,
42+
started=False,
43+
http_kwargs=None,
44+
compact=False,
45+
compact_context=None,
46+
fixoldschema=False,
47+
):
48+
"""Load a file or URL and return the expanded JSON-LD data."""
49+
path_or_url = str(path_or_url)
50+
if http_kwargs is None:
51+
http_kwargs = {}
52+
if _is_url(path_or_url):
1053
data = jsonld.expand(path_or_url)
1154
if len(data) == 1:
12-
if "@id" not in data[0]:
55+
if "@id" not in data[0] and "id" not in data[0]:
1356
data[0]["@id"] = path_or_url
14-
except jsonld.JsonLdError as e:
15-
if 'only "http" and "https"' in str(e):
16-
lgr.debug("Reloading with local server")
17-
root = os.path.dirname(path_or_url)
57+
elif _is_file(path_or_url):
58+
lgr.debug("Reloading with local server")
59+
root = os.path.dirname(path_or_url)
60+
if not started:
61+
stop, port = start_server(**http_kwargs)
62+
else:
63+
if "port" not in http_kwargs:
64+
raise KeyError("port key missing in http_kwargs")
65+
port = http_kwargs["port"]
66+
base_url = f"http://localhost:{port}/"
67+
if root:
68+
base_url += f"{root}/"
69+
with open(path_or_url) as json_file:
70+
try:
71+
data = json.load(json_file)
72+
except json.JSONDecodeError as e:
73+
raise json.JSONDecodeError(
74+
f"Error parsing JSON file {json_file}: {e.msg}", e.doc, e.pos
75+
) from e
76+
try:
77+
data = jsonld.expand(data, options={"base": base_url})
78+
except:
79+
raise
80+
finally:
1881
if not started:
19-
stop, port = start_server(**http_kwargs)
82+
stop_server(stop)
83+
if len(data) == 1:
84+
if "@id" not in data[0] and "id" not in data[0]:
85+
data[0]["@id"] = base_url + os.path.basename(path_or_url)
86+
else:
87+
raise Exception(f"{path_or_url} is not a valid URL or file path")
88+
89+
if isinstance(data, list) and len(data) == 1:
90+
data = data[0]
91+
92+
if fixoldschema:
93+
data = fixing_old_schema(data, copy_data=True)
94+
if compact:
95+
if compact_context:
96+
if _is_file(compact_context):
97+
with open(compact_context) as fp:
98+
context = json.load(fp)
99+
elif _is_url(compact_context):
100+
context = _fetch_jsonld_context(compact_context)
20101
else:
21-
if "port" not in http_kwargs:
22-
raise KeyError("port key missing in http_kwargs")
23-
port = http_kwargs["port"]
24-
base_url = f"http://localhost:{port}/"
25-
if root:
26-
base_url += f"{root}/"
27-
with open(path_or_url) as json_file:
28-
data = json.load(json_file)
29-
try:
30-
data = jsonld.expand(data, options={"base": base_url})
31-
except:
32-
raise
33-
finally:
34-
if not started:
35-
stop_server(stop)
36-
if len(data) == 1:
37-
if "@id" not in data[0]:
38-
data[0]["@id"] = base_url + os.path.basename(path_or_url)
102+
raise Exception(
103+
f"compact_context has tobe a file or url, but {compact_context} provided"
104+
)
105+
if _is_file(path_or_url):
106+
data = jsonld.compact(data, ctx=context, options={"base": base_url})
39107
else:
40-
raise
108+
data = jsonld.compact(data, ctx=context)
109+
41110
return data
42111

43112

44-
def validate_data(data, shape_file_path):
45-
"""Validate an expanded jsonld document against a shape.
113+
def validate_data(data):
114+
"""Validate an expanded jsonld document against the pydantic model.
46115
47116
Parameters
48117
----------
49118
data : dict
50119
Python dictionary containing JSONLD object
51-
shape_file_path : str
52-
SHACL file for the document
53120
54121
Returns
55122
-------
56123
conforms: bool
57124
Whether the document is conformant with the shape
58125
v_text: str
59-
Validation information returned by PySHACL
126+
Validation errors if any returned by pydantic
60127
61128
"""
62-
kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
63-
normalized = jsonld.normalize(data, kwargs)
64-
data_file_format = "nquads"
65-
shape_file_format = "turtle"
66-
conforms, v_graph, v_text = shacl_validate(
67-
normalized,
68-
shacl_graph=shape_file_path,
69-
data_graph_format=data_file_format,
70-
shacl_graph_format=shape_file_format,
71-
inference="rdfs",
72-
debug=False,
73-
serialize_report_graph=True,
74-
)
129+
# do we need it?
130+
# kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
131+
# normalized = jsonld.normalize(data, kwargs)
132+
obj_type = identify_model_class(data["@type"][0])
133+
data_fixed = [fixing_old_schema(data, copy_data=True)]
134+
context = _fetch_jsonld_context(CONTEXTFILE_URL)
135+
data_fixed_comp = jsonld.compact(data_fixed, context)
136+
del data_fixed_comp["@context"]
137+
conforms = False
138+
v_text = ""
139+
try:
140+
obj_type(**data_fixed_comp)
141+
conforms = True
142+
except Exception as e:
143+
v_text = str(e)
75144
return conforms, v_text
76145

77146

reproschema/migrate.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import json, os
2+
import shutil
3+
from pathlib import Path
4+
5+
from .jsonldutils import load_file
6+
from .utils import fixing_old_schema
7+
8+
9+
def migrate2newschema(path, inplace=False, fixed_path=None):
    """
    Migrate a single file or every file below a directory.

    Parameters
    ----------
    path : str or Path
        File or directory to migrate; resolved to an absolute path first.
    inplace : bool
        When true, files are overwritten where they are.
    fixed_path : str or Path, optional
        Destination for the migrated file/directory when not in place.

    Returns
    -------
    Location of the migrated file or directory. Without ``inplace`` and
    ``fixed_path`` this is the input with an ``_after_migration`` suffix.
    """
    source = Path(path).resolve()

    if source.is_file():
        print(f"migration of file: {source}")
        return migrate2newschema_file(source, inplace, fixed_path)

    # Anything that is not a regular file is handled as a directory.
    if inplace:
        target = source
    else:
        if fixed_path:
            target = Path(fixed_path).resolve()
        else:
            target = source.parent / f"{source.name}_after_migration"
        # Work on a copy so the original tree stays untouched.
        shutil.copytree(source, target)

    # Every regular file below the target is rewritten where it is.
    for entry in Path(target).rglob("*"):
        if entry.is_file():
            migrate2newschema_file(jsonld_path=entry, inplace=True)
    return target
29+
30+
31+
def migrate2newschema_file(jsonld_path, inplace=False, fixed_path=None):
    """
    Migrate one JSON-LD file and write the result to disk.

    The file is read with ``load_file``, passed through
    ``fixing_old_schema`` and dumped as a one-element JSON list.

    Parameters
    ----------
    jsonld_path : str or Path
        File to migrate.
    inplace : bool
        When true, ``jsonld_path`` itself is overwritten.
    fixed_path : str or Path, optional
        Output file used when ``inplace`` is false.

    Returns
    -------
    The path the migrated document was written to. Without ``inplace`` and
    ``fixed_path`` this is ``jsonld_path`` with ``_after_migration`` inserted
    before the extension.
    """
    print(f"Fixing {jsonld_path}")
    data = load_file(jsonld_path, started=False)
    data_fixed = [fixing_old_schema(data, copy_data=True)]
    if inplace:
        new_filename = jsonld_path
    elif fixed_path:
        # This branch used to test the undefined name ``fixedjsonld_path``,
        # which raised NameError whenever ``inplace`` was false.
        new_filename = fixed_path
    else:
        root, ext = os.path.splitext(jsonld_path)
        new_filename = f"{root}_after_migration{ext}"
    with open(new_filename, "w") as f:
        json.dump(data_fixed, f, indent=4)
    return new_filename

reproschema/models/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
1-
from .protocol import Protocol
2-
from .activity import Activity
3-
from .item import Item
1+
from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response
2+
from .utils import write_obj_jsonld, identify_model_class

reproschema/models/activity.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)