11import json
2- import os
32from pathlib import Path
43
54from .jsonldutils import load_file , validate_data
65from .utils import lgr , start_server , stop_server
76
# Directory names that are never descended into during validation.
DIR_TO_SKIP = [
    ".git",
    ".github",
    "__pycache__",
    "env",
    "venv",
]

# File names that are never validated, whatever their extension.
FILES_TO_SKIP = [
    ".DS_Store",
    ".gitignore",
    ".flake8",
    ".autorc",
    "LICENSE",
    "Makefile",
]

# File suffixes eligible for validation. Entries are compared against
# ``pathlib.Path.suffix``, which includes the leading dot (or is the empty
# string for extension-less files), so every non-empty entry must start
# with ".". The previous "json" / "js" entries could never match.
SUPPORTED_EXTENSIONS = [
    ".jsonld",
    ".json",
    ".js",
    "",
]
828
9- def validate_dir (directory , started = False , http_kwargs = {}):
29+
30+ def validate_dir (
31+ directory : str ,
32+ started : bool = False ,
33+ http_kwargs : None | dict [str , int ] = None ,
34+ stop = None ,
35+ ):
1036 """Validate a directory containing JSONLD documents against the ReproSchema pydantic model.
1137
38+ Recursively goes through the directory tree and validates files with the allowed extensions.
39+
1240 Parameters
1341 ----------
1442 directory: str
1543 Path to directory to walk for validation
44+
1645 started : bool
1746 Whether an http server exists or not
18- http_kwargs : dict
47+
48+ http_kwargs : dict or None
1949 Keyword arguments for the http server. Valid keywords are: port, path
2050 and tmpdir
2151
52+ stop: None or function
53+ Function to use to stop the HTTP server
54+
2255 Returns
2356 -------
2457 conforms: bool
2558 Whether the document is conformant with the shape. Raises an exception
2659 if any document is non-conformant.
2760
2861 """
29- if not os .path .isdir (directory ):
30- raise Exception (f"{ directory } is not a directory" )
31- print (f"Validating directory { directory } " )
32- stop = None
33- if not started :
34- stop , port = start_server (** http_kwargs )
35- http_kwargs ["port" ] = port
36- else :
37- if "port" not in http_kwargs :
38- raise KeyError ("HTTP server started, but port key is missing" )
39-
40- for root , _ , files in os .walk (directory ):
41- for name in files :
42- full_file_name = os .path .join (root , name )
43-
44- if Path (full_file_name ).suffix not in [
45- ".jsonld" ,
46- "json" ,
47- "js" ,
48- "" ,
49- ]:
50- lgr .info (f"Skipping file { full_file_name } " )
51- continue
52-
53- lgr .debug (f"Validating file { full_file_name } " )
54- try :
55- data = load_file (
56- full_file_name , started = True , http_kwargs = http_kwargs
57- )
58- if len (data ) == 0 :
59- raise ValueError ("Empty data graph" )
60- print (f"Validating { full_file_name } " )
61- conforms , vtext = validate_data (data )
62- except (ValueError , json .JSONDecodeError ):
62+ if http_kwargs is None :
63+ http_kwargs = {}
64+
65+ directory = Path (directory )
66+
67+ if not directory .is_dir ():
68+ if stop is not None :
69+ stop_server (stop )
70+ raise Exception (f"{ str (directory )} is not a directory" )
71+
72+ if directory .name in DIR_TO_SKIP :
73+ lgr .info (f"Skipping directory { directory } " )
74+ return True
75+
76+ lgr .info (f"Validating directory { directory } " )
77+
78+ files_to_validate = [
79+ str (x )
80+ for x in directory .iterdir ()
81+ if x .is_file ()
82+ and x .name not in FILES_TO_SKIP
83+ and x .suffix in SUPPORTED_EXTENSIONS
84+ ]
85+
86+ for name in files_to_validate :
87+ lgr .debug (f"Validating file { name } " )
88+
89+ try :
90+ data = load_file (name , started = started , http_kwargs = http_kwargs )
91+ if len (data ) == 0 :
6392 if stop is not None :
6493 stop_server (stop )
65- raise
66- else :
67- if not conforms :
68- lgr .critical (
69- f"File { full_file_name } has validation errors."
70- )
71- if stop is not None :
72- stop_server (stop )
73- raise ValueError (vtext )
74- if not started :
75- stop_server (stop )
76- return True
94+ raise ValueError (f"Empty data graph in file { name } " )
95+ conforms , vtext = validate_data (data )
96+ except (ValueError , json .JSONDecodeError ):
97+ if stop is not None :
98+ stop_server (stop )
99+ raise
100+ else :
101+ if not conforms :
102+ lgr .critical (f"File { name } has validation errors." )
103+ stop_server (stop )
104+ raise ValueError (vtext )
105+
106+ dirs_to_validate = [
107+ str (x )
108+ for x in directory .iterdir ()
109+ if x .is_dir () and x .name not in DIR_TO_SKIP
110+ ]
111+
112+ for dir in dirs_to_validate :
113+ conforms , stop = validate_dir (
114+ dir , started = started , http_kwargs = http_kwargs , stop = stop
115+ )
116+
117+ return True , stop
77118
78119
def validate(path):
    """Validate a single file, or every supported file under a directory.

    For a directory, an HTTP server is started with ``start_server``, the
    tree is validated with ``validate_dir`` and the server is stopped again.
    For a file, the document is loaded with ``load_file`` and checked with
    ``validate_data``; files named in ``FILES_TO_SKIP`` are skipped.

    Parameters
    ----------
    path: str
        Path to a file or directory to validate

    Returns
    -------
    conforms: bool
        ``True`` when the path was skipped or everything validated conforms.

    Raises
    ------
    ValueError
        If a document has validation errors.

    """
    if Path(path).is_dir():

        lgr.info(f"Validating directory {path}")

        stop, port = start_server()
        http_kwargs = {"port": port}
        started = True

        try:
            result = validate_dir(
                path, started=started, http_kwargs=http_kwargs, stop=stop
            )
        except ValueError:
            # validate_dir has already stopped the server before raising
            # ValueError (json.JSONDecodeError is a ValueError subclass).
            raise
        except BaseException:
            # Any other failure would otherwise leave the server running.
            stop_server(stop)
            raise

        # validate_dir normally returns (conforms, stop); accept a bare bool
        # as well, which it returns for a directory named in DIR_TO_SKIP.
        conforms = result[0] if isinstance(result, tuple) else result

        stop_server(stop)

    else:

        if Path(path).name in FILES_TO_SKIP:
            lgr.info(f"Skipping file {path}")
            return True

        data = load_file(path, started=False)
        conforms, vtext = validate_data(data)
        if not conforms:
            lgr.critical(f"File {path} has validation errors.")
            raise ValueError(vtext)

    lgr.info(f"{path} conforms.")

    return conforms
0 commit comments