1111import time
1212import humanize
1313import fnmatch
14+ import hashlib
1415from pathlib import Path
1516
1617IGNORED_FILES = [
@@ -57,6 +58,65 @@ def getConcatFilesInDir(input_dir, regex_patterns):
5758 return result
5859
5960
def _find_active_version(user_path):
    """Return the active version number for one user's directory, or None.

    The number is read from the user's ``user_assignment_settings.json`` when
    that file exists.  Otherwise the highest numbered entry in ``user_path``
    is used, for compatibility with early versions of Submitty.
    """
    settings_path = os.path.join(user_path, 'user_assignment_settings.json')
    if os.path.exists(settings_path):
        with open(settings_path) as settings_file:
            return int(json.load(settings_file)["active_version"])

    # Compare versions numerically: a plain string sort would rank "9" above
    # "10" and would hand back a str that never equals int(version).
    numbered = [int(entry) for entry in os.listdir(user_path) if entry.isdecimal()]
    return max(numbered, default=None)


def processGradeable(basepath, config, input_dir, output_dir, total_concat):
    """Concatenate every selected submission of one gradeable.

    For each user directory in ``input_dir`` and each selected version inside
    it, the files matching ``config["regex"]`` are concatenated and appended
    to ``<output_dir>/<user>/<version>/submission.concatenated``.

    Args:
        basepath: Directory whose group ``input_dir`` must share.
        config: Configuration dict; the keys ``"ignore_submissions"``,
            ``"version"`` and ``"regex"`` are read.
        input_dir: Path to the gradeable directory holding one
            sub-directory per user.
        output_dir: Directory under which the concatenated files are placed.
        total_concat: Running total of concatenated data seen so far.

    Returns:
        ``total_concat`` increased by ``sys.getsizeof`` of every string that
        was written.

    Raises:
        SystemExit: If ``input_dir`` does not exist, if its group differs
            from the group of ``basepath``, or (via ``checkTotalSize``) if
            the running total exceeds the configured limit.
    """
    # basic error checking
    if not Path(input_dir).exists():
        raise SystemExit(f"ERROR: Unable to find directory {input_dir}")

    if Path(input_dir).group() != Path(basepath).group():
        raise SystemExit(f"ERROR: Group for directory {input_dir} does not "
                         f"match group for {basepath} directory")

    # The kind of directory being read ("submissions", "results", "checkout")
    # is the parent of the gradeable directory in the paths the callers build.
    # NOTE(review): manually specified gradeable paths are assumed to follow
    # the same <dir>/<gradeable> layout -- confirm.
    in_results_dir = Path(input_dir).parent.name == "results"
    active_only = config["version"] == "active_version"

    # loop over each user
    for user in sorted(os.listdir(input_dir)):
        user_path = os.path.join(input_dir, user)
        if not os.path.isdir(user_path) or user in config["ignore_submissions"]:
            continue

        active_version = _find_active_version(user_path) if active_only else None

        # loop over each version
        for version in sorted(os.listdir(user_path)):
            version_path = os.path.join(user_path, version)
            if in_results_dir:
                # only the "details" folder within "results" contains files relevant to Lichen
                version_path = os.path.join(version_path, "details")
            if not os.path.isdir(version_path):
                continue
            # Non-numeric directory names can never be the active version.
            if active_only and (not version.isdecimal()
                                or int(version) != active_version):
                continue

            output_file_path = os.path.join(output_dir, user, version,
                                            "submission.concatenated")
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

            # append to concatenated file
            with open(output_file_path, "a") as output_file:
                concatenated_contents = getConcatFilesInDir(version_path, config["regex"])
                output_file.write(concatenated_contents)
                total_concat += sys.getsizeof(concatenated_contents)

            # If we've exceeded the concatenation limit, kill program
            checkTotalSize(total_concat)
    return total_concat
118+
119+
60120def checkTotalSize (total_concat ):
61121 if total_concat > LICHEN_CONFIG ['concat_max_total_bytes' ]:
62122 raise SystemExit ("ERROR! exceeded"
@@ -85,37 +145,44 @@ def validate(config, args):
85145 with open (langs_data_json_path , 'r' ) as langs_data_file :
86146 langs_data = json .load (langs_data_file )
87147 if language not in langs_data :
88- raise SystemExit (f"ERROR! tokenizing not supported for language { language } " )
148+ raise SystemExit (f"ERROR: tokenizing not supported for language { language } " )
89149
90150 # Check values of common code threshold and hash size
91151 if (threshold < 2 ):
92- raise SystemExit ("ERROR! threshold must be >= 2" )
152+ raise SystemExit ("ERROR: threshold must be >= 2" )
93153
94154 if (hash_size < 1 ):
95- raise SystemExit ("ERROR! hash_size must be >= 1" )
155+ raise SystemExit ("ERROR: hash_size must be >= 1" )
96156
97157 # Check for backwards crawling
98158 for e in regex_patterns :
99159 if ".." in e :
100- raise SystemExit ('ERROR! Invalid path component ".." in regex' )
160+ raise SystemExit ('ERROR: Invalid path component ".." in regex' )
101161
102162 for gradeable in other_gradeables :
103163 for field in gradeable :
104164 if ".." in field :
105- raise SystemExit ('ERROR! Invalid component ".." in other_gradeable path' )
165+ raise SystemExit ('ERROR: Invalid component ".." in other_gradeable path' )
106166
107167 # check permissions to make sure we have access to the other gradeables
108168 my_course_group_perms = Path (args .basepath ).group ()
109169 for gradeable in other_gradeables :
110170 if Path (args .datapath , gradeable ["other_semester" ], gradeable ["other_course" ]).group ()\
111171 != my_course_group_perms :
112- raise SystemExit ("ERROR! Invalid permissions to access course "
172+ raise SystemExit ("ERROR: Invalid permissions to access course "
113173 f"{ gradeable ['other_semester' ]} /{ gradeable ['other_course' ]} " )
114174
175+ # check permissions for each path we are given (if any are provided)
176+ if config .get ("other_gradeable_paths" ) is not None :
177+ for path in config ["other_gradeable_paths" ]:
178+ if Path (path ).group () != my_course_group_perms :
179+ raise SystemExit (f"ERROR: Group for directory { path } does not"
180+ f"match group for { args .basepath } directory" )
181+
115182 # make sure the regex directory is one of the acceptable directories
116183 for dir in regex_dirs :
117184 if dir not in ["submissions" , "results" , "checkout" ]:
118- raise SystemExit ("ERROR! " , dir , " is not a valid input directory for Lichen" )
185+ raise SystemExit (f "ERROR: { dir } is not a valid input directory for Lichen" )
119186
120187
121188def main ():
@@ -138,145 +205,80 @@ def main():
138205 semester = config ["semester" ]
139206 course = config ["course" ]
140207 gradeable = config ["gradeable" ]
141- version_mode = config ["version" ]
142208 regex_patterns = config ["regex" ]
143209 regex_dirs = config ["regex_dirs" ]
144210 other_gradeables = config ["other_gradeables" ]
145- users_to_ignore = config ["ignore_submissions" ]
211+ # optional field -> other_gradeable_paths=None if key doesn't exist
212+ other_gradeable_paths = config .get ("other_gradeable_paths" )
146213
147214 # ==========================================================================
148215 # loop through and concatenate the selected files for each user in this gradeable
149216 total_concat = 0
150217
151218 for dir in regex_dirs :
152- gradeable_path = os .path .join (args .datapath , semester , course , dir , gradeable )
153- # loop over each user
154- for user in sorted (os .listdir (gradeable_path )):
155- user_path = os .path .join (gradeable_path , user )
156- if not os .path .isdir (user_path ):
157- continue
158- elif user in users_to_ignore :
159- continue
160-
161- if version_mode == "active_version" :
162- # get the user's active version from their settings file if it exists, else get
163- # most recent version for compatibility with early versions of Submitty
164- submissions_details_path = os .path .join (user_path , 'user_assignment_settings.json' )
165- if os .path .exists (submissions_details_path ):
166- with open (submissions_details_path ) as details_file :
167- details_json = json .load (details_file )
168- my_active_version = int (details_json ["active_version" ])
169- else :
170- # get the most recent version
171- my_active_version = sorted (os .listdir (user_path ))[- 1 ]
172-
173- # loop over each version
174- for version in sorted (os .listdir (user_path )):
175- version_path = os .path .join (user_path , version )
176- if dir == "results" :
177- # only the "details" folder within "results" contains files relevant to Lichen
178- version_path = os .path .join (version_path , "details" )
179- if not os .path .isdir (version_path ):
180- continue
181- if version_mode == "active_version" and int (version ) != my_active_version :
182- continue
183-
184- output_file_path = os .path .join (args .basepath , "users" , user ,
185- version , "submission.concatenated" )
186-
187- if not os .path .exists (os .path .dirname (output_file_path )):
188- os .makedirs (os .path .dirname (output_file_path ))
189-
190- # append to concatenated file
191- with open (output_file_path , "a" ) as output_file :
192- concatenated_contents = getConcatFilesInDir (version_path , regex_patterns )
193- output_file .write (concatenated_contents )
194- total_concat += sys .getsizeof (concatenated_contents )
195-
196- checkTotalSize (total_concat )
219+ input_path = os .path .join (args .datapath , semester , course , dir , gradeable )
220+ output_path = os .path .join (args .basepath , "users" )
221+ total_concat = processGradeable (args .basepath , config ,
222+ input_path , output_path , total_concat )
197223
198224 # ==========================================================================
199225 # loop over all of the other gradeables and concatenate their submissions
200226 for other_gradeable in other_gradeables :
201227 for dir in regex_dirs :
202- other_gradeable_path = os .path .join (args .datapath ,
203- other_gradeable ["other_semester" ],
204- other_gradeable ["other_course" ],
205- dir ,
206- other_gradeable ["other_gradeable" ])
207- # loop over each user
208- for other_user in sorted (os .listdir (other_gradeable_path )):
209- other_user_path = os .path .join (other_gradeable_path , other_user )
210- if not os .path .isdir (other_user_path ):
211- continue
212-
213- if version_mode == "active_version" :
214- # get the user's active version from their settings file if it exists, else get
215- # most recent version for compatibility with early versions of Submitty
216- other_submissions_details_path = os .path .join (other_user_path ,
217- 'user_assignment_settings.json' )
218- if os .path .exists (other_submissions_details_path ):
219- with open (other_submissions_details_path ) as other_details_file :
220- other_details_json = json .load (other_details_file )
221- my_active_version = int (other_details_json ["active_version" ])
222- else :
223- # get the most recent version
224- my_active_version = sorted (os .listdir (other_user_path ))[- 1 ]
225-
226- # loop over each version
227- for other_version in sorted (os .listdir (other_user_path )):
228- other_version_path = os .path .join (other_user_path , other_version )
229- if dir == "results" :
230- # only the "details" dir within "results" contains files relevant to Lichen
231- other_version_path = os .path .join (other_version_path , "details" )
232- if not os .path .isdir (other_version_path ):
233- continue
234-
235- other_output_file_path = os .path .join (args .basepath , "other_gradeables" ,
236- f"{ other_gradeable ['other_semester' ]} __{ other_gradeable ['other_course' ]} __{ other_gradeable ['other_gradeable' ]} " , # noqa: E501
237- other_user , other_version ,
238- "submission.concatenated" )
239-
240- if not os .path .exists (os .path .dirname (other_output_file_path )):
241- os .makedirs (os .path .dirname (other_output_file_path ))
242-
243- # append to concatenated file
244- with open (other_output_file_path , "a" ) as other_output_file :
245- other_concatenated_contents = getConcatFilesInDir (other_version_path ,
246- regex_patterns )
247- other_output_file .write (other_concatenated_contents )
248- total_concat += sys .getsizeof (other_concatenated_contents )
249-
250- checkTotalSize (total_concat )
228+ input_path = os .path .join (args .datapath ,
229+ other_gradeable ["other_semester" ],
230+ other_gradeable ["other_course" ],
231+ dir ,
232+ other_gradeable ["other_gradeable" ])
233+
234+ output_path = os .path .join (args .basepath , "other_gradeables" ,
235+ f"{ other_gradeable ['other_semester' ]} __{ other_gradeable ['other_course' ]} __{ other_gradeable ['other_gradeable' ]} " ) # noqa: E501
236+ total_concat = processGradeable (args .basepath , config ,
237+ input_path , output_path , total_concat )
238+
239+ # take care of any manually-specified paths if they exist
240+ if other_gradeable_paths is not None :
241+ for path in other_gradeable_paths :
242+ # We hash the path as the name of the gradeable
243+ dir_name = hashlib .md5 (path .encode ('utf-8' )).hexdigest ()
244+ output_path = os .path .join (args .basepath , "other_gradeables" , dir_name )
245+ total_concat = processGradeable (args .basepath , config , path ,
246+ output_path , total_concat )
251247
252248 # ==========================================================================
253249 # iterate over all of the created submissions, checking to see if they are empty
254250 # and printing a message if so
255251
252+ empty_directories = [] # holds a list of users who had no files concatenated
253+
256254 for user in os .listdir (os .path .join (args .basepath , "users" )):
257255 user_path = os .path .join (args .basepath , "users" , user )
258256 for version in os .listdir (user_path ):
259257 version_path = os .path .join (user_path , version )
260258 my_concatenated_file = os .path .join (version_path , "submission.concatenated" )
261259 with open (my_concatenated_file , "r" ) as my_cf :
262260 if my_cf .read () == "" :
263- print ("Warning: No files matched provided regex in selected directories "
264- f"for user { user } version { version } " )
261+ empty_directories .append (f"{ user } :{ version } " )
262+ if len (empty_directories ) > 0 :
263+ print ("Warning: No files matched provided regex in selected directories for user(s):" ,
264+ ", " .join (empty_directories ))
265265
266266 # do the same for the other gradeables
267- for other_gradeable in other_gradeables :
268- other_gradeable_dir_name = f" { other_gradeable [ 'other_semester' ] } __ { other_gradeable [ 'other_course' ] } __ { other_gradeable [ 'other_gradeable' ] } " # noqa: E501
269- for other_user in os .listdir (os .path .join (args .basepath , "other_gradeables" ,
270- other_gradeable_dir_name )):
267+ for other_gradeable in os . listdir ( os . path . join ( args . basepath , " other_gradeables" )) :
268+ empty_directories = []
269+ for other_user in os .listdir (os .path .join (args .basepath ,
270+ "other_gradeables" , other_gradeable )):
271271 other_user_path = os .path .join (args .basepath , "other_gradeables" ,
272- other_gradeable_dir_name , other_user )
272+ other_gradeable , other_user )
273273 for other_version in os .listdir (other_user_path ):
274274 other_version_path = os .path .join (other_user_path , other_version )
275275 my_concatenated_file = os .path .join (other_version_path , "submission.concatenated" )
276276 with open (my_concatenated_file , "r" ) as my_cf :
277277 if my_cf .read () == "" :
278- print ("Warning: No files matched provided regex in selected directories "
279- f"for user { other_user } version { other_version } " )
278+ empty_directories .append (f"{ other_user } :{ other_version } " )
279+ if len (empty_directories ) > 0 :
280+ print ("Warning: No files matched provided regex in selected directories for user(s):" ,
281+ ", " .join (empty_directories ), "in gradeable" , other_gradeable )
280282
281283 # ==========================================================================
282284 # concatenate provided code
0 commit comments