 
 def parse_args():
     parser = argparse.ArgumentParser(description="")
-    parser.add_argument("semester")
-    parser.add_argument("course")
-    parser.add_argument("gradeable")
-    parser.add_argument("--window",type=int,default=10)
-    parser.add_argument("--hash_size",type=int,default=100000)
-    language = parser.add_mutually_exclusive_group(required=True)
-    language.add_argument("--plaintext", action='store_true')
-    language.add_argument("--python", action='store_true')
-    language.add_argument("--cpp", action='store_true')
-
+    parser.add_argument("config_path")
     args = parser.parse_args()
-
-    if (args.window < 1):
-        print("ERROR! window must be >= 1")
-        exit(1)
-
     return args


 def hasher(args,my_tokenized_file,my_hashes_file):
-    with open(my_tokenized_file,'r') as my_tf:
+    with open(args.config_path) as lichen_config:
+        lichen_config_data = json.load(lichen_config)
+        language = lichen_config_data["language"]
+        sequence_length = int(lichen_config_data["sequence_length"])
+
+    if (sequence_length < 1):
+        print("ERROR! sequence_length must be >= 1")
+        exit(1)
+
+    with open(my_tokenized_file,'r',encoding='ISO-8859-1') as my_tf:
         with open(my_hashes_file,'w') as my_hf:
             tokens = json.load(my_tf)
             num = len(tokens)
-            for i in range(0,num-args.window):
+            for i in range(0,num-sequence_length):
                 foo = ""
-                if args.plaintext:
-                    for j in range(0,args.window):
+                if language == "plaintext":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("value"))

-                elif args.python:
-                    for j in range(0,args.window):
+                elif language == "python":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("type"))

-                elif args.cpp:
-                    for j in range(0,args.window):
+                elif language == "cpp":
+                    for j in range(0,sequence_length):
                         foo += str(tokens[i+j].get("type"))

                 else:
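With this change the script no longer takes per-run flags (`semester`, `course`, `gradeable`, `--window`, `--plaintext`/`--python`/`--cpp`); everything comes from the JSON config file passed as `config_path`. A minimal sketch of such a config, showing only the keys this diff actually reads (the real file may carry additional fields, and the values here are placeholders):

```python
import json

# Placeholder config for illustration -- only the keys read by this diff
# (language, sequence_length, semester, course, gradeable) are shown.
lichen_config = {
    "semester": "s21",         # hypothetical term id
    "course": "sample",        # hypothetical course id
    "gradeable": "hw01",       # hypothetical gradeable id
    "language": "plaintext",   # "plaintext", "python", or "cpp"
    "sequence_length": 10      # replaces the old --window flag; must be >= 1
}

with open("lichen_config.json", "w") as f:   # output path is an assumption
    json.dump(lichen_config, f, indent=4)
```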
@@ -77,26 +72,32 @@ def hasher(args,my_tokenized_file,my_hashes_file):
 def main():
     args = parse_args()
 
+    with open(args.config_path) as lichen_config:
+        lichen_config_data = json.load(lichen_config)
+        semester = lichen_config_data["semester"]
+        course = lichen_config_data["course"]
+        gradeable = lichen_config_data["gradeable"]
+
     sys.stdout.write("HASH ALL...")
     sys.stdout.flush()
 
     # ===========================================================================
     # error checking
-    course_dir = os.path.join(SUBMITTY_DATA_DIR,"courses",args.semester,args.course)
+    course_dir = os.path.join(SUBMITTY_DATA_DIR,"courses",semester,course)
     if not os.path.isdir(course_dir):
         print("ERROR! ",course_dir," is not a valid course directory")
         exit(1)
-    tokenized_dir = os.path.join(course_dir,"lichen","tokenized",args.gradeable)
+    tokenized_dir = os.path.join(course_dir,"lichen","tokenized",gradeable)
     if not os.path.isdir(tokenized_dir):
         print("ERROR! ",tokenized_dir," is not a valid gradeable tokenized directory")
         exit(1)
 
-    hashes_dir = os.path.join(course_dir,"lichen","hashes",args.gradeable)
+    hashes_dir = os.path.join(course_dir,"lichen","hashes",gradeable)
 
     # ===========================================================================
     # walk the subdirectories
-    for user in os.listdir(tokenized_dir):
-        for version in os.listdir(os.path.join(tokenized_dir,user)):
+    for user in sorted(os.listdir(tokenized_dir)):
+        for version in sorted(os.listdir(os.path.join(tokenized_dir,user))):
             my_tokenized_file = os.path.join(tokenized_dir,user,version,"tokens.json")
 
             # ===========================================================================
@@ -108,7 +109,6 @@ def main():
             my_hashes_file = os.path.join(my_hashes_dir,"hashes.txt")
             hasher(args,my_tokenized_file,my_hashes_file)
 
-
     print("done")
 
 if __name__ == "__main__":
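For reference, the loop in `hasher()` slides a window of `sequence_length` consecutive tokens over `tokens.json` and fingerprints each window. The digest step itself falls outside the hunks shown here, so the truncated MD5 below is an assumption for illustration; the window construction mirrors the diff:

```python
import hashlib

def hash_token_sequences(tokens, sequence_length, language):
    """Sketch of the sliding-window hashing performed above.

    tokens is the list loaded from tokens.json (each entry has "value"
    and "type" fields).  The truncated-MD5 digest is an assumption --
    the real digest choice lives outside the hunks in this diff.
    """
    key = "value" if language == "plaintext" else "type"
    hashes = []
    for i in range(0, len(tokens) - sequence_length):
        # concatenate one window of sequence_length consecutive tokens
        foo = "".join(str(tokens[i + j].get(key)) for j in range(sequence_length))
        # reduce the window to a short fixed-width fingerprint
        hashes.append(hashlib.md5(foo.encode()).hexdigest()[:8])
    return hashes

# tiny usage example with fake tokens
fake_tokens = [{"value": w, "type": "word"} for w in "the quick brown fox jumps".split()]
print(hash_token_sequences(fake_tokens, 2, "plaintext"))
```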