@@ -28,51 +28,30 @@ def tokenize(args,my_concatenated_file,my_tokenized_file):
2828 lichen_config_data = json .load (lichen_config )
2929 language = lichen_config_data ["language" ]
3030
31- if language == "plaintext" :
32- tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" ,"plaintext_tokenizer.out" )
33- with open (my_concatenated_file ,'r' ) as infile :
34- with open (my_tokenized_file ,'w' ) as outfile :
35- subprocess .call ([tokenizer ,"--ignore_newlines" ],stdin = infile ,stdout = outfile )
36-
37- elif language == "python" :
38- tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" ,"python_tokenizer.py" )
39- with open (my_concatenated_file ,'r' ) as infile :
40- with open (my_tokenized_file ,'w' ) as outfile :
41- command = "python3 " + str (tokenizer )+ " " + my_concatenated_file + " > " + my_tokenized_file
42- os .system (command )
43-
44- elif language == "cpp" :
45- tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" ,"c_tokenizer.py" )
46- with open (my_concatenated_file ,'r' ) as infile :
47- with open (my_tokenized_file ,'w' ) as outfile :
48- command = "python " + str (tokenizer )+ " " + my_concatenated_file + " > " + my_tokenized_file
49- os .system (command )
50-
51- elif language == "java" :
52- tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" ,"java_tokenizer.py" )
53- with open (my_concatenated_file ,'r' ) as infile :
54- with open (my_tokenized_file ,'w' ) as outfile :
55- command = "python " + str (tokenizer )+ " " + my_concatenated_file + " > " + my_tokenized_file
56- os .system (command )
57-
58- elif language == "mips" :
59- tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" ,"mips_tokenizer.py" )
60- with open (my_concatenated_file ,'r' ) as infile :
61- with open (my_tokenized_file ,'w' ) as outfile :
62- command = "python3 " + str (tokenizer )+ " " + my_concatenated_file + " > " + my_tokenized_file
63- os .system (command )
64-
65- else :
66- print ("\n \n ERROR: UNKNOWN TOKENIZER\n \n " )
67- exit (1 )
68-
31+ language_token_data = dict ()
32+
33+ data_json_path = os .path .join (SUBMITTY_INSTALL_DIR , "Lichen" , "bin" , "data.json" )
34+ with open (data_json_path , 'r' ) as token_data_file :
35+ token_data = json .load (token_data_file )
36+ if language not in token_data :
37+ print ("\n \n ERROR: UNKNOWN TOKENIZER\n \n " )
38+ exit (1 )
39+ else :
40+ language_token_data = token_data [language ]
41+
42+ tokenizer = os .path .join (SUBMITTY_INSTALL_DIR ,"Lichen" ,"bin" , language_token_data ["tokenizer" ])
43+ if not language_token_data .get ("input_as_argument" ):
44+ my_concatenated_file = f'< { my_concatenated_file } '
45+ cli_args = ' ' .join (language_token_data ["command_args" ]) if "command_args" in language_token_data else ''
46+ command = f'{ language_token_data ["command_executable" ]} { tokenizer } { cli_args } { my_concatenated_file } > { my_tokenized_file } ' .strip ()
47+ os .system (command )
6948
7049def main ():
7150 args = parse_args ()
7251
7352 sys .stdout .write ("TOKENIZE ALL..." )
7453 sys .stdout .flush ()
75-
54+
7655 with open (args .config_path ) as lichen_config :
7756 lichen_config_data = json .load (lichen_config )
7857 semester = lichen_config_data ["semester" ]
@@ -108,6 +87,5 @@ def main():
10887
10988 print ("done" )
11089
111-
11290if __name__ == "__main__" :
11391 main ()