2121# for dac_compute_coords.py and dac_upload_model.py
2222import imp
2323
24+ # for error logging
25+ import cherrypy
26+
2427# note this version assumes the first row is a header row, and keeps only the header
2528# and data (called by the generic zip parser)
2629def parse_table_file (file ):
@@ -121,7 +124,7 @@ def parse_mat_file(file):
121124 # parse file using comma delimiter
122125 rows = [row for row in csv .reader (file .decode ().splitlines (), delimiter = "," , doublequote = True ,
123126 escapechar = None , quotechar = '"' , quoting = csv .QUOTE_MINIMAL , skipinitialspace = True )]
124-
127+
125128 # check that we have a matrix
126129 num_rows = len (rows )
127130 num_cols = len (rows [0 ])
@@ -287,6 +290,7 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
287290 # look for one occurrence (only) of .dac file and var, dist, and time directories
288291 dac_file = ""
289292 landmarks_file = ""
293+ pca_file = ""
290294 var_meta_file = ""
291295 var_files = []
292296 dist_files = []
@@ -308,6 +312,10 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
308312 if zip_file == "landmarks.csv" :
309313 landmarks_file = zip_file
310314
315+ # is it "pca.csv"?
316+ if zip_file == "pca.csv" :
317+ pca_file = zip_file
318+
311319 # found a directory -- is it "var/"?
312320 elif head == "var" :
313321
@@ -395,23 +403,23 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
395403
396404 # check var file names
397405 num_vars = len (meta_vars )
398- check_file_names (database , model , parse_error_log , dac_error ,
406+ check_file_names (database , model , dac_error , parse_error_log ,
399407 "var/variable_" , ".var" , num_vars , var_files ,
400408 "missing variable_*.var file(s)." )
401409
402410 parse_error_log = dac_error .update_parse_log (database , model , parse_error_log , "Progress" ,
403411 "Checked DAC variable file names." )
404412
405413 # check time file names
406- check_file_names (database , model , parse_error_log , dac_error ,
414+ check_file_names (database , model , dac_error , parse_error_log ,
407415 "time/variable_" , ".time" , num_vars , time_files ,
408416 "missing variable_*.time file(s)." )
409417
410418 parse_error_log = dac_error .update_parse_log (database , model , parse_error_log , "Progress" ,
411419 "Checked DAC time file names." )
412420
413421 # check dist file names
414- check_file_names (database , model , parse_error_log , dac_error ,
422+ check_file_names (database , model , dac_error , parse_error_log ,
415423 "dist/variable_" , ".dist" , num_vars , dist_files ,
416424 "missing variable_*.dist file(s)." )
417425
@@ -428,19 +436,32 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
428436 landmarks = None
429437 if landmarks_file != "" :
430438
431- # parse variables.meta file
439+ # parse landmarks.csv file
432440 attr , dim , landmarks = parse_mat_file (zip_ref .read (landmarks_file ))
433441
434442 else :
435443
436444 parse_error_log = dac_error .update_parse_log (database , model , parse_error_log , "Progress" ,
437445 "No landmarks.csv file found, using all data points." )
438446
447+ # load pca-comps file
448+ pca_comps = None
449+ if pca_file != "" :
450+
451+ # parse pca.csv file
452+ attr , dim , pca_comps = parse_mat_file (zip_ref .read (pca_file ))
453+
454+ else :
455+
456+ parse_error_log = dac_error .update_parse_log (database , model , parse_error_log , "Progress" ,
457+ "No pca.csv file found, using MDS algorithm." )
458+
439459 # now start thread to prevent timing out on large files
440460 stop_event = threading .Event ()
441461 thread = threading .Thread (target = parse_gen_zip_thread ,
442462 args = (database , model , zip_ref , dac_error , parse_error_log ,
443- meta_var_col_names , meta_vars , landmarks , dac_file , stop_event ))
463+ meta_var_col_names , meta_vars , landmarks , pca_comps ,
464+ dac_file , stop_event ))
444465 thread .start ()
445466
446467
@@ -463,7 +484,8 @@ def check_file_names (database, model, dac_error, parse_error_log,
463484
464485# gen zip parsing thread to prevent time outs by browser
465486def parse_gen_zip_thread (database , model , zip_ref , dac_error , parse_error_log ,
466- meta_var_col_names , meta_vars , landmarks , dac_file , stop_event ):
487+ meta_var_col_names , meta_vars , landmarks , pca_comps ,
488+ dac_file , stop_event ):
467489
468490 # put entire thread into a try-except block in order report errors
469491 try :
@@ -480,8 +502,27 @@ def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
480502 # number of data points
481503 num_datapoints = len (meta_rows )
482504
505+ # do pca check (pca overrides landmarks)
506+ use_coordinates = False
507+ if pca_comps is not None :
508+
509+ num_pca_comps = int (numpy .round (pca_comps [0 ]))
510+
511+ # check that pca comps is at least two
512+ if num_pca_comps < 2 :
513+
514+ dac_error .quit_raise_exception (database , model , parse_error_log ,
515+ 'Number of PCA components must be at least two.' )
516+
517+ # set as number of landmarks
518+ num_landmarks = num_pca_comps
519+ use_coordinates = True
520+
521+ parse_error_log = dac_error .update_parse_log (database , model , parse_error_log , "Progress" ,
522+ "Using " + str (num_pca_comps ) + " PCA components." )
523+
483524 # do landmark checks
484- if landmarks is not None :
525+ elif landmarks is not None :
485526
486527 num_landmarks = len (landmarks )
487528
@@ -589,7 +630,8 @@ def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
589630 push .init_upload_model (database , model , dac_error , parse_error_log ,
590631 meta_column_names , meta_rows ,
591632 meta_var_col_names , meta_vars ,
592- variable , time_steps , var_dist , landmarks = landmarks )
633+ variable , time_steps , var_dist ,
634+ landmarks = landmarks , use_coordinates = use_coordinates )
593635
594636 # done -- destroy the thread
595637 stop_event .set ()
0 commit comments