mwang87 · mwang87 · Feb 5, 2020 · Feb 5, 2020 · Feb 8, 2020 · Feb 8, 2020
diff --git a/code/redu_pca.py b/code/redu_pca.py
@@ -74,13 +74,21 @@ def calculate_master_projection(input_file_occurrences_table, components = 3, sm
         return(sklearn_output, unique_sample, eigenvalues, percent_variance)    
 
 ### Given a new file occurrence table, creates a projection of the new data along with the old data and saves as a png output
-def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors=False):
+def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors=False, unit_test=False):
     new_matrix = np.array([]) 
     file_list = []
-
-    #load components, eigenvalues, and percent variance
-    component_matrix = pd.read_csv(config.PATH_TO_COMPONENT_MATRIX, sep = ",")
-    eig_var_df = pd.read_csv(config.PATH_TO_EIGS, sep = ",")
+
+    if unit_test:
+        component_matrix = pd.read_csv("./reference_data/component_matrix.csv")
+        eig_var_df = pd.read_csv("./reference_data/eigs_var.csv")
+        original_pca_df = pd.read_csv("./reference_data/original_pca.csv")
+
+    else:
+        #load components, eigenvalues, and percent variance
+        component_matrix = pd.read_csv(config.PATH_TO_COMPONENT_MATRIX, sep = ",")
+        eig_var_df = pd.read_csv(config.PATH_TO_EIGS, sep = ",")
+        original_pca_df = pd.read_csv(config.PATH_TO_ORIGINAL_PCA, sep = ",")
+
     eigenvalues = eig_var_df["eigenvalues"].tolist()
     percent_variance = eig_var_df["percent_variance"].tolist()
 
@@ -144,45 +152,52 @@ def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors
     new_pca_df.columns = new_pca_df.columns.astype(str)
 
     #load and format the original pca
-    original_pca_df = pd.read_csv(config.PATH_TO_ORIGINAL_PCA, sep = ",")
     original_pca_df.set_index(['Unnamed: 0'], inplace=True) 
-
-    all_pca_df = pd.concat([original_pca_df, new_pca_df]) #merging the two dataframes together
-
-    #create things to be passed to emperor output
-    values_only = all_pca_df.to_numpy()
-    full_file_list = list(all_pca_df.index) 
-
-    #call and create an emperor output for the old data and the new projected data
-    emperor_output(values_only, full_file_list, eigenvalues, percent_variance, output_file, new_sample_list)
-
     if calculate_neighbors:
         all_neighbors = [] 
         ary = scipy.spatial.distance.cdist(new_pca_df, original_pca_df, metric='euclidean')    
-        
+
         for i in range(len(ary)):
             neighbor_distances_df = pd.DataFrame()            
             neighbor_distances_df["filename"] = original_pca_df.index
             neighbor_distances_df["distance"] = ary[i,:]
             neighbor_distances_df = neighbor_distances_df.sort_values("distance")
-            df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
+            df = original_pca_df
+            if unit_test:
+                df = pd.read_table("./reference_data/all_sampleinformation.tsv")
+            else:
+                df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
             neighbor_distances_df = neighbor_distances_df.merge(df, how="left", left_on="filename", right_on="filename")
             neighbor_distances_df["query"] = new_pca_df.index[i]
 
             all_neighbors += neighbor_distances_df.to_dict(orient="records")[:100]
 
         return(all_neighbors)
-
+
+    all_pca_df = pd.concat([original_pca_df, new_pca_df]) #merging the two dataframes together
+
+    #create things to be passed to emperor output
+    values_only = all_pca_df.to_numpy()
+    full_file_list = list(all_pca_df.index) 
+
+    #call and create an emperor output for the old data and the new projected data
+    emperor_output(values_only, full_file_list, eigenvalues, percent_variance, output_file, new_sample_list, unit_test)
+
+
 ###function takes in all the calculated outputs and places them into the ordination results and then feeds it into the emperor thing to output a plot   
-def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files = []):   
+def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files = [], unit_test=False):   
     eigvals = pd.Series(data = eigenvalues)
     samples = pd.DataFrame(data = sklearn_output, index = full_file_list)
     samples.index.rename("SampleID", inplace = True)
     p_explained = pd.Series(data = percent_variance)
     ores = OrdinationResults(long_method_name = "principal component analysis", short_method_name = "pcoa", eigvals = eigvals, samples = samples, proportion_explained = p_explained)
 
-    #read in all sample metadata 
-    df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
+    if unit_test:
+        df = pd.read_table("./reference_data/all_sampleinformation.tsv")
+    else:
+        #read in all sample metadata 
+        df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
+
     df.rename(columns={"filename" : "SampleID"}, inplace = True)
     df.set_index("SampleID", inplace = True)
 

diff --git a/code/templates/comparemultivariate.html b/code/templates/comparemultivariate.html
@@ -45,14 +45,13 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
         <div class="row">
            <div class="col-sm"></div>
            <div class="col-sm">
-               <div class="text-center">
-                    <a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d">Click for Example PCA</button>
-                    <a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=1">Click for Example Nearest Neighbor</button>
+               <div class="text-center">    
+                    <a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=0">Click for Example PCA</a>
+                    <a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=1">Click for Example Nearest Neighbor</a>
                </div>
            </div>
            <div class="col-sm"></div>
         </div>
-
     </div>
 </div>
 
@@ -65,7 +64,7 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
         }
         else{
             remote_url = "/processcomparemultivariate?task=" + $("#task").val() + "&knn=0"
-            $('#display').prepend($('<img>',{id:'pca',src: remote_url}))
+            //$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
             window.location.replace(remote_url)
         }
     }
@@ -78,7 +77,7 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
         }
         else{
             remote_url = "/processcomparemultivariate?task=" + $("#task").val() + "&knn=1"
-            $('#display').prepend($('<img>',{id:'pca',src: remote_url}))
+            //$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
             window.location.replace(remote_url)
         }
     }

diff --git a/code/templates/multivariateneighbors.html b/code/templates/multivariateneighbors.html
@@ -16,9 +16,21 @@ <h2>Comparison Neighbors</h2>
 
     <hr>
 
+    <h2>Consensus Neighbors</h2>
+
+    <div class="table table-striped">
+        <table id="consensustable" class="display" width="100%"></table>
+    </div>
+
+    <hr>
+
+    <h2>All Neighbors</h2>
+
     <div class="table table-striped">
         <table id="datatable" class="display" width="100%"></table>
     </div>
+
+
 </div>
 
 <script>
@@ -32,6 +44,24 @@ <h2>Comparison Neighbors</h2>
             { data: "distance" , title: "distance"},
             { data: "SampleType" , title: "SampleType"},
             { data: "SampleTypeSub1" , title: "SampleTypeSub1"},
+            { data: "MassSpectrometer" , title: "MassSpectrometer"},
+        ],
+        pageLength: 30,
+        dom: 'Bflrtip',
+        buttons: [
+            'copy', 'csv', 'excel'
+        ]
+    } );
+});
+
+$( document ).ready(function() {
+    $('#consensustable').DataTable( {
+        data: (( consensus_list | tojson )),
+        columns: [
+            { data: "query" , title: "query"},
+            { data: "attribute" , title: "attribute"},
+            { data: "term" , title: "term"},
+            { data: "count" , title: "count"}
         ],
         pageLength: 30,
         dom: 'Bflrtip',