Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 37 additions & 22 deletions code/redu_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,21 @@ def calculate_master_projection(input_file_occurrences_table, components = 3, sm
return(sklearn_output, unique_sample, eigenvalues, percent_variance)

### Given a new file occurrence table, creates a projection of the new data along with the old data and saves as a png output
def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors=False):
def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors=False, unit_test=False):
new_matrix = np.array([])
file_list = []

#load components, eigenvalues, and percent variance
component_matrix = pd.read_csv(config.PATH_TO_COMPONENT_MATRIX, sep = ",")
eig_var_df = pd.read_csv(config.PATH_TO_EIGS, sep = ",")

if unit_test:
component_matrix = pd.read_csv("./reference_data/component_matrix.csv")
eig_var_df = pd.read_csv("./reference_data/eigs_var.csv")
original_pca_df = pd.read_csv("./reference_data/original_pca.csv")

else:
#load components, eigenvalues, and percent variance
component_matrix = pd.read_csv(config.PATH_TO_COMPONENT_MATRIX, sep = ",")
eig_var_df = pd.read_csv(config.PATH_TO_EIGS, sep = ",")
original_pca_df = pd.read_csv(config.PATH_TO_ORIGINAL_PCA, sep = ",")

eigenvalues = eig_var_df["eigenvalues"].tolist()
percent_variance = eig_var_df["percent_variance"].tolist()

Expand Down Expand Up @@ -144,45 +152,52 @@ def project_new_data(new_file_occurrence_table, output_file, calculate_neighbors
new_pca_df.columns = new_pca_df.columns.astype(str)

#load and format the original pca
original_pca_df = pd.read_csv(config.PATH_TO_ORIGINAL_PCA, sep = ",")
original_pca_df.set_index(['Unnamed: 0'], inplace=True)

all_pca_df = pd.concat([original_pca_df, new_pca_df]) #merging the two dataframes together

#create things to be passed to emperor output
values_only = all_pca_df.to_numpy()
full_file_list = list(all_pca_df.index)

#call and create an emperor output for the old data and the new projected data
emperor_output(values_only, full_file_list, eigenvalues, percent_variance, output_file, new_sample_list)

if calculate_neighbors:
all_neighbors = []
ary = scipy.spatial.distance.cdist(new_pca_df, original_pca_df, metric='euclidean')

for i in range(len(ary)):
neighbor_distances_df = pd.DataFrame()
neighbor_distances_df["filename"] = original_pca_df.index
neighbor_distances_df["distance"] = ary[i,:]
neighbor_distances_df = neighbor_distances_df.sort_values("distance")
df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
df = original_pca_df
if unit_test:
df = pd.read_table("./reference_data/all_sampleinformation.tsv")
else:
df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
neighbor_distances_df = neighbor_distances_df.merge(df, how="left", left_on="filename", right_on="filename")
neighbor_distances_df["query"] = new_pca_df.index[i]

all_neighbors += neighbor_distances_df.to_dict(orient="records")[:100]

return(all_neighbors)


all_pca_df = pd.concat([original_pca_df, new_pca_df]) #merging the two dataframes together

#create things to be passed to emperor output
values_only = all_pca_df.to_numpy()
full_file_list = list(all_pca_df.index)

#call and create an emperor output for the old data and the new projected data
emperor_output(values_only, full_file_list, eigenvalues, percent_variance, output_file, new_sample_list, unit_test)


### Takes all the calculated outputs, places them into an OrdinationResults object, and feeds it into Emperor to output a plot
def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files = []):
def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files = [], unit_test=False):
eigvals = pd.Series(data = eigenvalues)
samples = pd.DataFrame(data = sklearn_output, index = full_file_list)
samples.index.rename("SampleID", inplace = True)
p_explained = pd.Series(data = percent_variance)
ores = OrdinationResults(long_method_name = "principal component analysis", short_method_name = "pcoa", eigvals = eigvals, samples = samples, proportion_explained = p_explained)

#read in all sample metadata
df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
if unit_test:
df = pd.read_table("./reference_data/all_sampleinformation.tsv")
else:
#read in all sample metadata
df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)

df.rename(columns={"filename" : "SampleID"}, inplace = True)
df.set_index("SampleID", inplace = True)

Expand Down
11 changes: 5 additions & 6 deletions code/templates/comparemultivariate.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
<div class="row">
<div class="col-sm"></div>
<div class="col-sm">
<div class="text-center">
<a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d">Click for Example PCA</button>
<a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=1">Click for Example Nearest Neighbor</button>
<div class="text-center">
<a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=0">Click for Example PCA</a>
<a id="analyzebutton" class="btn btn-info" href="/processcomparemultivariate?task=f39c94cb7afe4568950bf61cdb8fee0d&knn=1">Click for Example Nearest Neighbor</a>
</div>
</div>
<div class="col-sm"></div>
</div>

</div>
</div>

Expand All @@ -65,7 +64,7 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
}
else{
remote_url = "/processcomparemultivariate?task=" + $("#task").val() + "&knn=0"
$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
//$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
window.location.replace(remote_url)
}
}
Expand All @@ -78,7 +77,7 @@ <h2>Project your data (library search results) onto the PCA of public data</h2>
}
else{
remote_url = "/processcomparemultivariate?task=" + $("#task").val() + "&knn=1"
$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
//$('#display').prepend($('<img>',{id:'pca',src: remote_url}))
window.location.replace(remote_url)
}
}
Expand Down
30 changes: 30 additions & 0 deletions code/templates/multivariateneighbors.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,21 @@ <h2>Comparison Neighbors</h2>

<hr>

<h2>Consensus Neighbors</h2>

<div class="table table-striped">
<table id="consensustable" class="display" width="100%"></table>
</div>

<hr>

<h2>All Neighbors</h2>

<div class="table table-striped">
<table id="datatable" class="display" width="100%"></table>
</div>


</div>

<script>
Expand All @@ -32,6 +44,24 @@ <h2>Comparison Neighbors</h2>
{ data: "distance" , title: "distance"},
{ data: "SampleType" , title: "SampleType"},
{ data: "SampleTypeSub1" , title: "SampleTypeSub1"},
{ data: "MassSpectrometer" , title: "MassSpectrometer"},
],
pageLength: 30,
dom: 'Bflrtip',
buttons: [
'copy', 'csv', 'excel'
]
} );
});

$( document ).ready(function() {
$('#consensustable').DataTable( {
data: (( consensus_list | tojson )),
columns: [
{ data: "query" , title: "query"},
{ data: "attribute" , title: "attribute"},
{ data: "term" , title: "term"},
{ data: "count" , title: "count"}
],
pageLength: 30,
dom: 'Bflrtip',
Expand Down
Loading