Skip to content

Commit b452c47

Browse files
authored
Merge pull request #110 from AllenNeuralDynamics/han_metadata_inventory
[metadata inventory] loading presets from aws public url
2 parents 56584c8 + 2b9fc21 commit b452c47

File tree

3 files changed

+72
-341
lines changed

3 files changed

+72
-341
lines changed

code/pages/0_Data inventory.py

Lines changed: 72 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import re
33
import json
4+
import requests
45

56
from matplotlib_venn import venn2, venn3, venn2_circles, venn3_circles
67
import matplotlib.pyplot as plt
@@ -52,11 +53,15 @@
5253
# Load QUERY_PRESET from json
5354
@st.cache_data()
5455
def load_presets():
55-
with open("data_inventory_QUERY_PRESET.json", "r") as f:
56-
QUERY_PRESET = json.load(f)
56+
# Get QUERY_PRESET and VENN_PRESET from public S3
57+
public_url_root = "https://aind-behavior-data.s3.us-west-2.amazonaws.com/foraging_nwb_bonsai_processed/"
58+
59+
response = requests.get(public_url_root + "data_inventory_QUERY_PRESET.json")
60+
QUERY_PRESET = response.json()
5761

58-
with open("data_inventory_VENN_PRESET.json", "r") as f:
59-
VENN_PRESET = json.load(f)
62+
response = requests.get(public_url_root + "data_inventory_VENN_PRESET.json")
63+
VENN_PRESET = response.json()
64+
6065
return QUERY_PRESET, VENN_PRESET
6166

6267
QUERY_PRESET, VENN_PRESET = load_presets()
@@ -157,7 +162,7 @@ def generate_venn(df, venn_preset):
157162
if v.get_patch_by_id(patch_id):
158163
v.get_patch_by_id(patch_id).set_color(patch_setting["color"])
159164
# Add notes
160-
notes.append(f"#### :{patch_setting['emoji']}: :{patch_setting['color']}[{patch_setting['notes']}]")
165+
notes.append(f"##### :{patch_setting['emoji']}: :{patch_setting['color']}[{patch_setting['notes']}]")
161166

162167
return fig, notes
163168

@@ -438,7 +443,8 @@ def app():
438443
def add_venn_diagrms(df_merged):
439444

440445
cols = st.columns([2, 1])
441-
cols[0].markdown("## Venn diagrams from presets")
446+
cols[0].markdown("## Issues in dynamic foraging data inventory")
447+
cols[0].markdown("#### [Github discussion](https://github.com/AllenNeuralDynamics/aind-behavior-blog/discussions/851)")
442448
with cols[1].expander("Time view settings", expanded=True):
443449
cols_1 = st.columns([1, 1])
444450
if_separate_plots = cols_1[0].checkbox("Separate in subplots", value=True)
@@ -448,65 +454,71 @@ def add_venn_diagrms(df_merged):
448454
time_period = cols_1[1].selectbox(
449455
"Bin size",
450456
["Daily", "Weekly", "Monthly", "Quarterly"],
451-
index=1,
452-
)
453-
454-
for i_venn, venn_preset in enumerate(VENN_PRESET):
455-
# -- Venn diagrams --
456-
st.markdown(f"### ({i_venn+1}). {venn_preset['name']}")
457-
fig, notes = generate_venn(
458-
df_merged,
459-
venn_preset
460-
)
461-
for note in notes:
462-
st.markdown(note)
463-
464-
cols = st.columns([1, 1])
465-
with cols[0]:
466-
st.pyplot(fig, use_container_width=True)
467-
468-
# -- Show and download df for this Venn --
469-
circle_columns = [c_s["column"] for c_s in venn_preset["circle_settings"]]
470-
# Show histogram over time for the columns and patches in preset
471-
df_this_preset = df_merged[circle_columns]
472-
# Keep only rows that have at least one True in this Venn
473-
df_this_preset = df_this_preset[df_this_preset.any(axis=1)]
474-
475-
# Create a new column to indicate sessions in patches specified by patch_ids like ["100", "101", "110", "111"]
476-
for patch_setting in venn_preset.get("patch_settings", []):
477-
idx = _filter_df_by_patch_ids(
478-
df_this_preset[circle_columns],
479-
patch_setting["patch_ids"]
480-
)
481-
df_this_preset.loc[idx, str(patch_setting["patch_ids"])] = True
482-
483-
# Join in other extra columns
484-
df_this_preset = df_this_preset.join(
485-
df_merged[[col for col in df_merged.columns if col not in META_COLUMNS]], how="left"
457+
index=0,
486458
)
459+
460+
st.markdown("---")
461+
for section in VENN_PRESET:
462+
section_name, section_contents = section["section_name"], section["section_contents"]
463+
st.markdown(f"### {section_name}")
464+
st.markdown("---")
465+
for i_venn, venn_preset in enumerate(section_contents):
466+
# -- Venn diagrams --
467+
st.markdown(f"#### ({i_venn+1}). {venn_preset['name']}")
468+
st.markdown(venn_preset.get("comments", ""))
469+
fig, notes = generate_venn(
470+
df_merged,
471+
venn_preset
472+
)
473+
for note in notes:
474+
st.markdown(note)
475+
476+
cols = st.columns([1, 1])
477+
with cols[0]:
478+
st.pyplot(fig, use_container_width=True)
479+
480+
# -- Show and download df for this Venn --
481+
circle_columns = [c_s["column"] for c_s in venn_preset["circle_settings"]]
482+
# Show histogram over time for the columns and patches in preset
483+
df_this_preset = df_merged[circle_columns]
484+
# Keep only rows that have at least one True in this Venn
485+
df_this_preset = df_this_preset[df_this_preset.any(axis=1)]
486+
487+
# Create a new column to indicate sessions in patches specified by patch_ids like ["100", "101", "110", "111"]
488+
for patch_setting in venn_preset.get("patch_settings", []):
489+
idx = _filter_df_by_patch_ids(
490+
df_this_preset[circle_columns],
491+
patch_setting["patch_ids"]
492+
)
493+
df_this_preset.loc[idx, str(patch_setting["patch_ids"])] = True
487494

488-
with cols[0]:
489-
download_df(
490-
df_this_preset,
491-
label="Download as CSV for this Venn diagram",
492-
file_name=f"df_{venn_preset['name']}.csv",
495+
# Join in other extra columns
496+
df_this_preset = df_this_preset.join(
497+
df_merged[[col for col in df_merged.columns if col not in META_COLUMNS]], how="left"
493498
)
494-
with st.expander(f"Show dataframe, n = {len(df_this_preset)}"):
495-
st.write(df_this_preset)
496499

497-
with cols[1]:
498-
# -- Show histogram over time --
499-
fig = plot_histogram_over_time(
500-
df=df_this_preset.reset_index(),
501-
venn_preset=venn_preset,
502-
time_period=time_period,
503-
if_sync_y_limits=if_sync_y_limits,
504-
if_separate_plots=if_separate_plots,
505-
)
506-
override_plotly_theme(fig, font_size_scale=0.9)
507-
st.plotly_chart(fig, use_container_width=True)
500+
with cols[0]:
501+
download_df(
502+
df_this_preset,
503+
label="Download as CSV for this Venn diagram",
504+
file_name=f"df_{venn_preset['name']}.csv",
505+
)
506+
with st.expander(f"Show dataframe, n = {len(df_this_preset)}"):
507+
st.write(df_this_preset)
508+
509+
with cols[1]:
510+
# -- Show histogram over time --
511+
fig = plot_histogram_over_time(
512+
df=df_this_preset.reset_index(),
513+
venn_preset=venn_preset,
514+
time_period=time_period,
515+
if_sync_y_limits=if_sync_y_limits,
516+
if_separate_plots=if_separate_plots,
517+
)
518+
override_plotly_theme(fig, font_size_scale=0.9)
519+
st.plotly_chart(fig, use_container_width=True)
508520

509-
st.markdown("---")
521+
st.markdown("---")
510522

511523
# --- User-defined Venn diagram ---
512524
# Multiselect for selecting queries up to three

data_inventory_QUERY_PRESET.json

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)