Skip to content

Commit fefac2c

Browse files
authored
Merge pull request #114 from AllenNeuralDynamics/han_refactor_getting_session_table
Refactor: getting session table
2 parents 926ebae + 4364c23 commit fefac2c

File tree

3 files changed

+29
-125
lines changed

3 files changed

+29
-125
lines changed

code/Home.py

Lines changed: 26 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import streamlit as st
2121
import streamlit_nested_layout
2222
from aind_auto_train import __version__ as auto_train_version
23+
from aind_analysis_arch_result_access.han_pipeline import get_session_table
2324
from pygwalker.api.streamlit import StreamlitRenderer, init_streamlit_comm
2425
from util.aws_s3 import (draw_session_plots_quick_preview,
2526
load_data,
@@ -306,12 +307,6 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
306307
if key in ['selected_draw_types'] or '_changed' in key:
307308
del st.session_state[key]
308309

309-
df = load_data(['sessions'], data_source='bonsai')
310-
311-
if not len(df):
312-
return False
313-
314-
# --- Perform any data source-dependent preprocessing here ---
315310
# Because sync_URL_to_session_state() needs df to be loaded (for dynamic column filtering),
316311
# 'if_load_bpod_sessions' has not been synced from URL to session state yet.
317312
# So here we need to manually get it from URL or session state.
@@ -321,14 +316,14 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
321316
else st.session_state.if_load_bpod_sessions
322317
if 'if_load_bpod_sessions' in st.session_state
323318
else False)
319+
st.session_state.bpod_loaded = _if_load_bpod
320+
321+
# --- Load data using aind-analysis-arch-result-access ---
322+
df_han = get_session_table(if_load_bpod=_if_load_bpod)
323+
df = {'sessions_main': df_han} # put it in df['sessions_main'] for backward compatibility
324324

325-
st.session_state.bpod_loaded = False
326-
if _if_load_bpod:
327-
df_bpod = load_data(['sessions'], data_source='bpod')
328-
st.session_state.bpod_loaded = True
329-
330-
# For historical reasons, the suffix of df['sessions_main'] just means the data of the Home.py page
331-
df['sessions_main'] = pd.concat([df['sessions_main'], df_bpod['sessions_main']], axis=0)
325+
if not len(df):
326+
return False
332327

333328
st.session_state.df = df
334329
for source in ["dataframe", "plotly"]:
@@ -340,136 +335,33 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
340335
st.session_state.curriculum_manager = curriculum_manager
341336

342337
# Some ad-hoc modifications on df_sessions
343-
_df = st.session_state.df['sessions_main'].copy() # temporary df alias
338+
_df = st.session_state.df['sessions_main'].copy()
344339

345-
_df.columns = _df.columns.get_level_values(1)
346-
_df.sort_values(['session_start_time'], ascending=False, inplace=True)
347-
_df['session_start_time'] = _df['session_start_time'].astype(str) # Turn to string
348-
_df = _df.reset_index()
349-
350-
# Handle mouse and user name
351-
if 'bpod_backup_h2o' in _df.columns:
352-
_df['subject_alias'] = np.where(_df['bpod_backup_h2o'].notnull(), _df['bpod_backup_h2o'], _df['subject_id'])
353-
_df['trainer'] = np.where(_df['bpod_backup_user_name'].notnull(), _df['bpod_backup_user_name'], _df['trainer'])
354-
else:
355-
_df['subject_alias'] = _df['subject_id']
356-
357-
# map trainer
358-
_df['trainer'] = _df['trainer'].apply(_trainer_mapper)
359-
360-
# Merge in PI name
361-
df_mouse_pi_mapping = load_mouse_PI_mapping()
362-
st.session_state.df_mouse_pi_mapping = df_mouse_pi_mapping # Save to session state for later use
363-
_df = _df.merge(df_mouse_pi_mapping, how='left', on='subject_id') # Merge in PI name
364-
_df.loc[_df["PI"].isnull(), "PI"] = _df.loc[
365-
_df["PI"].isnull() &
366-
(_df["trainer"].isin(_df["PI"]) | _df["trainer"].isin(["Han Hou", "Marton Rozsa"])),
367-
"trainer"
368-
] # Fill in PI with trainer if PI is missing and the trainer was ever a PI
369-
370-
# Add data source (Room + Hardware etc)
371-
_df[['institute', 'rig_type', 'room', 'hardware', 'data_source']] = _df['rig'].apply(lambda x: pd.Series(get_data_source(x)))
372-
373-
# Handle session number
374-
_df.dropna(subset=['session'], inplace=True) # Remove rows with no session number (only leave the nwb file with the largest finished_trials for now)
375-
_df.drop(_df.query('session < 1').index, inplace=True)
376-
377-
# Remove invalid subject_id
378-
_df = _df[(999999 > _df["subject_id"].astype(int))
379-
& (_df["subject_id"].astype(int) > 300000)]
380-
381-
# Remove zero finished trials
382-
_df = _df[_df['finished_trials'] > 0]
383-
384-
# Remove abnormal values
385-
_df.loc[_df['weight_after'] > 100,
386-
['weight_after', 'weight_after_ratio', 'water_in_session_total', 'water_after_session', 'water_day_total']
387-
] = np.nan
388-
389-
_df.loc[_df['water_in_session_manual'] > 100,
390-
['water_in_session_manual', 'water_in_session_total', 'water_after_session']] = np.nan
391-
392-
_df.loc[(_df['duration_iti_median'] < 0) | (_df['duration_iti_mean'] < 0),
393-
['duration_iti_median', 'duration_iti_mean', 'duration_iti_std', 'duration_iti_min', 'duration_iti_max']] = np.nan
394-
395-
_df.loc[_df['invalid_lick_ratio'] < 0,
396-
['invalid_lick_ratio']]= np.nan
397-
398-
# # add something else
399-
# add abs(bias) to all terms that have 'bias' in name
400-
for col in _df.columns:
401-
if 'bias' in col:
402-
_df[f'abs({col})'] = np.abs(_df[col])
403-
404-
# # delta weight
405-
# diff_relative_weight_next_day = _df.set_index(
406-
# ['session']).sort_values('session', ascending=True).groupby('subject_id').apply(
407-
# lambda x: - x.relative_weight.diff(periods=-1)).rename("diff_relative_weight_next_day")
408-
409-
# weekday
410-
_df.session_date = pd.to_datetime(_df.session_date)
411-
_df['weekday'] = _df.session_date.dt.dayofweek + 1
412-
413-
# trial stats
414-
_df['avg_trial_length_in_seconds'] = _df['session_run_time_in_min'] / _df['total_trials_with_autowater'] * 60
415-
416-
# last day's total water
417-
_df['water_day_total_last_session'] = _df.groupby('subject_id')['water_day_total'].shift(1)
418-
_df['water_after_session_last_session'] = _df.groupby('subject_id')['water_after_session'].shift(1)
419-
420340

421341
# -- overwrite the `if_stage_overriden_by_trainer`
422342
# Previously it was set to True if the trainer changes stage during a session.
423343
# But it is more informative to define it as whether the trainer has overridden the curriculum.
424344
# In other words, it is set to True only when stage_suggested != stage_actual, as defined in the autotrain curriculum.
425345
_df.drop(columns=['if_overriden_by_trainer'], inplace=True)
426-
tmp_auto_train = auto_train_manager.df_manager.query('if_closed_loop == True')[
346+
tmp_auto_train = (
347+
auto_train_manager.df_manager.query("if_closed_loop == True")[
427348
[
428349
"subject_id",
429350
"session_date",
430351
"current_stage_suggested",
431352
"if_stage_overriden_by_trainer",
432353
]
433-
].copy()
434-
tmp_auto_train['session_date'] = pd.to_datetime(tmp_auto_train['session_date'])
354+
]
355+
.copy()
356+
.drop_duplicates(subset=["subject_id", "session_date"], keep="first")
357+
)
358+
tmp_auto_train["session_date"] = pd.to_datetime(tmp_auto_train["session_date"])
435359
_df = _df.merge(
436360
tmp_auto_train,
437361
on=["subject_id", "session_date"],
438362
how='left',
439363
)
440364

441-
# fill nan for autotrain fields
442-
filled_values = {'curriculum_name': 'None',
443-
'curriculum_version': 'None',
444-
'curriculum_schema_version': 'None',
445-
'current_stage_actual': 'None',
446-
'has_video': False,
447-
'has_ephys': False,
448-
}
449-
_df.fillna(filled_values, inplace=True)
450-
451-
# foraging performance = foraging_eff * finished_rate
452-
if 'foraging_performance' not in _df.columns:
453-
_df['foraging_performance'] = \
454-
_df['foraging_eff'] \
455-
* _df['finished_rate']
456-
_df['foraging_performance_random_seed'] = \
457-
_df['foraging_eff_random_seed'] \
458-
* _df['finished_rate']
459-
460-
# drop 'bpod_backup_' columns
461-
_df.drop([col for col in _df.columns if 'bpod_backup_' in col], axis=1, inplace=True)
462-
463-
# _df = _df.merge(
464-
# diff_relative_weight_next_day, how='left', on=['subject_id', 'session'])
465-
466-
# Reorder columns so that autotrain info is easier to see
467-
first_several_cols = ['subject_id', 'session_date', 'nwb_suffix', 'session', 'rig',
468-
'trainer', 'PI', 'curriculum_name', 'curriculum_version', 'current_stage_actual',
469-
'task', 'notes']
470-
new_order = first_several_cols + [col for col in _df.columns if col not in first_several_cols]
471-
_df = _df[new_order]
472-
473365
# --- Load data from docDB ---
474366
if_load_docDb = if_load_docDB_override if if_load_docDB_override is not None else (
475367
st.query_params['if_load_docDB'].lower() == 'true'
@@ -548,6 +440,15 @@ def app():
548440
cols[0].markdown(f'### Filter the sessions on the sidebar\n'
549441
f'##### {len(st.session_state.df_session_filtered)} sessions, '
550442
f'{len(st.session_state.df_session_filtered.subject_id.unique())} mice filtered')
443+
444+
with cols[0].expander(':bulb: Get the master session table by code', expanded=False):
445+
st.code(f'''
446+
#!pip install aind-analysis-arch-result-access
447+
from aind_analysis_arch_result_access.han_pipeline import get_session_table
448+
df = get_session_table(if_load_bpod={st.session_state.bpod_loaded})
449+
''')
450+
st.markdown("See [aind-analysis-arch-result-access](https://github.com/AllenNeuralDynamics/aind-analysis-arch-result-access) for details.")
451+
551452
with cols[1]:
552453
with st.form(key='load_settings', clear_on_submit=False):
553454
if_load_bpod_sessions = checkbox_wrapper_for_url_query(
@@ -793,3 +694,4 @@ def add_main_tabs():
793694
st.markdown('#### 1. Reload the page')
794695
st.markdown('#### 2. Click this original URL https://foraging-behavior-browser.allenneuraldynamics-test.org/')
795696
st.markdown('#### 3. Report your bug here: https://github.com/AllenNeuralDynamics/foraging-behavior-browser/issues (paste your URL and screenshoots)')
697+
raise e

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ streamlit-bokeh3-events==0.1.4
2525
bokeh==3.4.3
2626
git+https://github.com/AllenNeuralDynamics/aind-foraging-behavior-bonsai-automatic-training.git@main
2727
git+https://github.com/AllenNeuralDynamics/aind-dynamic-foraging-models.git@develop
28-
git+https://github.com/AllenNeuralDynamics/aind-behavior-gym.git@develop
28+
git+https://github.com/AllenNeuralDynamics/aind-behavior-gym.git@develop
29+
git+https://github.com/AllenNeuralDynamics/aind-analysis-arch-result-access.git@main
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 8c82fd513435a754e2c71151e6e11f7fb6be4ee4

0 commit comments

Comments
 (0)