2424from util .aws_s3 import (draw_session_plots_quick_preview ,
2525 load_data ,
2626 load_auto_train ,
27+ load_mouse_PI_mapping ,
2728 show_debug_info ,
2829 show_session_level_img_by_key_and_prefix )
2930from util .fetch_data_docDB import load_data_from_docDB
4243 slider_wrapper_for_url_query ,
4344 sync_session_state_to_URL ,
4445 sync_URL_to_session_state )
46+ from util .reformat import get_data_source
4547
4648try :
4749 st .set_page_config (layout = "wide" ,
5658 pass
5759
5860
59- def _user_name_mapper (user_name ):
60- user_mapper = { # tuple of key words --> user name
61- ('Avalon' ,): 'Avalon Amaya' ,
62- ('Ella' ,): 'Ella Hilton' ,
63- ('Katrina' ,): 'Katrina Nguyen' ,
64- ('Lucas' ,): 'Lucas Kinsey' ,
65- ('Travis' ,): 'Travis Ramirez' ,
66- ('Xinxin' , 'the ghost' ): 'Xinxin Yin' ,
67- }
68- for key_words , name in user_mapper .items ():
69- for key_word in key_words :
70- if key_word in user_name :
71- return name
61+ def _trainer_mapper (trainer ):
62+ user_mapper = {
63+ 'Avalon Amaya' : ['Avalon' ],
64+ 'Ella Hilton' : ['Ella' ],
65+ 'Katrina Nguyen' : ['Katrina' ],
66+ 'Lucas Kinsey' : ['Lucas' ],
67+ 'Travis Ramirez' : ['Travis' ],
68+ 'Xinxin Yin' : ['Xinxin' , 'the ghost' ],
69+ 'Bowen Tan' : ['Bowen' ],
70+ 'Henry Loeffler' : ['Henry Loeffer' ],
71+ 'Margaret Lee' : ['margaret lee' ],
72+ 'Madeline Tom' : ['Madseline Tom' ],
73+ }
74+ for canonical_name , alias in user_mapper .items ():
75+ for key_word in alias :
76+ if key_word in trainer :
77+ return canonical_name
7278 else :
73- return user_name
79+ return trainer
7480
7581
7682@st .cache_resource (ttl = 24 * 3600 )
@@ -104,8 +110,8 @@ def draw_session_plots(df_to_draw_session):
104110 except :
105111 date_str = key ["session_date" ].split ("T" )[0 ]
106112
107- st .markdown (f'''<h5 style='text-align: center; color: orange;'>{ key ["h2o " ]} , Session { int (key ["session" ])} , { date_str } '''
108- f'''({ key ["user_name " ]} @{ key ["data_source" ]} )''' ,
113+ st .markdown (f'''<h5 style='text-align: center; color: orange;'>{ key ["subject_id " ]} ( { key [ "PI" ] } ) , Session { int (key ["session" ])} , { date_str } '''
114+ f'''({ key ["trainer " ]} @{ key ["data_source" ]} )''' ,
109115 unsafe_allow_html = True )
110116 if len (st .session_state .session_plot_selected_draw_types ) > 1 : # more than one types, use the pre-defined layout
111117 for row , column_setting in enumerate (draw_type_layout_definition ):
@@ -280,28 +286,26 @@ def plot_x_y_session():
280286 if len (df_selected_from_plotly ) == 1 :
281287 with cols [1 ]:
282288 draw_session_plots_quick_preview (df_selected_from_plotly )
283-
284289 return df_selected_from_plotly , cols
285290
286291
def show_curriculums():
    """Placeholder for the curriculum-display section (not yet implemented)."""
    pass
289294
290-
291295
292296# ------- Layout starts here -------- #
293297def init (if_load_bpod_data_override = None , if_load_docDB_override = None ):
294-
298+
295299 # Clear specific session state and all filters
296300 for key in st .session_state :
297301 if key in ['selected_draw_types' ] or '_changed' in key :
298302 del st .session_state [key ]
299-
303+
300304 df = load_data (['sessions' ], data_source = 'bonsai' )
301-
305+
302306 if not len (df ):
303307 return False
304-
308+
305309 # --- Perform any data source-dependent preprocessing here ---
306310 # Because sync_URL_to_session_state() needs df to be loaded (for dynamic column filtering),
307311 # 'if_load_bpod_sessions' has not been synced from URL to session state yet.
@@ -312,77 +316,66 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
312316 else st .session_state .if_load_bpod_sessions
313317 if 'if_load_bpod_sessions' in st .session_state
314318 else False )
315-
319+
316320 st .session_state .bpod_loaded = False
317321 if _if_load_bpod :
318322 df_bpod = load_data (['sessions' ], data_source = 'bpod' )
319323 st .session_state .bpod_loaded = True
320-
321- # For historial reason, the suffix of df['sessions_bonsai '] just mean the data of the Home.py page
322- df ['sessions_bonsai ' ] = pd .concat ([df ['sessions_bonsai ' ], df_bpod ['sessions_bonsai ' ]], axis = 0 )
323-
324+
325+ # For historial reason, the suffix of df['sessions_main '] just mean the data of the Home.py page
326+ df ['sessions_main ' ] = pd .concat ([df ['sessions_main ' ], df_bpod ['sessions_main ' ]], axis = 0 )
327+
324328 st .session_state .df = df
325329 for source in ["dataframe" , "plotly" ]:
326- st .session_state [f'df_selected_from_{ source } ' ] = pd .DataFrame (columns = ['h2o ' , 'session' ])
327-
330+ st .session_state [f'df_selected_from_{ source } ' ] = pd .DataFrame (columns = ['subject_id ' , 'session' ])
331+
328332 # Load autotrain
329333 auto_train_manager , curriculum_manager = load_auto_train ()
330334 st .session_state .auto_train_manager = auto_train_manager
331335 st .session_state .curriculum_manager = curriculum_manager
332-
336+
333337 # Some ad-hoc modifications on df_sessions
334- _df = st .session_state .df ['sessions_bonsai ' ] # temporary df alias
335-
338+ _df = st .session_state .df ['sessions_main ' ] # temporary df alias
339+
336340 _df .columns = _df .columns .get_level_values (1 )
337341 _df .sort_values (['session_start_time' ], ascending = False , inplace = True )
338342 _df ['session_start_time' ] = _df ['session_start_time' ].astype (str ) # Turn to string
339343 _df = _df .reset_index ().query ('subject_id != "0"' )
340-
344+
341345 # Handle mouse and user name
342346 if 'bpod_backup_h2o' in _df .columns :
343- _df ['h2o ' ] = np .where (_df ['bpod_backup_h2o' ].notnull (), _df ['bpod_backup_h2o' ], _df ['subject_id' ])
344- _df ['user_name ' ] = np .where (_df ['bpod_backup_user_name' ].notnull (), _df ['bpod_backup_user_name' ], _df ['user_name ' ])
347+ _df ['subject_alias ' ] = np .where (_df ['bpod_backup_h2o' ].notnull (), _df ['bpod_backup_h2o' ], _df ['subject_id' ])
348+ _df ['trainer ' ] = np .where (_df ['bpod_backup_user_name' ].notnull (), _df ['bpod_backup_user_name' ], _df ['trainer ' ])
345349 else :
346- _df ['h2o' ] = _df ['subject_id' ]
347-
348-
349- def _get_data_source (rig ):
350- """From rig string, return "{institute}_{rig_type}_{room}_{hardware}"
351- """
352- institute = 'Janelia' if ('bpod' in rig ) and not ('AIND' in rig ) else 'AIND'
353- hardware = 'bpod' if ('bpod' in rig ) else 'bonsai'
354- rig_type = 'ephys' if ('ephys' in rig .lower ()) else 'training'
355-
356- # This is a mess...
357- if institute == 'Janelia' :
358- room = 'NA'
359- elif 'Ephys-Han' in rig :
360- room = '321'
361- elif hardware == 'bpod' :
362- room = '347'
363- elif '447' in rig :
364- room = '447'
365- elif '446' in rig :
366- room = '446'
367- elif '323' in rig :
368- room = '323'
369- elif rig_type == 'ephys' :
370- room = '323'
371- else :
372- room = '447'
373- return institute , rig_type , room , hardware , '_' .join ([institute , rig_type , room , hardware ])
374-
375- # Add data source (Room + Hardware etc)
376- _df [['institute' , 'rig_type' , 'room' , 'hardware' , 'data_source' ]] = _df ['rig' ].apply (lambda x : pd .Series (_get_data_source (x )))
350+ _df ['subject_alias' ] = _df ['subject_id' ]
351+
352+ # map trainer
353+ _df ['trainer' ] = _df ['trainer' ].apply (_trainer_mapper )
354+
355+ # Merge in PI name
356+ df_mouse_pi_mapping = load_mouse_PI_mapping ()
357+ _df = _df .merge (df_mouse_pi_mapping , how = 'left' , on = 'subject_id' ) # Merge in PI name
358+ _df .loc [_df ["PI" ].isnull (), "PI" ] = _df .loc [
359+ _df ["PI" ].isnull () &
360+ (_df ["trainer" ].isin (_df ["PI" ]) | _df ["trainer" ].isin (["Han Hou" , "Marton Rozsa" ])),
361+ "trainer"
362+ ] # Fill in PI with trainer if PI is missing and the trainer was ever a PI
377363
364+
365+ # Add data source (Room + Hardware etc)
366+ _df [['institute' , 'rig_type' , 'room' , 'hardware' , 'data_source' ]] = _df ['rig' ].apply (lambda x : pd .Series (get_data_source (x )))
367+
378368 # Handle session number
379369 _df .dropna (subset = ['session' ], inplace = True ) # Remove rows with no session number (only leave the nwb file with the largest finished_trials for now)
380370 _df .drop (_df .query ('session < 1' ).index , inplace = True )
381-
371+
382372 # Remove invalid subject_id
383373 _df = _df [(999999 > _df ["subject_id" ].astype (int ))
384374 & (_df ["subject_id" ].astype (int ) > 300000 )]
385375
376+ # Remove zero finished trials
377+ _df = _df [_df ['finished_trials' ] > 0 ]
378+
386379 # Remove abnormal values
387380 _df .loc [_df ['weight_after' ] > 100 ,
388381 ['weight_after' , 'weight_after_ratio' , 'water_in_session_total' , 'water_after_session' , 'water_day_total' ]
@@ -393,35 +386,32 @@ def _get_data_source(rig):
393386
394387 _df .loc [(_df ['duration_iti_median' ] < 0 ) | (_df ['duration_iti_mean' ] < 0 ),
395388 ['duration_iti_median' , 'duration_iti_mean' , 'duration_iti_std' , 'duration_iti_min' , 'duration_iti_max' ]] = np .nan
396-
389+
397390 _df .loc [_df ['invalid_lick_ratio' ] < 0 ,
398391 ['invalid_lick_ratio' ]]= np .nan
399-
392+
400393 # # add something else
401394 # add abs(bais) to all terms that have 'bias' in name
402395 for col in _df .columns :
403396 if 'bias' in col :
404397 _df [f'abs({ col } )' ] = np .abs (_df [col ])
405-
398+
406399 # # delta weight
407400 # diff_relative_weight_next_day = _df.set_index(
408- # ['session']).sort_values('session', ascending=True).groupby('h2o ').apply(
401+ # ['session']).sort_values('session', ascending=True).groupby('subject_id ').apply(
409402 # lambda x: - x.relative_weight.diff(periods=-1)).rename("diff_relative_weight_next_day")
410-
403+
411404 # weekday
412405 _df .session_date = pd .to_datetime (_df .session_date )
413406 _df ['weekday' ] = _df .session_date .dt .dayofweek + 1
414-
415- # map user_name
416- _df ['user_name' ] = _df ['user_name' ].apply (_user_name_mapper )
417-
407+
418408 # trial stats
419409 _df ['avg_trial_length_in_seconds' ] = _df ['session_run_time_in_min' ] / _df ['total_trials_with_autowater' ] * 60
420-
410+
421411 # last day's total water
422- _df ['water_day_total_last_session' ] = _df .groupby ('h2o ' )['water_day_total' ].shift (1 )
423- _df ['water_after_session_last_session' ] = _df .groupby ('h2o ' )['water_after_session' ].shift (1 )
424-
412+ _df ['water_day_total_last_session' ] = _df .groupby ('subject_id ' )['water_day_total' ].shift (1 )
413+ _df ['water_after_session_last_session' ] = _df .groupby ('subject_id ' )['water_after_session' ].shift (1 )
414+
425415 # fill nan for autotrain fields
426416 filled_values = {'curriculum_name' : 'None' ,
427417 'curriculum_version' : 'None' ,
@@ -432,7 +422,7 @@ def _get_data_source(rig):
432422 'if_overriden_by_trainer' : False ,
433423 }
434424 _df .fillna (filled_values , inplace = True )
435-
425+
436426 # foraging performance = foraing_eff * finished_rate
437427 if 'foraging_performance' not in _df .columns :
438428 _df ['foraging_performance' ] = \
@@ -444,20 +434,19 @@ def _get_data_source(rig):
444434
445435 # drop 'bpod_backup_' columns
446436 _df .drop ([col for col in _df .columns if 'bpod_backup_' in col ], axis = 1 , inplace = True )
447-
437+
448438 # fix if_overriden_by_trainer
449439 _df ['if_overriden_by_trainer' ] = _df ['if_overriden_by_trainer' ].astype (bool )
450-
440+
451441 # _df = _df.merge(
452- # diff_relative_weight_next_day, how='left', on=['h2o ', 'session'])
453-
442+ # diff_relative_weight_next_day, how='left', on=['subject_id ', 'session'])
443+
454444 # Recorder columns so that autotrain info is easier to see
455445 first_several_cols = ['subject_id' , 'session_date' , 'nwb_suffix' , 'session' , 'rig' ,
456- 'user_name ' , 'curriculum_name' , 'curriculum_version' , 'current_stage_actual' ,
446+ 'trainer' , 'PI ' , 'curriculum_name' , 'curriculum_version' , 'current_stage_actual' ,
457447 'task' , 'notes' ]
458448 new_order = first_several_cols + [col for col in _df .columns if col not in first_several_cols ]
459449 _df = _df [new_order ]
460-
461450
462451 # --- Load data from docDB ---
463452 if_load_docDb = if_load_docDB_override if if_load_docDB_override is not None else (
@@ -466,10 +455,10 @@ def _get_data_source(rig):
466455 else st .session_state .if_load_docDB
467456 if 'if_load_docDB' in st .session_state
468457 else False )
469-
458+
470459 if if_load_docDb :
471460 _df = merge_in_df_docDB (_df )
472-
461+
473462 # add docDB_status column
474463 _df ["docDB_status" ] = _df .apply (
475464 lambda row : (
@@ -484,15 +473,15 @@ def _get_data_source(rig):
484473 axis = 1 ,
485474 )
486475
487- st .session_state .df ['sessions_bonsai ' ] = _df # Somehow _df loses the reference to the original dataframe
476+ st .session_state .df ['sessions_main ' ] = _df # Somehow _df loses the reference to the original dataframe
488477 st .session_state .session_stats_names = [keys for keys in _df .keys ()]
489478
490479 # Set session state from URL
491480 sync_URL_to_session_state ()
492-
481+
493482 # Establish communication between pygwalker and streamlit
494483 init_streamlit_comm ()
495-
484+
496485 return True
497486
498487def merge_in_df_docDB (_df ):
@@ -536,7 +525,7 @@ def app():
536525 cols = st .columns ([4 , 4 , 4 , 1 ])
537526 cols [0 ].markdown (f'### Filter the sessions on the sidebar\n '
538527 f'##### { len (st .session_state .df_session_filtered )} sessions, '
539- f'{ len (st .session_state .df_session_filtered .h2o .unique ())} mice filtered' )
528+ f'{ len (st .session_state .df_session_filtered .subject_id .unique ())} mice filtered' )
540529 with cols [1 ]:
541530 with st .form (key = 'load_settings' , clear_on_submit = False ):
542531 if_load_bpod_sessions = checkbox_wrapper_for_url_query (
@@ -582,8 +571,8 @@ def app():
582571 )
583572
584573 if len (aggrid_outputs ['selected_rows' ]) \
585- and not set (pd .DataFrame (aggrid_outputs ['selected_rows' ]).set_index (['h2o ' , 'session' ]).index
586- ) == set (st .session_state .df_selected_from_dataframe .set_index (['h2o ' , 'session' ]).index ) \
574+ and not set (pd .DataFrame (aggrid_outputs ['selected_rows' ]).set_index (['subject_id ' , 'session' ]).index
575+ ) == set (st .session_state .df_selected_from_dataframe .set_index (['subject_id ' , 'session' ]).index ) \
587576 and not st .session_state .get ("df_selected_from_dataframe_just_overriden" , False ): # so that if the user just overriden the df_selected_from_dataframe by pressing sidebar button, it won't sync selected rows in the table to session state
588577 st .session_state .df_selected_from_dataframe = pd .DataFrame (aggrid_outputs ['selected_rows' ]) # Use selected in dataframe to update "selected"
589578 st .rerun ()
@@ -780,7 +769,7 @@ def add_main_tabs():
780769
781770if __name__ == "__main__" :
782771 ok = True
783- if 'df' not in st .session_state or 'sessions_bonsai ' not in st .session_state .df .keys ():
772+ if 'df' not in st .session_state or 'sessions_main ' not in st .session_state .df .keys ():
784773 ok = init ()
785774
786775 if ok :
0 commit comments