2020import streamlit as st
2121import streamlit_nested_layout
2222from aind_auto_train import __version__ as auto_train_version
23+ from aind_analysis_arch_result_access .han_pipeline import get_session_table
2324from pygwalker .api .streamlit import StreamlitRenderer , init_streamlit_comm
2425from util .aws_s3 import (draw_session_plots_quick_preview ,
2526 load_data ,
@@ -306,12 +307,6 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
306307 if key in ['selected_draw_types' ] or '_changed' in key :
307308 del st .session_state [key ]
308309
309- df = load_data (['sessions' ], data_source = 'bonsai' )
310-
311- if not len (df ):
312- return False
313-
314- # --- Perform any data source-dependent preprocessing here ---
315310 # Because sync_URL_to_session_state() needs df to be loaded (for dynamic column filtering),
316311 # 'if_load_bpod_sessions' has not been synced from URL to session state yet.
317312 # So here we need to manually get it from URL or session state.
@@ -321,14 +316,14 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
321316 else st .session_state .if_load_bpod_sessions
322317 if 'if_load_bpod_sessions' in st .session_state
323318 else False )
319+ st .session_state .bpod_loaded = _if_load_bpod
320+
321+ # --- Load data using aind-analysis-arch-result-access ---
322+ df_han = get_session_table (if_load_bpod = _if_load_bpod )
323+ df = {'sessions_main' : df_han } # put it in df['sessions_main'] for backward compatibility
324324
325- st .session_state .bpod_loaded = False
326- if _if_load_bpod :
327- df_bpod = load_data (['sessions' ], data_source = 'bpod' )
328- st .session_state .bpod_loaded = True
329-
330- # For historial reason, the suffix of df['sessions_main'] just mean the data of the Home.py page
331- df ['sessions_main' ] = pd .concat ([df ['sessions_main' ], df_bpod ['sessions_main' ]], axis = 0 )
325+ if not len (df ):
326+ return False
332327
333328 st .session_state .df = df
334329 for source in ["dataframe" , "plotly" ]:
@@ -340,136 +335,33 @@ def init(if_load_bpod_data_override=None, if_load_docDB_override=None):
340335 st .session_state .curriculum_manager = curriculum_manager
341336
342337 # Some ad-hoc modifications on df_sessions
343- _df = st .session_state .df ['sessions_main' ].copy () # temporary df alias
338+ _df = st .session_state .df ['sessions_main' ].copy ()
344339
345- _df .columns = _df .columns .get_level_values (1 )
346- _df .sort_values (['session_start_time' ], ascending = False , inplace = True )
347- _df ['session_start_time' ] = _df ['session_start_time' ].astype (str ) # Turn to string
348- _df = _df .reset_index ()
349-
350- # Handle mouse and user name
351- if 'bpod_backup_h2o' in _df .columns :
352- _df ['subject_alias' ] = np .where (_df ['bpod_backup_h2o' ].notnull (), _df ['bpod_backup_h2o' ], _df ['subject_id' ])
353- _df ['trainer' ] = np .where (_df ['bpod_backup_user_name' ].notnull (), _df ['bpod_backup_user_name' ], _df ['trainer' ])
354- else :
355- _df ['subject_alias' ] = _df ['subject_id' ]
356-
357- # map trainer
358- _df ['trainer' ] = _df ['trainer' ].apply (_trainer_mapper )
359-
360- # Merge in PI name
361- df_mouse_pi_mapping = load_mouse_PI_mapping ()
362- st .session_state .df_mouse_pi_mapping = df_mouse_pi_mapping # Save to session state for later use
363- _df = _df .merge (df_mouse_pi_mapping , how = 'left' , on = 'subject_id' ) # Merge in PI name
364- _df .loc [_df ["PI" ].isnull (), "PI" ] = _df .loc [
365- _df ["PI" ].isnull () &
366- (_df ["trainer" ].isin (_df ["PI" ]) | _df ["trainer" ].isin (["Han Hou" , "Marton Rozsa" ])),
367- "trainer"
368- ] # Fill in PI with trainer if PI is missing and the trainer was ever a PI
369-
370- # Add data source (Room + Hardware etc)
371- _df [['institute' , 'rig_type' , 'room' , 'hardware' , 'data_source' ]] = _df ['rig' ].apply (lambda x : pd .Series (get_data_source (x )))
372-
373- # Handle session number
374- _df .dropna (subset = ['session' ], inplace = True ) # Remove rows with no session number (only leave the nwb file with the largest finished_trials for now)
375- _df .drop (_df .query ('session < 1' ).index , inplace = True )
376-
377- # Remove invalid subject_id
378- _df = _df [(999999 > _df ["subject_id" ].astype (int ))
379- & (_df ["subject_id" ].astype (int ) > 300000 )]
380-
381- # Remove zero finished trials
382- _df = _df [_df ['finished_trials' ] > 0 ]
383-
384- # Remove abnormal values
385- _df .loc [_df ['weight_after' ] > 100 ,
386- ['weight_after' , 'weight_after_ratio' , 'water_in_session_total' , 'water_after_session' , 'water_day_total' ]
387- ] = np .nan
388-
389- _df .loc [_df ['water_in_session_manual' ] > 100 ,
390- ['water_in_session_manual' , 'water_in_session_total' , 'water_after_session' ]] = np .nan
391-
392- _df .loc [(_df ['duration_iti_median' ] < 0 ) | (_df ['duration_iti_mean' ] < 0 ),
393- ['duration_iti_median' , 'duration_iti_mean' , 'duration_iti_std' , 'duration_iti_min' , 'duration_iti_max' ]] = np .nan
394-
395- _df .loc [_df ['invalid_lick_ratio' ] < 0 ,
396- ['invalid_lick_ratio' ]]= np .nan
397-
398- # # add something else
399- # add abs(bais) to all terms that have 'bias' in name
400- for col in _df .columns :
401- if 'bias' in col :
402- _df [f'abs({ col } )' ] = np .abs (_df [col ])
403-
404- # # delta weight
405- # diff_relative_weight_next_day = _df.set_index(
406- # ['session']).sort_values('session', ascending=True).groupby('subject_id').apply(
407- # lambda x: - x.relative_weight.diff(periods=-1)).rename("diff_relative_weight_next_day")
408-
409- # weekday
410- _df .session_date = pd .to_datetime (_df .session_date )
411- _df ['weekday' ] = _df .session_date .dt .dayofweek + 1
412-
413- # trial stats
414- _df ['avg_trial_length_in_seconds' ] = _df ['session_run_time_in_min' ] / _df ['total_trials_with_autowater' ] * 60
415-
416- # last day's total water
417- _df ['water_day_total_last_session' ] = _df .groupby ('subject_id' )['water_day_total' ].shift (1 )
418- _df ['water_after_session_last_session' ] = _df .groupby ('subject_id' )['water_after_session' ].shift (1 )
419-
420340
421341 # -- overwrite the `if_stage_overriden_by_trainer`
422342 # Previously it was set to True if the trainer changes stage during a session.
423343 # But it is more informative to define it as whether the trainer has overridden the curriculum.
424344 # In other words, it is set to True only when stage_suggested ~= stage_actual, as defined in the autotrain curriculum.
425345 _df .drop (columns = ['if_overriden_by_trainer' ], inplace = True )
426- tmp_auto_train = auto_train_manager .df_manager .query ('if_closed_loop == True' )[
346+ tmp_auto_train = (
347+ auto_train_manager .df_manager .query ("if_closed_loop == True" )[
427348 [
428349 "subject_id" ,
429350 "session_date" ,
430351 "current_stage_suggested" ,
431352 "if_stage_overriden_by_trainer" ,
432353 ]
433- ].copy ()
434- tmp_auto_train ['session_date' ] = pd .to_datetime (tmp_auto_train ['session_date' ])
354+ ]
355+ .copy ()
356+ .drop_duplicates (subset = ["subject_id" , "session_date" ], keep = "first" )
357+ )
358+ tmp_auto_train ["session_date" ] = pd .to_datetime (tmp_auto_train ["session_date" ])
435359 _df = _df .merge (
436360 tmp_auto_train ,
437361 on = ["subject_id" , "session_date" ],
438362 how = 'left' ,
439363 )
440364
441- # fill nan for autotrain fields
442- filled_values = {'curriculum_name' : 'None' ,
443- 'curriculum_version' : 'None' ,
444- 'curriculum_schema_version' : 'None' ,
445- 'current_stage_actual' : 'None' ,
446- 'has_video' : False ,
447- 'has_ephys' : False ,
448- }
449- _df .fillna (filled_values , inplace = True )
450-
451- # foraging performance = foraing_eff * finished_rate
452- if 'foraging_performance' not in _df .columns :
453- _df ['foraging_performance' ] = \
454- _df ['foraging_eff' ] \
455- * _df ['finished_rate' ]
456- _df ['foraging_performance_random_seed' ] = \
457- _df ['foraging_eff_random_seed' ] \
458- * _df ['finished_rate' ]
459-
460- # drop 'bpod_backup_' columns
461- _df .drop ([col for col in _df .columns if 'bpod_backup_' in col ], axis = 1 , inplace = True )
462-
463- # _df = _df.merge(
464- # diff_relative_weight_next_day, how='left', on=['subject_id', 'session'])
465-
466- # Recorder columns so that autotrain info is easier to see
467- first_several_cols = ['subject_id' , 'session_date' , 'nwb_suffix' , 'session' , 'rig' ,
468- 'trainer' , 'PI' , 'curriculum_name' , 'curriculum_version' , 'current_stage_actual' ,
469- 'task' , 'notes' ]
470- new_order = first_several_cols + [col for col in _df .columns if col not in first_several_cols ]
471- _df = _df [new_order ]
472-
473365 # --- Load data from docDB ---
474366 if_load_docDb = if_load_docDB_override if if_load_docDB_override is not None else (
475367 st .query_params ['if_load_docDB' ].lower () == 'true'
@@ -548,6 +440,15 @@ def app():
548440 cols [0 ].markdown (f'### Filter the sessions on the sidebar\n '
549441 f'##### { len (st .session_state .df_session_filtered )} sessions, '
550442 f'{ len (st .session_state .df_session_filtered .subject_id .unique ())} mice filtered' )
443+
444+ with cols [0 ].expander (':bulb: Get the master session table by code' , expanded = False ):
445+ st .code (f'''
446+ #!pip install aind-analysis-arch-result-access
447+ from aind_analysis_arch_result_access.han_pipeline import get_session_table
448+ df = get_session_table(if_load_bpod={ st .session_state .bpod_loaded } )
449+ ''' )
450+ st .markdown ("See [aind-analysis-arch-result-access](https://github.com/AllenNeuralDynamics/aind-analysis-arch-result-access) for details." )
451+
551452 with cols [1 ]:
552453 with st .form (key = 'load_settings' , clear_on_submit = False ):
553454 if_load_bpod_sessions = checkbox_wrapper_for_url_query (
@@ -793,3 +694,4 @@ def add_main_tabs():
793694 st .markdown ('#### 1. Reload the page' )
794695 st .markdown ('#### 2. Click this original URL https://foraging-behavior-browser.allenneuraldynamics-test.org/' )
795696 st .markdown ('#### 3. Report your bug here: https://github.com/AllenNeuralDynamics/foraging-behavior-browser/issues (paste your URL and screenshoots)' )
697+ raise e
0 commit comments