11import re
22from collections import defaultdict
33from dataclasses import dataclass
4- from datetime import datetime
4+ from datetime import datetime , timedelta
55from typing import Iterable , List , Optional , Tuple
66
77import meds_reader
@@ -321,19 +321,32 @@ def generate_demographics_and_patient_blocks(
321321 conversion : MedsToCehrBertConversion ,
322322 patient : meds_reader .Subject ,
323323 prediction_time : datetime = None ,
324+ observation_window : int = None ,
324325) -> Tuple [PatientDemographics , List [PatientBlock ]]:
325326 if isinstance (conversion , MedsToBertMimic4 ):
326327 return mimic_meds_generate_demographics_and_patient_blocks (
327- patient , conversion , prediction_time , conversion .default_visit_id
328+ patient = patient ,
329+ conversion = conversion ,
330+ prediction_time = prediction_time ,
331+ observation_window = observation_window ,
332+ default_visit_id = conversion .default_visit_id ,
328333 )
329334 elif isinstance (conversion , MedsToCehrbertOMOP ):
330- return omop_meds_generate_demographics_and_patient_blocks (patient , conversion , prediction_time )
335+ return omop_meds_generate_demographics_and_patient_blocks (
336+ patient = patient ,
337+ conversion = conversion ,
338+ prediction_time = prediction_time ,
339+ observation_window = observation_window ,
340+ )
331341 else :
332342 raise RuntimeError (f"Unrecognized conversion: { conversion } " )
333343
334344
335345def omop_meds_generate_demographics_and_patient_blocks (
336- patient : meds_reader .Subject , conversion : MedsToCehrBertConversion , prediction_time : datetime = None
346+ patient : meds_reader .Subject ,
347+ conversion : MedsToCehrBertConversion ,
348+ prediction_time : datetime = None ,
349+ observation_window : int = None ,
337350) -> Tuple [PatientDemographics , List [PatientBlock ]]:
338351 disconnect_problem_list_events = getattr (conversion , "disconnect_problem_list_events" , False )
339352 birth_datetime = None
@@ -342,6 +355,9 @@ def omop_meds_generate_demographics_and_patient_blocks(
342355 ethnicity = None
343356 visit_events = defaultdict (list )
344357 unlinked_event_mapping = defaultdict (list )
358+ observation_start_window : Optional [datetime ] = None
359+ if prediction_time and observation_window :
360+ observation_start_window = prediction_time - timedelta (days = observation_window )
345361 for e in patient .events :
346362 # This indicates demographics features
347363 event_code_uppercase = e .code .upper ()
@@ -355,10 +371,11 @@ def omop_meds_generate_demographics_and_patient_blocks(
355371 ethnicity = e .code
356372 elif e .time is not None :
357373 # Skip out of the loop if the events' time stamps are beyond the prediction time
358- if prediction_time is not None :
359- if e .time > prediction_time :
360- break
361-
374+ if prediction_time is not None and e .time > prediction_time :
375+ break
376+ # Skip out of the loop if the events' time stamps are before the observation start window
377+ if observation_start_window is not None and e .time < observation_start_window :
378+ break
362379 if getattr (e , "visit_id" , None ):
363380 visit_id = e .visit_id
364381 visit_events [visit_id ].append (e )
@@ -528,6 +545,7 @@ def mimic_meds_generate_demographics_and_patient_blocks(
528545 patient : meds_reader .Subject ,
529546 conversion : MedsToCehrBertConversion ,
530547 prediction_time : datetime = None ,
548+ observation_window : int = None ,
531549 default_visit_id : int = 1 ,
532550) -> Tuple [PatientDemographics , List [PatientBlock ]]:
533551 birth_datetime = None
@@ -539,13 +557,21 @@ def mimic_meds_generate_demographics_and_patient_blocks(
539557 current_date = None
540558 events_for_current_date = []
541559 patient_blocks = []
542- for e in patient .events :
560+ observation_start_window : Optional [datetime ] = None
561+ if prediction_time and observation_window :
562+ observation_start_window = prediction_time - timedelta (days = observation_window )
543563
564+ for e in patient .events :
544565 # Skip out of the loop if the events' time stamps are beyond the prediction time
545566 if prediction_time is not None and e .time is not None :
546567 if e .time > prediction_time :
547568 break
548569
570+ # Skip out of the loop if the events' time stamps are before observation start window
571+ if observation_start_window is not None and e .time is not None :
572+ if e .time < observation_start_window :
573+ break
574+
549575 # This indicates demographics features
550576 event_code_uppercase = e .code .upper ()
551577 if event_code_uppercase .startswith (birth_code ):
0 commit comments