@@ -2354,6 +2354,198 @@ def rr2ann(rr_array, record_name, extension, fs=250, as_time=False):
23542354 wrann (record_name , extension , ann_sample , symbol = ann_symbol )
23552355
23562356
def csv2ann(file_name, extension='atr', fs=None, record_only=False,
            time_onset=True, header=True, delimiter=',', verbose=False):
    """
    Read a CSV/TSV/etc. file and either return an `Annotation` object with
    the annotation descriptors as attributes or write an annotation file.

    Parameters
    ----------
    file_name : str
        The name of the CSV file to be read, including the '.csv' file
        extension. Both relative and absolute paths are accepted. The name
        with its final extension removed is used as the record name of the
        annotation.
    extension : str, optional
        The string annotation file extension.
    fs : float, optional
        The sampling frequency. It is required (and must be non-zero) when
        annotation onsets are given as times (`time_onset` = True), since
        onsets must be converted to sample numbers. It is also passed on
        to the annotation.
    record_only : bool, optional
        Whether to only return the `Annotation` object (True) or to write
        the annotation file instead (False).
    time_onset : bool, optional
        Whether to assume the values provided in the 'onset' column are in
        units of time (True) or samples (False). If True, the onsets are
        multiplied by `fs` and rounded to the nearest sample.
    header : bool, optional
        Whether to assume the CSV has a first line header (True) or not
        (False). With a header, the columns must be named 'onset',
        'description' and (for the three column format) 'duration'.
        Without a header, the columns are interpreted positionally in the
        order shown in the Notes.
    delimiter : str, optional
        What to use as the delimiter for the file to separate data. The
        default is a comma (','). Other common delimiters are tabs
        ('\\t'), spaces (' '), pipes ('|'), and colons (':').
    verbose : bool, optional
        Whether to print progress information (True) or not (False).

    Returns
    -------
    Annotation or None
        The `Annotation` object representing the contents of the CSV file
        if `record_only` is True. Otherwise the annotation file is written
        and nothing is returned.

    Raises
    ------
    ValueError
        If the CSV does not have exactly two or three columns, or if
        `time_onset` is True and `fs` is not provided.

    Notes
    -----
    CSVs should be in one of the two possible following formats:

    1) All events are single time events (no duration).

        onset,description
        onset_1,description_1
        onset_2,description_2
        ...,...

        Or this format if `header=False` is defined:

        onset_1,description_1
        onset_2,description_2
        ...,...

    2) A duration is specified for some events.

        onset,duration,description
        onset_1,duration_1,description_1
        onset_2,duration_2,description_2
        ...,...,...

        Or this format if `header=False` is defined:

        onset_1,duration_1,description_1
        onset_2,duration_2,description_2
        ...,...,...

    The 'onset' column is interpreted according to `time_onset`. The
    'duration' is added directly to the onset, so it must be expressed in
    the same units as the onset. An event with a positive duration is
    split into two annotations: '(' + description at the onset and
    description + ')' at onset + duration. Events whose duration is zero,
    negative or missing are kept as a single annotation. The 'description'
    is used as the `aux_note` of the annotation and the `symbol` is always
    set to " .

    Examples
    --------
    1) Write WFDB annotation file from CSV with time onsets:
        ======= start example.csv =======
        onset,description
        0.2,p-wave
        0.8,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', fs=360)
        * Creates a WFDB annotation file called: 'example.atr'

    2) Write WFDB annotation file from CSV with sample onsets:
        ======= start example.csv =======
        onset,description
        5,p-wave
        13,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', fs=10, time_onset=False)
        * Creates a WFDB annotation file called: 'example.atr'
        * 5,13 samples -> 0.5,1.3 seconds for onset

    3) Write WFDB annotation file from CSV with time onsets, durations, and
       no header:
        ======= start example.csv =======
        0.2,0.1,qrs
        0.8,0.4,qrs
        ======== end example.csv ========
        >>> wfdb.csv2ann('example.csv', extension='qrs', fs=360, header=False)
        * Creates a WFDB annotation file called: 'example.qrs'

    """
    # Imported locally so the function does not rely on the module's
    # top-level import block providing `os`.
    import os

    if header:
        df_CSV = pd.read_csv(file_name, delimiter=delimiter)
    else:
        df_CSV = pd.read_csv(file_name, delimiter=delimiter, header=None)
    if verbose:
        print('Successfully read CSV')
        print('Creating Pandas dataframe from CSV')

    n_columns = df_CSV.shape[1]
    if n_columns == 2:
        if verbose:
            print('onset,description format detected')
        if not header:
            # Without a header pandas labels the columns 0, 1, ...; name
            # them so the lookups below work for header-less files too.
            df_CSV.columns = ['onset', 'description']
        df_out = df_CSV
    elif n_columns == 3:
        if verbose:
            print('onset,duration,description format detected')
            print('Converting durations to single time-point events')
        if not header:
            df_CSV.columns = ['onset', 'duration', 'description']
        # Only strictly positive durations are expanded into a start/end
        # pair. Everything else (zero, negative or missing duration) is
        # kept as a single event instead of being silently dropped.
        has_duration = df_CSV['duration'] > 0
        df_trunc = df_CSV[~has_duration]
        # Explicit copies: these frames are modified below and must not be
        # views of `df_CSV`.
        df_start = df_CSV[has_duration].copy()
        df_end = df_CSV[has_duration].copy()
        # Append parentheses at the start for annotation start and at the
        # end for annotation end
        df_start['description'] = '(' + df_start['description'].astype(str)
        df_end['description'] = df_end['description'].astype(str) + ')'
        # Add the duration to the onset for the end annotation to convert
        # to single time annotations only
        df_end['onset'] = df_end['onset'] + df_end['duration']
        df_out = pd.concat([df_trunc, df_start, df_end], ignore_index=True)
        # Sort by onset; ties are broken by the concatenation order
        df_out['col_index'] = df_out.index
        df_out = df_out.sort_values(['onset', 'col_index'])
    else:
        raise ValueError('The number of columns in the CSV was not '
                         'recognized.')

    # Remove only the final extension from the input file name so that
    # dots elsewhere in the path (e.g. './rec.csv') are preserved.
    file_name = os.path.splitext(file_name)[0]

    if time_onset:
        if not fs:
            raise ValueError('`fs` must be provided if `time_onset` is True '
                             'since it is required to convert time onsets '
                             'to samples')
        # Round to the nearest sample: plain truncation turns values such
        # as 0.29 * 100 == 28.999999999999996 into sample 28.
        sample = np.rint(df_out['onset'].to_numpy() * fs).astype(np.int64)
    else:
        sample = df_out['onset'].to_numpy()

    n_events = len(df_out.index)
    # Assume each annotation is a comment
    symbol = ['"'] * n_events
    # Every annotation gets the same fixed subtype value
    subtype = np.full(n_events, 22)
    # Assume each annotation belongs with the 1st channel
    chan = np.zeros(n_events, dtype=int)
    num = np.zeros(n_events, dtype=int)
    aux_note = df_out['description'].tolist()

    if verbose:
        print('Finished CSV parsing... writing to Annotation object')

    if record_only:
        annotation = Annotation(record_name=file_name, extension=extension,
                                sample=sample, symbol=symbol,
                                subtype=subtype, chan=chan, num=num,
                                aux_note=aux_note, fs=fs)
        if verbose:
            print('Finished creating Annotation object')
        return annotation

    wrann(file_name, extension, sample=sample, symbol=symbol,
          subtype=subtype, chan=chan, num=num, aux_note=aux_note, fs=fs)
    if verbose:
        print('Finished writing Annotation file')
2547+
2548+
23572549## ------------- Annotation Field Specifications ------------- ##
23582550
23592551
0 commit comments