@@ -351,6 +351,7 @@ def general_data_formatter(
351351 time_column : Union [int , List [int ]],
352352 time_format : str ,
353353 delimiter : str = ',' ,
354+ header_row : int = 0 ,
354355 date_offset : str = None ,
355356 seconds_shift : int = 0 ,
356357 timezone_identifier : str = 'UTC'
@@ -383,6 +384,14 @@ def general_data_formatter(
383384 A tuple containing two np.array objects: the first contains the
384385 epoch times, and the second contains the data.
385386 """
387+
388+ # Find matching strings in the header row and get their indices
389+ if isinstance (data_column [0 ], str ):
390+ data_header = data [header_row ].split (delimiter )
391+ # Get data column indices
392+ data_column = [data_header .index (x )
393+ for x in data_column ]
394+
386395 # Check the data format
387396 data = data_format_checks (data , data_checks )
388397
@@ -408,6 +417,7 @@ def sizer_data_formatter(
408417 time_column : int ,
409418 time_format : str ,
410419 delimiter : str = ',' ,
420+ header_row : int = 0 ,
411421 date_offset : str = None ,
412422 seconds_shift : int = 0 ,
413423 timezone_identifier : str = 'UTC'
@@ -443,25 +453,33 @@ def sizer_data_formatter(
443453 """
444454
445455 # Get Dp range and columns
446- data_header = data [data_sizer_reader ["header_rows" ]].split (delimiter )
456+ data_header = data [header_row ].split (delimiter )
457+ # check if start and end keywords are in the header
458+ if data_sizer_reader ["Dp_start_keyword" ] not in data_header :
459+ # raise error with a snippet of the data header
460+ raise ValueError (
461+ f"Cannot find '{ data_sizer_reader ['Dp_start_keyword' ]} ' in header" \
462+ + f" { data_header [:20 ]} ..."
463+ )
464+ if data_sizer_reader ["Dp_end_keyword" ] not in data_header :
465+ # raise error with a snippet of the data header
466+ raise ValueError (
467+ f"Cannot find '{ data_sizer_reader ['Dp_end_keyword' ]} ' in header" \
468+ + f" { data_header [:20 ]} ..."
469+ )
447470 dp_range = [
448471 data_header .index (data_sizer_reader ["Dp_start_keyword" ]),
449472 data_header .index (data_sizer_reader ["Dp_end_keyword" ])
450473 ]
451474 dp_columns = list (range (dp_range [0 ]+ 1 , dp_range [1 ]))
452- dp_header = [data_header [i ] for i in dp_columns ]
475+ header = [data_header [i ] for i in dp_columns ]
453476 # change from np.array
454477
455- # Get data columns
456- data_column = [
457- data_header .index (x ) for x in data_sizer_reader ["list_of_data_headers" ]
458- ]
459-
460478 # Format data
461479 data = data_format_checks (data , data_checks )
462480
463481 # Get data arrays
464- epoch_time , data_smps_2d = sample_data (
482+ epoch_time , data_2d = sample_data (
465483 data ,
466484 time_column ,
467485 time_format ,
@@ -471,16 +489,6 @@ def sizer_data_formatter(
471489 seconds_shift = seconds_shift ,
472490 timezone_identifier = timezone_identifier
473491 )
474- epoch_time , data_smps_1d = sample_data (
475- data ,
476- time_column ,
477- time_format ,
478- data_column ,
479- delimiter ,
480- date_offset ,
481- seconds_shift = seconds_shift ,
482- timezone_identifier = timezone_identifier
483- )
484492
485493 if "convert_scale_from" in data_sizer_reader :
486494 if data_sizer_reader ["convert_scale_from" ] == "dw" :
@@ -493,13 +501,13 @@ def sizer_data_formatter(
493501 " Either dw/dlogdp or dw must be specified."
494502 )
495503 for i in range (len (epoch_time )):
496- data_smps_2d [i , :] = convert .convert_sizer_dn (
497- diameter = np .array (dp_header ).astype (float ),
498- dn_dlogdp = data_smps_2d [i , :],
504+ data_2d [i , :] = convert .convert_sizer_dn (
505+ diameter = np .array (header ).astype (float ),
506+ dn_dlogdp = data_2d [i , :],
499507 inverse = inverse
500508 )
501509
502- return epoch_time , dp_header , data_smps_2d , data_smps_1d
510+ return epoch_time , data_2d , header
503511
504512
505513def non_standard_date_location (
0 commit comments