|
1 | 1 | """Load bounding boxes tracking data into ``movement``.""" |
2 | 2 |
|
3 | 3 | import ast |
4 | | -import re |
5 | | -from collections.abc import Callable |
6 | 4 | from pathlib import Path |
7 | 5 | from typing import Literal |
8 | 6 |
|
@@ -474,7 +472,6 @@ def _df_from_via_tracks_file( |
474 | 472 | """ |
475 | 473 | # Read VIA tracks .csv file as a pandas dataframe |
476 | 474 | df_input = pd.read_csv(file_path, sep=",", header=0) |
477 | | - # df_file = pd.read_parquet(file_path) # engine |
478 | 475 |
|
479 | 476 | # Pre-parse dataframe if not already |
480 | 477 | if "region_shape_attributes_x" not in df_input.columns: |
@@ -529,7 +526,7 @@ def _df_from_via_tracks_file( |
529 | 526 | # sort by ID and frame_number and reset to new index |
530 | 527 | df = ( |
531 | 528 | df.set_index(["ID", "frame_number"]) |
532 | | - .reindex(multi_index) # adds missing rows and fills them with NaN |
| 529 | + .reindex(multi_index) # adds missing rows and fills them with nan |
533 | 530 | .sort_values(by=["ID", "frame_number"], axis=0) |
534 | 531 | .reset_index() |
535 | 532 | ) |
@@ -600,116 +597,3 @@ def _pre_parse_df_columns( |
600 | 597 | df = df.drop(columns=["file_attributes"]) |
601 | 598 |
|
602 | 599 | return df |
603 | | - |
604 | | - |
605 | | -############################# |
def _df_from_via_tracks_file_old(
    file_path: Path, frame_regexp: str = DEFAULT_FRAME_REGEXP
):
    """Build a 2D dataframe of bounding boxes from a VIA tracks .csv file.

    The file is read with pandas and one column is extracted per bounding
    box property (``ID``, ``frame_number``, ``x``, ``y``, ``w``, ``h`` and
    ``confidence``). The result is then reindexed so that it holds one row
    for every combination of ID and frame number found in the file; the
    combinations absent from the file are filled with NaN.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the VIA tracks .csv file.
    frame_regexp : str
        Regular expression forwarded to
        ``_extract_frame_number_from_via_tracks_df``.

    Returns
    -------
    pandas.DataFrame
        Dataframe sorted by ``ID`` and ``frame_number``, with a fresh
        integer index.

    """
    via_df = pd.read_csv(file_path, sep=",", header=0)

    # Assemble the columns one at a time, in their final order
    columns = {}
    columns["ID"] = _via_attribute_column_to_numpy(
        via_df, "region_attributes", ["track"], int
    )
    columns["frame_number"] = _extract_frame_number_from_via_tracks_df(
        via_df, frame_regexp
    )
    # Output column name -> key in the "region_shape_attributes" dict
    for column_name, shape_key in (
        ("x", "x"),
        ("y", "y"),
        ("w", "width"),
        ("h", "height"),
    ):
        columns[column_name] = _via_attribute_column_to_numpy(
            via_df, "region_shape_attributes", [shape_key], float
        )
    columns["confidence"] = _extract_confidence_from_via_tracks_df(via_df)
    bboxes = pd.DataFrame(columns)

    # Desired index: every (ID, frame number) pair.
    # The unique values are taken in order of appearance, so they are not
    # necessarily sorted at this point.
    unique_ids = bboxes["ID"].unique().tolist()
    unique_frames = bboxes["frame_number"].unique().tolist()
    full_index = pd.MultiIndex.from_product(
        [unique_ids, unique_frames],
        names=["ID", "frame_number"],
    )

    # Index by (ID, frame number), add the missing pairs as NaN rows,
    # sort, and go back to a plain integer index
    bboxes = bboxes.set_index(["ID", "frame_number"])
    bboxes = bboxes.reindex(full_index)
    bboxes = bboxes.sort_values(by=["ID", "frame_number"], axis=0)
    return bboxes.reset_index()
653 | | - |
654 | | - |
def _via_attribute_column_to_numpy(
    df: pd.DataFrame,
    via_column_name: str,
    list_keys: list[str],
    cast_fn: Callable = float,
) -> np.ndarray:
    """Convert values from VIA attribute-type column to a numpy array.

    Each cell of the column is expected to hold the string representation
    of a dictionary. The cell is parsed with ``ast.literal_eval`` and the
    values stored under ``list_keys`` are cast with ``cast_fn``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe holding the column to convert.
    via_column_name : str
        Name of the column whose cells are dictionary strings.
    list_keys : list[str]
        Keys to extract from each parsed dictionary.
    cast_fn : Callable
        Function applied to every extracted value. Default is ``float``.

    Returns
    -------
    numpy.ndarray
        Array with one entry per row of ``df``. With a single key the
        array is 1D, of shape ``(n_rows,)``. With several keys it has one
        column per key, i.e. shape ``(n_rows, n_keys)``.

    Raises
    ------
    KeyError
        If one of ``list_keys`` is missing from a parsed dictionary.

    """
    # Iterate over the column only: no need to build a Series per row
    parsed_rows = [ast.literal_eval(cell) for cell in df[via_column_name]]

    # Build the array directly in its final shape rather than squeezing it
    # afterwards: squeezing also drops the row axis when there is a single
    # row, which would return a 0-d array for a one-row dataframe.
    if len(list_keys) == 1:
        key = list_keys[0]
        return np.array([cast_fn(row[key]) for row in parsed_rows])

    return np.array(
        [tuple(cast_fn(row[key]) for key in list_keys) for row in parsed_rows]
    )
672 | | - |
673 | | - |
def _extract_confidence_from_via_tracks_df(df: pd.DataFrame) -> np.ndarray:
    """Extract confidence scores from the VIA tracks input dataframe.

    The ``region_attributes`` column is expected to hold the string
    representation of a dictionary in every row.

    Parameters
    ----------
    df : pandas.DataFrame
        VIA tracks dataframe with a ``region_attributes`` column.

    Returns
    -------
    numpy.ndarray
        1D float array of shape ``(n_rows,)``. It holds the value stored
        under the ``confidence`` key of each row if that key is defined
        for all rows, and NaN for every row otherwise.

    """
    # Parse the column once and reuse the result for both the check and
    # the extraction
    region_attributes_dicts = [
        ast.literal_eval(d) for d in df.region_attributes
    ]

    # Check if confidence is defined as a region attribute, else set to NaN
    if all("confidence" in d for d in region_attributes_dicts):
        return np.array(
            [float(d["confidence"]) for d in region_attributes_dicts],
            dtype=float,
        )

    # Allocate the 1D array directly: squeezing a (n_rows, 1) array would
    # return a 0-d array when the dataframe has a single row
    return np.full(df.shape[0], np.nan)
689 | | - |
690 | | - |
def _extract_frame_number_from_via_tracks_df(
    df: pd.DataFrame, frame_regexp: str = DEFAULT_FRAME_REGEXP
) -> np.ndarray:
    """Extract frame numbers from the VIA tracks input dataframe.

    The frame number is read from the ``frame`` key of the
    ``file_attributes`` column if that key is defined for all rows.
    Otherwise it is parsed from the ``filename`` column, using the first
    capture group of ``frame_regexp``.

    Parameters
    ----------
    df : pandas.DataFrame
        VIA tracks dataframe with ``file_attributes`` and ``filename``
        columns. Each ``file_attributes`` cell is expected to hold the
        string representation of a dictionary.
    frame_regexp : str
        Regular expression with one capture group that matches the frame
        number in the filename.

    Returns
    -------
    numpy.ndarray
        1D array of shape ``(n_rows,)`` with one frame number per row.
        When extracting from the filename, rows whose filename does not
        match ``frame_regexp`` are set to NaN.

    """
    # Parse the column once and reuse the result for both the check and
    # the extraction
    file_attributes_dicts = [ast.literal_eval(d) for d in df.file_attributes]

    # Extract frame number from file_attributes if exists.
    # The array is built directly as 1D (no squeeze), so that a dataframe
    # with a single row does not yield a 0-d array.
    if all("frame" in d for d in file_attributes_dicts):
        return np.array([int(d["frame"]) for d in file_attributes_dicts])

    # Else extract from filename: compile the pattern once and search
    # each filename a single time
    pattern = re.compile(frame_regexp)
    list_frame_numbers = []
    for filename in df["filename"]:
        match = pattern.search(filename)
        list_frame_numbers.append(int(match.group(1)) if match else np.nan)

    return np.array(list_frame_numbers)
0 commit comments