|
1 | 1 | """Convert DeepLabCut project to benchmark dataset |
2 | 2 | ================================================== |
3 | 3 | Convert videos and labelled frames from a DeepLabCut (DLC) project to the |
4 | | -``poseinterface`` benchmark dataset format. |
| 4 | +``poseinterface`` [benchmark dataset format](target-benchmark-dataset). |
5 | 5 |
|
6 | 6 | """ |
7 | 7 |
|
|
11 | 11 | import shutil |
12 | 12 | from pathlib import Path |
13 | 13 |
|
14 | | -from poseinterface.io import annotations_to_coco |
| 14 | +from poseinterface.utils import tree |
15 | 15 |
|
16 | 16 | # %% |
17 | | -# Background |
18 | | -# ---------- |
19 | | -# We've identified potential datasets from SWC that could be used for the pilot |
20 | | -# version of the pose benchmark dataset. |
21 | | -# Among these is the Elevated Plus Maze (EPM) dataset produced by |
22 | | -# Loukia Katsouri, for John O'Keefe's lab. |
| 17 | +# Project overview |
| 18 | +# ---------------- |
| 19 | +# Here we work with the "Elevated Plus Maze (EPM)" project from the |
| 20 | +# [Sainsbury Wellcome Centre (SWC)](https://www.sainsburywellcome.org/),
| 21 | +# produced by Loukia Katsouri from John O'Keefe's lab. |
| 22 | +# |
23 | 23 | # It contains single-animal top-down videos of mice exploring an elevated plus |
24 | | -# maze, with keypoint annotations and predictions from DeepLabCut (DLC). |
| 24 | +# maze, with keypoint annotations and predictions from |
| 25 | +# [DeepLabCut (DLC)](https://www.mackenziemathislab.org/deeplabcut). |
25 | 26 | # |
26 | | -# In this example, we convert the DLC annotations to COCO .json format. |
27 | 27 |
|
28 | 28 | # %% |
29 | | -# Define source and target directories |
| 29 | +# Prepare benchmark dataset directories |
| 30 | +# ------------------------------------- |
| 31 | +# Remember that: |
| 32 | +# - A benchmark dataset is organised into a ``Train`` and a ``Test`` split. |
| 33 | +# - Each split contains one or more **projects** |
| 34 | +# (i.e. datasets contributed by different groups). |
| 35 | +# |
| 36 | +# Here we create a ``poseinterface_benchmarks`` directory to hold all of our |
| 37 | +# projects, with subfolders for ``Train`` and ``Test`` splits. |
| 38 | +# In each split, we create a folder named ``SWC-plusmaze`` to hold converted |
| 39 | +# files from the project described above. If any of these directories |
| 40 | +# already exist, they will be left unchanged. |
| 41 | + |
| 42 | +benchmark_base_dir = Path("/mnt/Data/poseinterface_benchmarks") |
| 43 | +project_name = "SWC-plusmaze" |
| 44 | + |
| 45 | +for split in ["Train", "Test"]: |
| 46 | + split_dir = benchmark_base_dir / split / project_name |
| 47 | + split_dir.mkdir(parents=True, exist_ok=True) |
| 48 | + |
| 49 | +# print the directory structure as a tree |
| 50 | +print(tree(benchmark_base_dir, level=2)) |
| 51 | + |
| 52 | +# %% |
| 53 | +# Define source DLC project directory |
30 | 54 | # ------------------------------------ |
31 | | -# We specify the paths to the source DLC project directory |
32 | | -# as well as the target directory where converted files will be saved. |
33 | | -# The target will be organised in the pose benchmarks dataset structure. |
| 55 | +# We specify the path to the source DLC project directory. |
34 | 56 |
|
35 | 57 | source_base_dir = Path( |
36 | | - "/media/ceph-niu/neuroinformatics/sirmpilatzen/behav_data" |
37 | | - "/Loukia/MASTER_DoNotModify" |
| 58 | + "/media/ceph-niu/neuroinformatics/sirmpilatzen/behav_data/Loukia/" |
| 59 | + "MASTER_DoNotModify" |
38 | 60 | ) |
39 | 61 | source_project_dir = source_base_dir / "MouseTopDown-Loukia-2022-09-13" |
40 | 62 | assert source_project_dir.exists(), ( |
41 | 63 | f"DLC project directory not found: {source_project_dir}" |
42 | 64 | ) |
43 | 65 |
|
44 | | -target_base_dir = Path("/mnt/Data/pose_benchmarks") |
45 | | -target_dataset_dir = target_base_dir / "SWC-plusmaze" |
46 | | -target_dataset_dir.mkdir(parents=True, exist_ok=True) |
| 66 | +# Print a tree representation of the DLC project directory structure |
| 67 | +print(tree(source_project_dir, level=1, exclude_hidden=True)) |
| 68 | + |
| 69 | +# %% |
| 70 | +# Of the various sub-directories, we are interested in the following: |
| 71 | +# - ``videos``: contains the video files as well as the corresponding |
| 72 | +#   predictions from DLC.
| 73 | +# - ``labeled-data``: contains the frames used for labeling and the |
| 74 | +# corresponding annotations in csv format. |
| 75 | + |
| 76 | +print( |
| 77 | + tree( |
| 78 | + source_project_dir / "videos", |
| 79 | + level=1, |
| 80 | + length_limit=14, |
| 81 | + exclude_hidden=True, |
| 82 | + ) |
| 83 | +) |
| 84 | + |
| 85 | +# %% |
| 86 | +# We see that for each video file (ending in ``converted.mp4``), there are |
| 87 | +# corresponding DLC predictions in .h5 and .csv formats (both raw and |
| 88 | +# filtered versions). The files ending in ``.labeled.mp4`` are videos with the |
| 89 | +# DLC predictions overlaid, which we won't need. |
| 90 | +# |
| 91 | +# The video filenames in this project start with strings like |
| 92 | +# ``M708149_EPM_20200317_``. We'll use ``M708149`` as the subject identifier |
| 93 | +# and ``20200317`` (date in YYYYMMDD format) as the session identifier. |
| 94 | +# This DLC project contains videos acquired in multiple different experimental |
| 95 | +# setups, but we'll only focus on those containing the string ``EPM`` |
| 96 | +# (elevated plus maze) in the filename, which are all top-down videos. |
| 97 | + |
| 98 | +# %% |
| 99 | +# What about the ``labeled-data`` directory? |
| 100 | + |
| 101 | +print( |
| 102 | + tree( |
| 103 | + source_project_dir / "labeled-data", |
| 104 | + level=2, |
| 105 | + length_limit=10, |
| 106 | + exclude_hidden=True, |
| 107 | + ) |
| 108 | +) |
| 109 | + |
| 110 | +# %% |
| 111 | +# We see that there are sub-directories named after the videos (without the |
| 112 | +# ``.mp4`` extension), which contain the frames used for labeling and the |
| 113 | +# corresponding annotations (for all frames sampled from this video) |
| 114 | +# in .csv and .h5 formats. |
| 115 | + |
| 116 | +# %% |
| 117 | +# Convert a single video and its corresponding frame annotations |
| 118 | +# --------------------------------------------------------------- |
| 119 | +# For now, let's focus on converting a single EPM video. |
| 120 | + |
| 121 | +source_video_name = "M708149_EPM_20200317_165049331-converted.mp4" |
| 122 | + |
| 123 | +subject_id = source_video_name.split("_")[0] # "M708149" |
| 124 | +session_id = source_video_name.split("_")[2] # "20200317" |
| 125 | +camera_id = "topdown" |
| 126 | + |
| 127 | +source_video_path = source_project_dir / "videos" / source_video_name |
| 128 | +assert source_video_path.exists(), f"Video file not found: {source_video_path}" |
47 | 129 |
|
48 | 130 | # %% |
49 | 131 | # Copy video to target location |
|
54 | 136 | source_video_name = "M708149_EPM_20200317_165049331-converted.mp4" |
55 | 137 | source_video_path = source_project_dir / "videos" / source_video_name |
56 | 138 |
|
57 | | -# Define subject, session, and view identifiers |
| 139 | +# Define subject, session, and camera view identifiers |
58 | 140 | subject_id = "M708149" |
59 | 141 | session_id = "20200317" |
60 | | -view_id = "topdown" |
61 | | -video_id = f"sub-{subject_id}_ses-{session_id}_view-{view_id}" |
| 142 | +camera_id = "topdown" |
62 | 143 |
|
63 | | -# Create target session directory |
64 | | -target_session_dir = target_dataset_dir / f"sub-{subject_id}_ses-{session_id}" |
| 144 | +session_prefix = f"sub-{subject_id}_ses-{session_id}" |
| 145 | +video_prefix = f"{session_prefix}_cam-{camera_id}" |
| 146 | + |
| 147 | +# Create target sessions directory |
| 148 | +target_session_dir = ( |
| 149 | + benchmark_base_dir / "Train" / project_name / session_prefix |
| 150 | +) |
65 | 151 | target_session_dir.mkdir(parents=True, exist_ok=True) |
66 | 152 |
|
67 | 153 | # Copy video to target location |
68 | | -target_video_path = target_session_dir / f"{video_id}.mp4" |
| 154 | +target_video_path = target_session_dir / f"{video_prefix}.mp4" |
69 | 155 | if not target_video_path.exists(): |
70 | 156 | shutil.copy2(source_video_path, target_video_path) |
71 | 157 | print(f"Copied video to: {target_video_path}") |
72 | 158 | else: |
73 | 159 | print(f"Video already exists at: {target_video_path}") |
74 | 160 |
|
75 | | -# %% |
76 | | -# Define source annotations path |
77 | | -# ------------------------------ |
78 | | -# The first attempt failed because the paths in the DLC annotations |
79 | | -# csv file were given as |
80 | | -# ``labeled-data,<video-name>,<filename-with-frame-number>.<extension>`` |
81 | | -# instead of the required |
82 | | -# ``labeled-data/<video-name>/<filename-with-frame-number>.<extension>``. |
83 | | -# We fixed this by replacing the commas with slashes in the csv file. |
84 | | - |
85 | | -source_labels_dir = ( |
86 | | - source_project_dir / "labeled-data" / source_video_name.replace(".mp4", "") |
87 | | -) |
88 | | -source_annotations_path = source_labels_dir / "CollectedData_Loukia.csv" |
89 | | - |
90 | | -# Create Frames directory inside the session directory |
91 | | -target_frames_dir = target_session_dir / "Frames" |
92 | | -target_frames_dir.mkdir(parents=True, exist_ok=True) |
93 | | - |
94 | | -# Save COCO annotations inside the Frames directory |
95 | | -target_annotations_path = target_frames_dir / f"{video_id}_framelabels.json" |
96 | | - |
97 | | -# %% |
98 | | -# Convert DLC annotations to COCO format |
99 | | -# -------------------------------------- |
100 | | -# Here we use the :func:`annotations_to_coco` function from `poseinterface.io` |
101 | | -# which wraps around `sleap_io` functionality to perform the conversion. |
102 | | - |
103 | | -annotations_to_coco( |
104 | | - input_path=source_annotations_path, |
105 | | - output_json_path=target_annotations_path, |
106 | | - sub_id=subject_id, |
107 | | - ses_id=session_id, |
108 | | - cam_id=view_id, |
109 | | -) |
110 | | -print(f"Saved COCO annotations to: {target_annotations_path}") |
111 | | - |
112 | | -# %% |
113 | | -# Copy labeled frames to target directory |
114 | | -# --------------------------------------- |
115 | | -# Copy the frames used for labeling and rename them to follow |
116 | | -# the naming convention: |
117 | | -# ``sub-{subjectID}_ses-{SessionID}_view-{ViewID}_frame-{FrameID}.png`` |
118 | | - |
119 | | -for source_frame_path in source_labels_dir.glob("*.png"): |
120 | | - # Extract frame number from original filename, e.g. "img0042.png" -> "0042" |
121 | | - frame_number = source_frame_path.stem.replace("img", "") |
122 | | - target_frame_path = ( |
123 | | - target_frames_dir / f"{video_id}_frame-{frame_number}.png" |
124 | | - ) |
125 | | - if not target_frame_path.exists(): |
126 | | - shutil.copy2(source_frame_path, target_frame_path) |
127 | | - |
128 | | -print(f"Copied labeled frames to: {target_frames_dir}") |
129 | | - |
130 | | -# %% |
| 161 | +# print the directory structure of the target session directory |
| 162 | +print(tree(target_session_dir, level=1)) |
0 commit comments