Skip to content

Commit 81bba31

Browse files
committed
WIP updating DLC-to-poseinterface example
1 parent b13e0da commit 81bba31

File tree

3 files changed

+213
-81
lines changed

3 files changed

+213
-81
lines changed
Lines changed: 113 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Convert DeepLabCut project to benchmark dataset
22
==================================================
33
Convert videos and labelled frames from a DeepLabCut (DLC) project to the
4-
``poseinterface`` benchmark dataset format.
4+
``poseinterface`` [benchmark dataset format](target-benchmark-dataset).
55
66
"""
77

@@ -11,39 +11,121 @@
1111
import shutil
1212
from pathlib import Path
1313

14-
from poseinterface.io import annotations_to_coco
14+
from poseinterface.utils import tree
1515

1616
# %%
17-
# Background
18-
# ----------
19-
# We've identified potential datasets from SWC that could be used for the pilot
20-
# version of the pose benchmark dataset.
21-
# Among these is the Elevated Plus Maze (EPM) dataset produced by
22-
# Loukia Katsouri, for John O'Keefe's lab.
17+
# Project overview
18+
# ----------------
19+
# Here we work with the "Elevated Plus Maze (EPM)" project from the
20+
# [Sainsbury Wellcome Centre (SWC)](https://www.sainsburywellcome.org/),
21+
# produced by Loukia Katsouri from John O'Keefe's lab.
22+
#
2323
# It contains single-animal top-down videos of mice exploring an elevated plus
24-
# maze, with keypoint annotations and predictions from DeepLabCut (DLC).
24+
# maze, with keypoint annotations and predictions from
25+
# [DeepLabCut (DLC)](https://www.mackenziemathislab.org/deeplabcut).
2526
#
26-
# In this example, we convert the DLC annotations to COCO .json format.
2727

2828
# %%
29-
# Define source and target directories
29+
# Prepare benchmark dataset directories
30+
# -------------------------------------
31+
# Remember that:
32+
# - A benchmark dataset is organised into a ``Train`` and a ``Test`` split.
33+
# - Each split contains one or more **projects**
34+
# (i.e. datasets contributed by different groups).
35+
#
36+
# Here we create a ``poseinterface_benchmarks`` directory to hold all of our
37+
# projects, with subfolders for ``Train`` and ``Test`` splits.
38+
# In each split, we create a folder named ``SWC-plusmaze`` to hold converted
39+
# files from the project described above. If any of these directories
40+
# already exist, they will be left unchanged.
41+
42+
benchmark_base_dir = Path("/mnt/Data/poseinterface_benchmarks")
43+
project_name = "SWC-plusmaze"
44+
45+
for split in ["Train", "Test"]:
46+
split_dir = benchmark_base_dir / split / project_name
47+
split_dir.mkdir(parents=True, exist_ok=True)
48+
49+
# print the directory structure as a tree
50+
print(tree(benchmark_base_dir, level=2))
51+
52+
# %%
53+
# Define source DLC project directory
3054
# ------------------------------------
31-
# We specify the paths to the source DLC project directory
32-
# as well as the target directory where converted files will be saved.
33-
# The target will be organised in the pose benchmarks dataset structure.
55+
# We specify the path to the source DLC project directory.
3456

3557
source_base_dir = Path(
36-
"/media/ceph-niu/neuroinformatics/sirmpilatzen/behav_data"
37-
"/Loukia/MASTER_DoNotModify"
58+
"/media/ceph-niu/neuroinformatics/sirmpilatzen/behav_data/Loukia/"
59+
"MASTER_DoNotModify"
3860
)
3961
source_project_dir = source_base_dir / "MouseTopDown-Loukia-2022-09-13"
4062
assert source_project_dir.exists(), (
4163
f"DLC project directory not found: {source_project_dir}"
4264
)
4365

44-
target_base_dir = Path("/mnt/Data/pose_benchmarks")
45-
target_dataset_dir = target_base_dir / "SWC-plusmaze"
46-
target_dataset_dir.mkdir(parents=True, exist_ok=True)
66+
# Print a tree representation of the DLC project directory structure
67+
print(tree(source_project_dir, level=1, exclude_hidden=True))
68+
69+
# %%
70+
# Of the various sub-directories, we are interested in the following:
71+
# - ``videos``: contains the video files as well as the corresponding
72+
# predictions from DLC.
73+
# - ``labeled-data``: contains the frames used for labeling and the
74+
# corresponding annotations in csv format.
75+
76+
print(
77+
tree(
78+
source_project_dir / "videos",
79+
level=1,
80+
length_limit=14,
81+
exclude_hidden=True,
82+
)
83+
)
84+
85+
# %%
86+
# We see that for each video file (ending in ``converted.mp4``), there are
87+
# corresponding DLC predictions in .h5 and .csv formats (both raw and
88+
# filtered versions). The files ending in ``.labeled.mp4`` are videos with the
89+
# DLC predictions overlaid, which we won't need.
90+
#
91+
# The video filenames in this project start with strings like
92+
# ``M708149_EPM_20200317_``. We'll use ``M708149`` as the subject identifier
93+
# and ``20200317`` (date in YYYYMMDD format) as the session identifier.
94+
# This DLC project contains videos acquired in multiple different experimental
95+
# setups, but we'll only focus on those containing the string ``EPM``
96+
# (elevated plus maze) in the filename, which are all top-down videos.
97+
98+
# %%
99+
# What about the ``labeled-data`` directory?
100+
101+
print(
102+
tree(
103+
source_project_dir / "labeled-data",
104+
level=2,
105+
length_limit=10,
106+
exclude_hidden=True,
107+
)
108+
)
109+
110+
# %%
111+
# We see that there are sub-directories named after the videos (without the
112+
# ``.mp4`` extension), which contain the frames used for labeling and the
113+
# corresponding annotations (for all frames sampled from this video)
114+
# in .csv and .h5 formats.
115+
116+
# %%
117+
# Convert a single video and its corresponding frame annotations
118+
# ---------------------------------------------------------------
119+
# For now, let's focus on converting a single EPM video.
120+
121+
source_video_name = "M708149_EPM_20200317_165049331-converted.mp4"
122+
123+
subject_id = source_video_name.split("_")[0] # "M708149"
124+
session_id = source_video_name.split("_")[2] # "20200317"
125+
camera_id = "topdown"
126+
127+
source_video_path = source_project_dir / "videos" / source_video_name
128+
assert source_video_path.exists(), f"Video file not found: {source_video_path}"
47129

48130
# %%
49131
# Copy video to target location
@@ -54,77 +136,27 @@
54136
source_video_name = "M708149_EPM_20200317_165049331-converted.mp4"
55137
source_video_path = source_project_dir / "videos" / source_video_name
56138

57-
# Define subject, session, and view identifiers
139+
# Define subject, session, and camera view identifiers
58140
subject_id = "M708149"
59141
session_id = "20200317"
60-
view_id = "topdown"
61-
video_id = f"sub-{subject_id}_ses-{session_id}_view-{view_id}"
142+
camera_id = "topdown"
62143

63-
# Create target session directory
64-
target_session_dir = target_dataset_dir / f"sub-{subject_id}_ses-{session_id}"
144+
session_prefix = f"sub-{subject_id}_ses-{session_id}"
145+
video_prefix = f"{session_prefix}_cam-{camera_id}"
146+
147+
# Create target sessions directory
148+
target_session_dir = (
149+
benchmark_base_dir / "Train" / project_name / session_prefix
150+
)
65151
target_session_dir.mkdir(parents=True, exist_ok=True)
66152

67153
# Copy video to target location
68-
target_video_path = target_session_dir / f"{video_id}.mp4"
154+
target_video_path = target_session_dir / f"{video_prefix}.mp4"
69155
if not target_video_path.exists():
70156
shutil.copy2(source_video_path, target_video_path)
71157
print(f"Copied video to: {target_video_path}")
72158
else:
73159
print(f"Video already exists at: {target_video_path}")
74160

75-
# %%
76-
# Define source annotations path
77-
# ------------------------------
78-
# The first attempt failed because the paths in the DLC annotations
79-
# csv file were given as
80-
# ``labeled-data,<video-name>,<filename-with-frame-number>.<extension>``
81-
# instead of the required
82-
# ``labeled-data/<video-name>/<filename-with-frame-number>.<extension>``.
83-
# We fixed this by replacing the commas with slashes in the csv file.
84-
85-
source_labels_dir = (
86-
source_project_dir / "labeled-data" / source_video_name.replace(".mp4", "")
87-
)
88-
source_annotations_path = source_labels_dir / "CollectedData_Loukia.csv"
89-
90-
# Create Frames directory inside the session directory
91-
target_frames_dir = target_session_dir / "Frames"
92-
target_frames_dir.mkdir(parents=True, exist_ok=True)
93-
94-
# Save COCO annotations inside the Frames directory
95-
target_annotations_path = target_frames_dir / f"{video_id}_framelabels.json"
96-
97-
# %%
98-
# Convert DLC annotations to COCO format
99-
# --------------------------------------
100-
# Here we use the :func:`annotations_to_coco` function from `poseinterface.io`
101-
# which wraps around `sleap_io` functionality to perform the conversion.
102-
103-
annotations_to_coco(
104-
input_path=source_annotations_path,
105-
output_json_path=target_annotations_path,
106-
sub_id=subject_id,
107-
ses_id=session_id,
108-
cam_id=view_id,
109-
)
110-
print(f"Saved COCO annotations to: {target_annotations_path}")
111-
112-
# %%
113-
# Copy labeled frames to target directory
114-
# ---------------------------------------
115-
# Copy the frames used for labeling and rename them to follow
116-
# the naming convention:
117-
# ``sub-{subjectID}_ses-{SessionID}_view-{ViewID}_frame-{FrameID}.png``
118-
119-
for source_frame_path in source_labels_dir.glob("*.png"):
120-
# Extract frame number from original filename, e.g. "img0042.png" -> "0042"
121-
frame_number = source_frame_path.stem.replace("img", "")
122-
target_frame_path = (
123-
target_frames_dir / f"{video_id}_frame-{frame_number}.png"
124-
)
125-
if not target_frame_path.exists():
126-
shutil.copy2(source_frame_path, target_frame_path)
127-
128-
print(f"Copied labeled frames to: {target_frames_dir}")
129-
130-
# %%
161+
# print the directory structure of the target session directory
162+
print(tree(target_session_dir, level=1))

poseinterface/utils.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""General utility functions for ``poseinterface``."""
2+
3+
from collections.abc import Iterator
4+
from itertools import islice
5+
from pathlib import Path
6+
7+
8+
def tree(
    dir_path: Path,
    *,
    level: int = -1,
    limit_to_directories: bool = False,
    exclude_hidden: bool = False,
    length_limit: int = 1000,
) -> str:
    """Return a visual tree structure of a directory as a string.

    Parameters
    ----------
    dir_path
        Path to the root directory.
    level
        Maximum depth to display. ``-1`` means no limit. Default is ``-1``.
    limit_to_directories
        If ``True``, only directories are shown. Default is ``False``.
    exclude_hidden
        If ``True``, files and directories starting with ``.`` are excluded.
        Default is ``False``.
    length_limit
        Maximum number of lines to include before truncating.
        Default is ``1000``.

    Returns
    -------
    str
        Tree representation of the directory structure, including a
        summary line with the count of directories and files.

    Notes
    -----
    Based on https://stackoverflow.com/a/59109706 by Aaron Hall, modified
    by community (see post 'Timeline' for change history).
    Retrieved 2026-03-27. License: CC BY-SA 4.0.

    Examples
    --------
    >>> from pathlib import Path
    >>> from poseinterface.utils import tree
    >>> print(tree(Path(".")))
    """
    # Box-drawing components used to render the branches.
    indent_blank = "    "
    indent_branch = "│   "
    connector_mid = "├── "
    connector_end = "└── "

    root = Path(dir_path)
    n_files = 0
    n_dirs = 0

    def _walk(current: Path, prefix: str = "", depth: int = -1) -> Iterator[str]:
        # Lazily yield one rendered line per visited entry. The counters are
        # bumped only when a yielded line is actually consumed, so entries
        # beyond ``length_limit`` are neither rendered nor counted.
        nonlocal n_files, n_dirs
        if not depth:
            # depth == 0 means we have exhausted the allowed display depth
            return
        entries = sorted(
            (
                entry
                for entry in current.iterdir()
                if not (exclude_hidden and entry.name.startswith("."))
            ),
            key=lambda entry: entry.name,
        )
        if limit_to_directories:
            entries = [entry for entry in entries if entry.is_dir()]
        final_index = len(entries) - 1
        for index, entry in enumerate(entries):
            # Last sibling gets the closing elbow; all others get a tee.
            connector = connector_end if index == final_index else connector_mid
            if entry.is_dir():
                yield prefix + connector + entry.name
                n_dirs += 1
                deeper_prefix = prefix + (
                    indent_blank if index == final_index else indent_branch
                )
                yield from _walk(entry, prefix=deeper_prefix, depth=depth - 1)
            elif not limit_to_directories:
                yield prefix + connector + entry.name
                n_files += 1

    rendered: list[str] = [root.name]
    line_iter = _walk(root, depth=level)
    rendered.extend(islice(line_iter, length_limit))
    # Pulling one more item tells us whether the tree was truncated.
    if next(line_iter, None):
        rendered.append(f"... length_limit, {length_limit}, reached, counted:")
    summary = f"\n{n_dirs} directories"
    if n_files:
        summary += f", {n_files} files"
    rendered.append(summary)
    return "\n".join(rendered)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ requires-python = ">=3.11.0"
1111
dynamic = ["version"]
1212

1313
dependencies = [
14+
"jupyter>=1.1.1",
1415
"sleap-io>=0.6.4",
1516
]
1617

0 commit comments

Comments
 (0)