diff --git a/.gitignore b/.gitignore
index 73d8c0b..dd16f03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,6 @@ cython_debug/
 error.*.log
 notebooks/semantic_search/output.json
 notebooks/semantic_search/config.py
+
+training/movinet_with_hmdb51/input
+training/movinet_with_hmdb51/splits
\ No newline at end of file
diff --git a/training/movinet_with_hmdb51/AHMDB51.py b/training/movinet_with_hmdb51/AHMDB51.py
new file mode 100644
index 0000000..8d9ae9d
--- /dev/null
+++ b/training/movinet_with_hmdb51/AHMDB51.py
@@ -0,0 +1,105 @@
+from typing import Any, Callable, Dict, Optional
+from aperturedb.Videos import Videos
+from aperturedb.CommonLibrary import create_connector, execute_query
+import torchvision
+from AVideoClips import AVideoClips
+
+
+def get_videos(train:bool, split:int) -> Videos:
+    """
+    Fetch one partition of an HMDB51 split from aperturedb.
+
+    HMDB51 stores videos in clips corresponding to 51 categories.
+    The videos are classified as a train and a test set (70% : 30%).
+
+    The query first finds the "Split" entity whose "id" equals `split`,
+    then the videos connected to it by a connection whose "type"
+    property is 1 when `train` is True and 2 otherwise.
+
+    Args:
+        train: select the train partition (True) or the test partition (False).
+        split: id of the split to read.
+
+    Returns the matching `Videos` with their `blobs` flag set to True.
+    """
+    client = create_connector()
+    connection_type = 1 if train else 2
+
+    find_split = {
+        "FindEntity": {
+            "_ref": 1,
+            "with_class": "Split",
+            "constraints": {"id": ["==", split]},
+            "results": {"all_properties": True},
+        }
+    }
+    find_videos = {
+        "FindVideo": {
+            "is_connected_to": {
+                "ref": 1,
+                "constraints": {"type": ["==", connection_type]},
+            },
+            "results": {"all_properties": True, "count": True},
+        }
+    }
+
+    # Only the second returned value (the response list) is used.
+    _, response, _ = execute_query(client, [find_split, find_videos], [])
+    # response[1] is the reply to the second command, FindVideo.
+    entities = response[1]["FindVideo"]["entities"]
+    videos = Videos(client=client, response=entities)
+    videos.blobs = True
+    print(f"Retrieved {len(videos)} videos")
+    return videos
+
+
+class AHMDB51(torchvision.datasets.HMDB51):
+    """
+    HMDB51 dataset whose videos are read from aperturedb instead of local files.
+    The parent constructor is not called; the attributes are populated here.
+    """
+    def __init__(self,
+        frames_per_clip: int = 5,
+        step_between_clips: int = 1,
+        frame_rate: Optional[int] = None,
+        fold: int = 1, train: bool = True,
+        transform: Optional[Callable] = None,
+        _precomputed_metadata: Optional[Dict[str, Any]] = None,
+        num_workers: int = 1,
+        _video_width: int = 0,
+        _video_height: int = 0,
+        _video_min_dimension: int = 0,
+        _audio_samples: int = 0,
+        output_format: str = "THWC") -> None:
+        self.transform = transform
+        self.video_pts = []
+        self.video_fps = []
+
+        videos = get_videos(train=train, split=fold)
+        # Number the categories in the order they are first encountered.
+        self.ci = {}
+        videos.blobs = False
+        for entity in videos:
+            self.ci.setdefault(entity["category"], len(self.ci))
+        self.samples = [
+            (position, self.ci[entity["category"]])
+            for position, entity in enumerate(videos)
+        ]
+        videos.blobs = True
+
+        self.video_clips = AVideoClips(
+            videos,
+            frames_per_clip,
+            step_between_clips,
+            frame_rate,
+            _precomputed_metadata,
+            num_workers=num_workers,
+            _video_width=_video_width,
+            _video_height=_video_height,
+            _video_min_dimension=_video_min_dimension,
+            _audio_samples=_audio_samples,
+            output_format=output_format,
+        )
+        self.indices = list(range(len(videos)))
+        assert len(videos) == sum(1 for entity in videos if 'preview' in entity)
+        videos.loaded = True
\ No newline at end of file
diff --git a/training/movinet_with_hmdb51/AVideoClips.py b/training/movinet_with_hmdb51/AVideoClips.py
new file mode 100644
index 0000000..c284317
--- /dev/null
+++ b/training/movinet_with_hmdb51/AVideoClips.py
@@ -0,0 +1,149 @@
+from typing import Any, Dict, List, Optional, Tuple
+from torchvision.datasets.video_utils import VideoClips
+from torchvision.datasets.video_utils import read_video_timestamps
+from torchvision.io.video import read_video
+import tempfile
+import os
+import shutil
+from torch.utils.data.dataloader import DataLoader
+import torch
+
+from aperturedb.Videos import Videos
+from tqdm import tqdm
+
+class _VideoTimestampsDataset:
+    """
+    Dataset used to parallelize the reading of the timestamps
+    of a list of videos held in an aperturedb `Videos` collection.
+
+    Each video's "preview" bytes are written to a temporary file under
+    `scratch` so that `read_video_timestamps` can open them by path.
+    Used in AVideoClips and defined at top level so it can be
+    pickled when forking.
+    """
+    def __init__(self, videos: Videos) -> None:
+        self._videos = videos
+        self._tmp_path = "scratch"
+        # Raises FileExistsError if the path exists but is not a directory.
+        os.makedirs(self._tmp_path, exist_ok=True)
+
+    def __len__(self) -> int:
+        return len(self._videos)
+
+    def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]:
+        """Return what `read_video_timestamps` reports for the video at `idx`."""
+        video = self._videos[idx]
+
+        with tempfile.NamedTemporaryFile(dir=self._tmp_path, suffix=".mp4") as ostream:
+            ostream.write(video["preview"])
+            # The reader opens the file by name, so the buffered bytes must
+            # be flushed first or it may see a truncated (even empty) file.
+            ostream.flush()
+            return read_video_timestamps(ostream.name)
+
+class AVideoClips(VideoClips):
+    """
+    Pytorch VideoClips backed by an aperturedb `Videos` collection: each item's
+    "preview" bytes are staged in a temporary file under `scratch` for decoding.
+    `_precomputed_metadata` is accepted but not used by this class.
+    """
+    def __init__(self, videos: Videos, clip_length_in_frames: int = 16, frames_between_clips: int = 1,
+        frame_rate: Optional[int] = None, _precomputed_metadata: Optional[Dict[str, Any]] = None, num_workers: int = 0,
+        _video_width: int = 0, _video_height: int = 0, _video_min_dimension: int = 0, _video_max_dimension: int = 0,
+        _audio_samples: int = 0, _audio_channels: int = 0, output_format: str = "THWC") -> None:
+        self._videos = videos
+        self._num_workers = num_workers
+        # these options are not valid for pyav backend
+        self._video_width = _video_width
+        self._video_height = _video_height
+        self._video_min_dimension = _video_min_dimension
+        self._video_max_dimension = _video_max_dimension
+        self._audio_samples = _audio_samples
+        self._audio_channels = _audio_channels
+        self.output_format = output_format.upper()
+
+        self._compute_frame_pts()
+        self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate)
+        assert len(self._videos) == len(list(filter(lambda e: 'preview' in e, self._videos)))
+
+    @staticmethod
+    def _collate_fn(batch):
+        """Identity collate; unlike a lambda it can be pickled for spawned workers."""
+        return batch
+
+    def _compute_frame_pts(self) -> None:
+        """Read every video's timestamps and fps into `video_pts` / `video_fps`."""
+        dl: DataLoader = DataLoader(
+            _VideoTimestampsDataset(self._videos),
+            batch_size=16,
+            num_workers=self._num_workers,
+            collate_fn=self._collate_fn,
+        )
+        self.video_fps = []
+        self.video_pts = []
+        with tqdm(total=len(dl)) as pbar:
+            for batch in dl:
+                pbar.update(1)
+                clips, fps = list(zip(*batch))
+                clips = [torch.as_tensor(c, dtype=torch.long) for c in clips]
+                self.video_pts.extend(clips)
+                self.video_fps.extend(fps)
+
+    def __len__(self) -> int:
+        return len(self._videos)
+
+    def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any], int]:
+        """
+        Gets a subclip from a list of videos.
+
+        Args:
+            idx (int): index of the subclip. Must be between 0 and num_clips().
+
+        Returns:
+            video (Tensor)
+            audio (Tensor)
+            info (Dict)
+            video_idx (int): index of the video in the `Videos` collection
+
+        Raises:
+            IndexError: if `idx` is not below num_clips().
+            NotImplementedError: if the active video backend is not pyav.
+            ValueError: if an option the pyav backend cannot honour is non-zero.
+        """
+        if idx >= self.num_clips():
+            raise IndexError(f"Index {idx} out of range ({self.num_clips()} number of clips)")
+        video_idx, clip_idx = self.get_clip_location(idx)
+        clip_pts = self.clips[video_idx][clip_idx]
+
+        from torchvision import get_video_backend
+        backend = get_video_backend()
+        if backend != "pyav":
+            # Only the pyav path decodes a clip; fail clearly instead of on an unbound `video`.
+            raise NotImplementedError(f"AVideoClips only supports the pyav backend, got {backend!r}")
+
+        # check for invalid options
+        for option in ("_video_width", "_video_height", "_video_min_dimension", "_video_max_dimension", "_audio_samples"):
+            if getattr(self, option) != 0:
+                raise ValueError(f"pyav backend doesn't support {option} != 0")
+
+        start_pts = clip_pts[0].item()
+        end_pts = clip_pts[-1].item()
+        with tempfile.NamedTemporaryFile(dir="scratch", suffix=".mp4") as ostream:
+            ostream.write(self._videos[video_idx]["preview"])
+            # read_video opens the file by name; flush so it sees every byte.
+            ostream.flush()
+            video, audio, info = read_video(ostream.name, start_pts, end_pts)
+
+        if self.frame_rate is not None:
+            resampling_idx = self.resampling_idxs[video_idx][clip_idx]
+            if isinstance(resampling_idx, torch.Tensor):
+                resampling_idx = resampling_idx - resampling_idx[0]
+            video = video[resampling_idx]
+            info["video_fps"] = self.frame_rate
+        assert len(video) == self.num_frames, f"{video.shape} x {self.num_frames}"
+        if self.output_format == "TCHW":
+            # [T,H,W,C] --> [T,C,H,W]
+            video = video.permute(0, 3, 1, 2)
+        return video, audio, info, video_idx
+
+
diff --git a/training/movinet_with_hmdb51/Classify-Vanilla-trained.ipynb b/training/movinet_with_hmdb51/Classify-Vanilla-trained.ipynb
new file mode 100644
index 0000000..b87fe7a
--- /dev/null
+++ b/training/movinet_with_hmdb51/Classify-Vanilla-trained.ipynb
@@ -0,0 +1,340 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c9b8f6e9-668c-4c4b-81dd-5fcfb7956d51",
+   "metadata": {},
+   "source": [
+    "# Classification with a vanilla model vs trained model.\n",
+    "\n",
+    "## Install prerequisites"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "064eb8e6-054d-4f9a-bf97-903706a577b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -q git+https://github.com/Atze00/MoViNet-pytorch.git\n",
+    "!pip install -q av\n",
+    "!pip install -q -U aperturedb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5382ab3d-dea0-4507-bf41-bcea892a6377",
+   "metadata": {},
+   "source": [
+    "## Util functions\n",
+    "\n",
+    "### Load datasets as clips (of 16 frames), sampled at 5fps"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4871f56-82ef-4aaa-99cd-91cc9f36db27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torchvision.transforms import v2 as T\n",
+    "import torch\n",
+    "\n",
+    "def get_common():\n",
+    "    \"\"\"\n",
+    "    Just common parameters.\n",
+    "    Applies to the training and data loading sections.\n",
+    "    \"\"\"\n",
+    "    torch.manual_seed(97)\n",
+    "    num_frames = 16\n",
+    "    clip_steps = 2\n",
+    "    Bs_Train = 16\n",
+    "    Bs_Test = 16\n",
+    "\n",
+    "    transform = T.Compose([\n",
+    "                                    T.Lambda(lambda x: x.permute(3, 0, 1, 2) / 255.),\n",
+    "                                    T.Resize((200, 200)),\n",
+    "                                    T.RandomHorizontalFlip(),\n",
+    "                                    # T.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]),\n",
+    "                                    T.RandomCrop((172, 172))])\n",
+    "    transform_test = T.Compose([\n",
+    "                                    T.Lambda(lambda x: x.permute(3, 0, 1, 2) / 255.),\n",
+    "                                    # T.ToTensor()/255.0,\n",
+    "                                    # T.ToTensor(),\n",
+    "                                    T.Resize((200, 200)),\n",
+    "                                    # T.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]),\n",
+    "                                    T.CenterCrop((172, 172))])\n",
+    "    return num_frames, clip_steps, Bs_Train, Bs_Test, transform, transform_test\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0488febf-57e3-44a4-bc83-253be7ce9f9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from AHMDB51 import AHMDB51\n",
+    "\n",
+    "def get_data_sets():\n",
+    "    \"\"\"\n",
+    "    Get the datasets from aperturedb.\n",
+    "    The data has been ingested previously.\n",
+    "    \"\"\"\n",
+    "    num_frames, clip_steps, Bs_Train, Bs_Test, transform, transform_test = get_common()\n",
+    "\n",
+    "    hmdb51_test = AHMDB51(\n",
+    "        num_workers=1,\n",
+    "        frame_rate=5,\n",
+    "        frames_per_clip=num_frames,\n",
+    "        step_between_clips=clip_steps,\n",
+    "        train=False,\n",
+    "        transform=transform_test\n",
+    "        )\n",
+    "\n",
+    "\n",
+    "    return None, hmdb51_test\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "59fd3014-1831-4312-b135-05a59b02f5f3",
+   "metadata": {},
+   "source": [
+    "### Utility function to show a tensor.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6861e546-7dc7-408c-b13d-d42c9a0b6d55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import Video, display\n",
+    "import torchvision\n",
+    "\n",
+    "def show_tensor(tensor):\n",
+    "    with open(\"tmp_video.mp4\", \"wb\") as f:\n",
+    "        torchvision.io.write_video(f.name, tensor, fps=5, video_codec=\"h264\")\n",
+    "        f.seek(0)\n",
+    "        display(Video(\"tmp_video.mp4\"))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cee562d4-3d97-44d6-8c47-191005ae60b4",
+   "metadata": {},
+   "source": [
+    "## Instantiate an off-the-shelf model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b01a490-4cf8-4328-8112-ed172650300e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from movinets import MoViNet\n",
+    "from movinets.config import _C\n",
+    "\n",
+    "# Use the original MoViNet with pretrained weights (Kinetics 600, i.e. 600 output classes).\n",
+    "model_vanilla = MoViNet(_C.MODEL.MoViNetA0, causal = False, pretrained = True )\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f7045eb-228a-4019-93a1-7fe69ee1068a",
+   "metadata": {},
+   "source": [
+    "## Make a model from trained movinet with hmdb51"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af1e4da4-e7b8-42e0-a5db-a2dfe6a5169d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# Load the model trained on HMDB51. It has been trained for 1 epoch.\n",
+    "model_trained = torch.load(\"movinet_hmdb51_1.pth\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb0acc9c-6d9f-4330-82ed-b3affebd4c52",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train, test = get_data_sets()\n",
+    "test.classes = {v: k for k, v in test.ci.items()}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a7c4bda9",
+   "metadata": {},
+   "source": [
+    "## See the shape of the tensor passed through the model.\n",
+    "\n",
+    "This point is good to have, and troubleshoot any problems with the input going into the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45065c28-1aa8-458c-9c87-b9435fd7f8de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = test[333]\n",
+    "video, audio, class_index = data\n",
+    "print(video.shape)\n",
+    "x = video.permute(1, 2, 3, 0)\n",
+    "x=(x*255).type(torch.uint8)\n",
+    "show_tensor(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f919086f-3058-4887-b559-12ee21285e80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ground_truth = class_index\n",
+    "all_classes = test.classes\n",
+    "print(f\"{len(all_classes)=} \\r\\n {all_classes=}\\r\\n {all_classes[ground_truth]=}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8dcb120c-32db-4210-9914-af0cd2f3dc0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add an extra dim to video tensor to make it compatible with model.\n",
+    "p = video[None, :]\n",
+    "y = model_trained(p)\n",
+    "\n",
+    "# Get predictions from the trained movinet\n",
+    "preds = torch.topk(y, 5, largest=True)\n",
+    "\n",
+    "#show the top k predictions.\n",
+    "for i in preds.indices[0]:\n",
+    "    print(test.classes[int(i)])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "242f4ac2",
+   "metadata": {},
+   "source": [
+    "### "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ff22c594-fa99-4981-9070-3aa658a18319",
+   "metadata": {},
+   "source": [
+    "### Predict with vanilla\n",
+    "\n",
+    "Take a random clip from the test Dataset (specified as an index between 0 and len test)\n",
+    "\n",
+    "Some indices will be out of 51 range, as the model had 600 classifications."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b8ec98a8-0a26-4341-8406-094af799317c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "video, _, ground_truth=test[10]\n",
+    "show_tensor((video.permute(1, 2, 3, 0)*255).type(torch.uint8))\n",
+    "\n",
+    "display(f\"{test.classes[ground_truth]=}\")\n",
+    "\n",
+    "y = model_vanilla(video[None, :])\n",
+    "op = torch.nn.Softmax(dim=1)\n",
+    "preds = torch.topk(op(y), 5, largest=True)\n",
+    "print(\"Predictions:\")\n",
+    "for i, prob in zip(preds.indices[0], preds.values[0]):\n",
+    "    try:\n",
+    "        prediction = test.classes[int(i)]\n",
+    "        probability = float(prob)\n",
+    "        print(f\"{prediction=}, {probability=}\")\n",
+    "    except IndexError:\n",
+    "        print(f\"Cannot find class for index={i}\")\n",
+    "    except KeyError:\n",
+    "        print(f\"Cannot find class for index={i}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2b9c3c9a-8f28-4188-b601-9f9f573c5ec9",
+   "metadata": {},
+   "source": [
+    "### Predict with trained.\n",
+    "\n",
+    "Way better predictions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc30fea4-96ec-4354-a0d5-544ae592a5e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "video, _, ground_truth=test[120]\n",
+    "show_tensor((video.permute(1, 2, 3, 0)*255).type(torch.uint8))\n",
+    "\n",
+    "\n",
+    "y = model_trained(video[None, :])\n",
+    "op = torch.nn.Softmax(dim=1)\n",
+    "preds = torch.topk(op(y), 5, largest=True)\n",
+    "print(\"Predictions:\")\n",
+    "for i, prob in zip(preds.indices[0], preds.values[0]):\n",
+    "    try:\n",
+    "        prediction = test.classes[int(i)]\n",
+    "        probability = float(prob)\n",
+    "        print(f\"{prediction=}, {probability=}\")\n",
+    "    except AttributeError as e:\n",
+    "        print(f\"Cannot find class for index={i}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "package",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/training/movinet_with_hmdb51/README.md b/training/movinet_with_hmdb51/README.md
new file mode 100644
index 0000000..53a9379
--- /dev/null
+++ b/training/movinet_with_hmdb51/README.md
@@ -0,0 +1,41 @@
+# Using HMDB51 to train movinet.
+
+[Download it here](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/#Downloads)
+
+[Pytorch implementation](https://pytorch.org/vision/main/generated/torchvision.datasets.HMDB51.html)
+
+### Needed packages and binaries.
+
+#### Mac
+    brew install rar ffmpeg
+#### Ubuntu
+    apt install rar unrar ffmpeg
+
+### Download data
+    mkdir input splits
+    DOWNLOAD=true bash prepare_data.sh
+
+### Download the helper scripts and classes.
+    wget https://raw.githubusercontent.com/aperture-data/aperturedb-applications/train_movinet/training/movinet_with_hmdb51/AHMDB51.py
+    wget https://raw.githubusercontent.com/aperture-data/aperturedb-applications/train_movinet/training/movinet_with_hmdb51/AVideoClips.py
+    wget https://raw.githubusercontent.com/aperture-data/aperturedb-applications/train_movinet/training/movinet_with_hmdb51/ingest_transcode.py
+    wget https://raw.githubusercontent.com/aperture-data/aperturedb-applications/train_movinet/training/movinet_with_hmdb51/train_movinet.py
+    wget https://raw.githubusercontent.com/aperture-data/aperturedb-applications/train_movinet/training/movinet_with_hmdb51/prepare_data.sh
+
+
+### Ingest into aperturedb
+    python ingest_transcode.py
+
+### Train model
+    python train_movinet.py training true
+
+### Explore classification abilities of the "off the shelf MoViNet" vs the fine-tuned version of the same.
+[Classify Vanilla trained](https://github.com/aperture-data/aperturedb-applications/blob/train_movinet/training/movinet_with_hmdb51/Classify-Vanilla-trained.ipynb)
+
+## Glossary of the files and resources.
+### AHMDB51.py
+It is a subclass of HMDB51 (from pytorch) and it incorporates the fact that the videos are stored in ApertureDB rather than as local files.
+
+### AVideoClips.py
+Video Clips resample the Videos into clips of 16 frame lengths, sampled at a specified fps.
+Since Movinet expects inputs of 172x172, or 200x200 pixels, there's also a transformation that is applied to a batch of videos.
diff --git a/training/movinet_with_hmdb51/ingest_transcode.py b/training/movinet_with_hmdb51/ingest_transcode.py
new file mode 100644
index 0000000..1b5e861
--- /dev/null
+++ b/training/movinet_with_hmdb51/ingest_transcode.py
@@ -0,0 +1,118 @@
+import os
+import subprocess
+
+from aperturedb.ParallelLoader import ParallelLoader
+from aperturedb.QueryGenerator import QueryGenerator
+from aperturedb.CommonLibrary import create_connector
+from aperturedb.Utils import Utils
+
+
+class TreeIngest(QueryGenerator):
+    """
+    Query generator that pairs the video files found under `root_path` with
+    the (split, code) annotations read from the files under `annotation_path`.
+
+    Each item yields the commands that add the Split entity, the video and
+    the IsInSplit connection between them, plus the video bytes.
+    """
+    def __init__(self, root_path: str, annotation_path: str) -> None:
+        self._files = [os.path.join(dirpath, f) for dirpath, dirs, filenames in os.walk(root_path) for f in filenames if f.endswith("avi")]
+        self._fc = {os.path.basename(file_path): file_path for file_path in self._files}
+        print(f"{len(self._files)=}, {len(self._fc)=}")
+
+        self._items = []
+        for dirpath, _, filenames in os.walk(annotation_path):
+            for filename in filenames:
+                # The split number is the last character before the first ".".
+                split = int(filename.split(".")[0][-1])
+                # Join with dirpath (not the root) so files in sub-directories open.
+                with open(os.path.join(dirpath, filename), "r") as ins:
+                    for line in ins:
+                        if not line.strip():
+                            continue  # tolerate blank lines
+                        path, code = line.split()
+                        if path not in self._fc:
+                            print(f"{path} from splits not in files")
+                        elif os.path.exists(self._fc[path]):
+                            self._items.append((split, int(code), self._fc[path]))
+        print("processed")
+
+    def __repr__(self) -> str:
+        return f"A collection of {len(self)} samples"
+
+    def __len__(self):
+        return len(self._items)
+
+    def getitem(self, subscript):
+        """
+        Build the ingest query for item `subscript`, transcoding it to mp4 first
+        when the mp4 file does not exist yet.
+        Returns:
+            (query, [video bytes]), or None when ffmpeg fails.
+        """
+        split, code, file_path = self._items[subscript]
+        dest_path = os.path.splitext(file_path)[0] + ".mp4"
+        if not os.path.exists(dest_path):
+            # An argument list (no shell) keeps quotes/spaces in file names safe.
+            result = subprocess.run(
+                ["ffmpeg", "-i", file_path, "-vcodec", "libx264", "-acodec", "aac", dest_path],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+            if result.returncode != 0:
+                print(f"Error transcoding {file_path}")
+                if os.path.exists(dest_path):
+                    os.remove(dest_path)  # a partial file would be reused on the next run
+                return None  # NOTE(review): confirm the loader tolerates None items
+        category = os.path.basename(os.path.dirname(file_path))
+        video_uid = os.path.basename(dest_path)
+        connection_uid = f"{video_uid}_{split}_{code}"
+        query = [
+            {
+                "AddEntity": {
+                    "_ref": 1,
+                    "class": "Split",
+                    "properties": {"id": split},
+                    "if_not_found": {"id": ["==", split]},
+                }
+            },
+            {
+                "AddVideo": {
+                    "_ref": 2,
+                    "properties": {"name": video_uid, "category": category},
+                    "if_not_found": {"name": ["==", video_uid]},
+                }
+            },
+            {
+                "AddConnection": {
+                    "class": "IsInSplit",
+                    "src": 2,
+                    "dst": 1,
+                    "properties": {"type": code, "id": connection_uid},
+                    "if_not_found": {"id": ["==", connection_uid]},
+                }
+            },
+        ]
+        with open(dest_path, "rb") as instream:
+            return query, [instream.read()]
+
+if __name__ == "__main__":
+    # Collect the (split, code, file) items from the extracted dataset tree.
+    generator = TreeIngest("input/categories", "splits/testTrainMulti_7030_splits")
+    print(generator)
+    # Create a client.
+    client = create_connector()
+    utils = Utils(client)
+
+    # Create the indexes outside of `assert`: under `python -O` assert
+    # statements are stripped, which would silently skip index creation.
+    if not utils.create_entity_index("Split", "id"):
+        raise RuntimeError("Failed to create index for Split")
+    if not utils.create_entity_index("_Video", "name"):
+        raise RuntimeError("Failed to create index for _Video")
+    if not utils.create_connection_index("IsInSplit", "id"):
+        raise RuntimeError("Failed to create index for IsInSplit")
+
+    # Create a loader and ingest the data, one item per batch.
+    loader = ParallelLoader(client=client, dry_run=False)
+    loader.ingest(generator=generator, batchsize=1, stats=True)
diff --git a/training/movinet_with_hmdb51/prepare_data.sh b/training/movinet_with_hmdb51/prepare_data.sh
new file mode 100644
index 0000000..9ab3084
--- /dev/null
+++ b/training/movinet_with_hmdb51/prepare_data.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Download (optional) and unpack the HMDB51 videos and train/test splits.
+# Set DOWNLOAD=true to fetch the archives before extracting them.
+set -e
+
+# Create every working directory up front so no `cd` can fail under `set -e`.
+mkdir -p input/categories splits
+
+if [ "${DOWNLOAD}" == "true" ]; then
+  echo "Downloading the data"
+  (cd input && wget https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar)
+  (cd splits && wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar)
+fi
+
+cd input/categories
+unrar x ../hmdb51_org.rar
+# Extract every .rar now present here; quote the name in case it has spaces.
+for i in *.rar; do
+  unrar x "$i"
+done
+cd ../..
+
+cd splits
+unrar x test_train_splits.rar
+cd ..
diff --git a/training/movinet_with_hmdb51/train_movinet.py b/training/movinet_with_hmdb51/train_movinet.py
new file mode 100644
index 0000000..0638d2d
--- /dev/null
+++ b/training/movinet_with_hmdb51/train_movinet.py
@@ -0,0 +1,276 @@
+import time
+import torchvision
+import torch.nn.functional as F
+import torch.optim as optim
+from torch.utils.data import DataLoader
+import torch
+from torchvision.transforms import v2 as T
+from movinets import MoViNet
+from movinets.config import _C
+
+
+
+from torch.utils.data.dataloader import DataLoader
+from typer import Typer
+from AHMDB51 import AHMDB51
+
+
+NUM_WORKERS = 0 # Number of workers for data loading, 0 = main process.
+N_EPOCHS = 1
+
+def get_common():
+    """
+    Return the settings shared by the training and data loading sections.
+
+    Seeds torch with a fixed value, then returns the tuple
+    (num_frames, clip_steps, Bs_Train, Bs_Test, transform, transform_test),
+    where transform is the randomized training pipeline and transform_test
+    is the pipeline without random steps used for testing.
+    """
+    torch.manual_seed(97)
+    num_frames, clip_steps = 16, 2
+    Bs_Train = Bs_Test = 16
+
+    def to_scaled(x):
+        # Move the last axis of a 4-D input to the front and divide by 255.
+        return x.permute(3, 0, 1, 2) / 255.
+
+    # Training: resize, random horizontal flip, then a random 172x172 crop.
+    transform = T.Compose([T.Lambda(to_scaled), T.Resize((200, 200)),
+                           T.RandomHorizontalFlip(), T.RandomCrop((172, 172))])
+    # Test: resize, then a 172x172 center crop.
+    transform_test = T.Compose([T.Lambda(to_scaled), T.Resize((200, 200)),
+                                T.CenterCrop((172, 172))])
+
+    return num_frames, clip_steps, Bs_Train, Bs_Test, transform, transform_test
+
+def get_local_data_sets():
+    """
+    Build the (train, test) datasets from local files via torchvision.
+    This is the original code.
+    """
+    num_frames, clip_steps, _, _, transform, transform_test = get_common()
+    shared = dict(frame_rate=5, step_between_clips=clip_steps, fold=1, num_workers=1)
+
+    def build(is_train, pipeline):
+        # Same files and clip settings for both; only flag and transform vary.
+        return torchvision.datasets.HMDB51('video_data/', 'test_train_splits/', num_frames,
+                                           train=is_train, transform=pipeline, **shared)
+
+    return build(True, transform), build(False, transform_test)
+
+def get_data_sets():
+    """
+    Get the datasets from aperturedb.
+    The data has been ingested previously.
+
+    Returns the (train, test) pair of AHMDB51 datasets. Both share the clip
+    settings from get_common() and differ only in the train flag and in the
+    transform that is applied.
+    """
+    num_frames, clip_steps, _, _, transform, transform_test = get_common()
+
+    # Keyword arguments common to both datasets.
+    shared_kwargs = {
+        "num_workers": NUM_WORKERS,
+        "frame_rate": 5,
+        "frames_per_clip": num_frames,
+        "step_between_clips": clip_steps,
+    }
+
+    # The training set gets the randomized transform, the test set the
+    # one without random steps.
+    hmdb51_train = AHMDB51(train=True, transform=transform, **shared_kwargs)
+    hmdb51_test = AHMDB51(train=False, transform=transform_test, **shared_kwargs)
+
+
+    return hmdb51_train, hmdb51_test
+
+def get_data_loaders(use_aperturedb: bool=False):
+    """
+    Wrap the train and test datasets in DataLoaders and return both.
+    use_aperturedb selects aperturedb (True) or local files (False) as source.
+    """
+    _, _, Bs_Train, Bs_Test, _, _ = get_common()
+    # Pick the dataset factory first, then build both datasets with it.
+    dataset_source = get_data_sets if use_aperturedb else get_local_data_sets
+    hmdb51_train, hmdb51_test = dataset_source()
+
+    # Only the training data is shuffled.
+    train_loader = DataLoader(hmdb51_train, batch_size=Bs_Train, shuffle=True)
+    test_loader = DataLoader(hmdb51_test, batch_size=Bs_Test, shuffle=False)
+    return train_loader, test_loader
+
+def train_iter(model, optimz, data_load, loss_val):
+    """
+    Run one CPU training pass over data_load, stepping optimz after every batch.
+    Every 50th batch the loss is printed and appended to loss_val.
+    """
+    total = len(data_load.dataset)
+    model.train()
+    model.cpu()
+    model.clean_activation_buffers()
+    optimz.zero_grad()
+    for step, (data, _, target) in enumerate(data_load):
+        log_probs = F.log_softmax(model(data.cpu()), dim=1)
+        loss = F.nll_loss(log_probs, target.cpu())
+        loss.backward()
+        optimz.step()
+        optimz.zero_grad()
+        model.clean_activation_buffers()
+        if step % 50:
+            continue
+        print(f'[{step * len(data):5}/{total:5} ({100 * step / len(data_load):3.0f}%)]  Loss: {loss.item():6.4f}')
+        loss_val.append(loss.item())
+
+def evaluate(model, data_load, loss_val):
+    """
+    Score model on data_load without tracking gradients.
+
+    Sums the per-sample negative log-likelihood and the number of correct
+    predictions over all batches, appends the per-sample average loss to
+    loss_val, and prints that loss together with the accuracy.
+    """
+    model.eval()
+    total = len(data_load.dataset)
+    correct = 0
+    loss_sum = 0
+    model.clean_activation_buffers()
+    with torch.no_grad():
+        for data, _, target in data_load:
+            labels = target.cpu()
+            log_probs = F.log_softmax(model(data.cpu()), dim=1)
+            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
+            # The index of the largest value along dim 1 is the prediction.
+            correct += torch.max(log_probs, dim=1).indices.eq(labels).sum()
+            model.clean_activation_buffers()
+
+    mean_loss = loss_sum / total
+    loss_val.append(mean_loss)
+    print(f'\nAverage test loss: {mean_loss:.4f}  Accuracy:{correct:5}/{total:5}'
+          f' ({100.0 * correct / total:4.2f}%)\n')
+
+def train_iter_stream(model, optimz, data_load, loss_val, n_clips = 2, n_clip_frames=8):
+    """
+    In causal mode with stream buffer a single video is fed to the network
+    using subclips of length n_clip_frames.
+    n_clips*n_clip_frames should be equal to the total number of frames present
+    in the video.
+
+    Gradients of all subclips are accumulated (each clip loss is divided by
+    n_clips) and one optimizer step is taken per batch. Every 50th batch the
+    unscaled loss of the last subclip is printed and appended to loss_val.
+
+    model : network exposing clean_activation_buffers()
+    optimz : optimizer stepped once per batch
+    data_load : loader yielding (data, _, target) batches
+    loss_val : list that receives the logged loss values
+    n_clips : number of clips that are used
+    n_clip_frames : number of frames contained in each clip
+    """
+    samples = len(data_load.dataset)
+    model.cpu()
+    model.train()
+    # Start from empty activation buffers and zeroed gradients.
+    model.clean_activation_buffers()
+    optimz.zero_grad()
+    for i, (data, _, target) in enumerate(data_load):
+        data = data.cpu()
+        target = target.cpu()
+        # Backward pass for each clip; clips are slices along dimension 2.
+        for j in range(n_clips):
+            clip = data[:, :, n_clip_frames * j:n_clip_frames * (j + 1)]
+            output = F.log_softmax(model(clip), dim=1)
+            loss = F.nll_loss(output, target) / n_clips
+            loss.backward()
+        # Undo the 1/n_clips scaling: the logged value is the last clip's loss.
+        l_batch = loss.item() * n_clips
+        optimz.step()
+        optimz.zero_grad()
+        # Clean the buffer of activations before the next batch.
+        model.clean_activation_buffers()
+        if i % 50 == 0:
+            print(f'[{i * len(data):5}/{samples:5} ({100 * i / len(data_load):3.0f}%)]  Loss: {l_batch:6.4f}')
+            loss_val.append(l_batch)
+
+def evaluate_stream(model, data_load, loss_val, n_clips = 2, n_clip_frames=8):
+    """
+    Evaluate model in stream mode: every batch is fed as n_clips subclips of
+    n_clip_frames frames, and the output for the last subclip provides both
+    the batch loss and the predictions. The mean of the batch losses is
+    appended to loss_val and printed together with the accuracy.
+    """
+    model.eval()
+    model.cpu()
+    total = len(data_load.dataset)
+    correct = 0
+    loss_sum = 0
+    with torch.no_grad():
+        for data, _, target in data_load:
+            data, target = data.cpu(), target.cpu()
+            # Every batch starts from empty activation buffers.
+            model.clean_activation_buffers()
+            for j in range(n_clips):
+                first = n_clip_frames * j
+                output = F.log_softmax(model(data[:, :, first:first + n_clip_frames]), dim=1)
+                loss = F.nll_loss(output, target)
+            loss_sum += loss.item()
+            correct += torch.max(output, dim=1).indices.eq(target).sum()
+    mean_loss = loss_sum / len(data_load)
+    loss_val.append(mean_loss)
+    print(f'\nAverage test loss: {mean_loss:.4f}  Accuracy:{correct:5}/{total:5}'
+          f' ({100.0 * correct / total:4.2f}%)\n')
+
+def train(train_loader, test_loader):
+    """
+    Fine-tune a pretrained MoViNet A0 for N_EPOCHS epochs, evaluating and saving
+    after each one. The HMDB51 split used is decided by the loaders passed in.
+    """
+    # Use the original movinet based on Kinetics400 dataset when we get pretrained.
+    model = MoViNet(_C.MODEL.MoViNetA0, causal = False, pretrained = True )
+    start_time = time.time()
+    # HMDB51 has 51 classes, so classifier[3] is replaced by a 51-channel layer.
+    model.classifier[3] = torch.nn.Conv3d(2048, 51, (1,1,1))
+    optimz = optim.Adam(model.parameters(), lr=0.00005)
+    trloss_val, tsloss_val = [], []
+    for epoch in range(1, N_EPOCHS + 1):
+        print('Epoch:', epoch)
+        train_iter(model, optimz, train_loader, trloss_val)
+        evaluate(model, test_loader, tsloss_val)
+        # Saved after every epoch so epochs can be compared: first a dict with
+        # the state dicts, epoch and loss histories, then the whole model object.
+        checkpoint = {
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimz.state_dict(),
+            'epoch': epoch,
+            'train_loss': trloss_val,
+            'test_loss': tsloss_val,
+        }
+        torch.save(checkpoint, f'movinet_{epoch}.pth')
+        torch.save(model, f'movinet_hmdb51_{epoch}.pth')
+    elapsed = time.time() - start_time
+    print('Execution time:', '{:5.2f}'.format(elapsed), 'seconds')
+
+
+
+app = Typer()  # CLI app; the functions decorated with @app.command() below are its commands
+
+@app.command()
+def inference(use_aperturedb:bool):
+    # Currently only prints the `ci` attribute of the test dataset.
+    print(get_data_loaders(use_aperturedb=use_aperturedb)[1].dataset.ci)
+
+@app.command()
+def training(use_aperturedb:bool):
+    # Train first; the mapping file is only written once training has finished.
+    import json
+    loaders = get_data_loaders(use_aperturedb=use_aperturedb)
+    train(train_loader=loaders[0], test_loader=loaders[1])
+
+    # Preserve the classes to index mapping for this model (the inverted `ci`).
+    inverted = dict((value, key) for key, value in loaders[1].dataset.ci.items())
+    with open("classes.json", "w") as out:
+        json.dump(inverted, out, indent=2)
+
+if __name__ == "__main__":
+   app()  # Hand control to the Typer app defined above.
\ No newline at end of file