|
141 | 141 | "id": "gTr6BFHuva0v" |
142 | 142 | }, |
143 | 143 | "source": [ |
144 | | - "## Grab our model weights and demo data:\n" |
| 144 | + "## Grab our demo video (from EPIC-KITCHENs-100):\n" |
145 | 145 | ] |
146 | 146 | }, |
147 | 147 | { |
|
152 | 152 | }, |
153 | 153 | "outputs": [], |
154 | 154 | "source": [ |
155 | | - "# currently, demo data and a 0.5B model is hosted here:\n", |
| 155 | + "# demo data is hosted here:\n", |
156 | 156 | "!gdown --folder https://drive.google.com/drive/folders/1ql8MSWTK-2_uGH1EzPOrifauwUNg4E6i -O ./data" |
157 | 157 | ] |
158 | 158 | }, |
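If the Drive download is interrupted, Colab can be left with a partial `./data` folder and the later cells fail with a confusing error. A minimal sanity check is sketched below; it is not part of this diff, and the `EK100_512/EK100` layout is assumed from the `data_root` used later in the notebook.

```python
import os

# Path assumed from the data_root set later in this notebook; adjust if the
# Google Drive folder layout changes.
data_root = '/content/data/EK100_512/EK100'

if not os.path.isdir(data_root):
    raise FileNotFoundError(
        f"Expected the demo video under {data_root}; "
        "re-run the gdown cell above or check the Drive folder layout."
    )

# List a few of the downloaded files as a quick confirmation.
print("Demo files found:", sorted(os.listdir(data_root))[:5])
```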
|
190 | 190 | "source": [ |
191 | 191 | "## Setting Key parameters for LLaVAction:\n", |
192 | 192 | "\n", |
193 | | - "- Specify where to load the EPIC-KITCHENS-100 videos and the LLaVAction checkpoint for the inference. If you followed our steps above, these paths do not need changed for Google Colaboratory.\n", |
| 193 | + "- Specify where to load the EPIC-KITCHENS-100 video and grab the LLaVAction checkpoint from HuggingFace for the inference. If you followed our steps above, these paths do not need changed for Google Colaboratory.\n", |
194 | 194 | "- ⚠️ You can adjust `n_frames` to a higher number for better performance (which we empirically observed), but note this uses more compute." |
195 | 195 | ] |
196 | 196 | }, |
|
235 | 235 | "output_type": "stream", |
236 | 236 | "text": [ |
237 | 237 | "overwrite_config {}\n", |
238 | | - "Loaded LLaVA model: /content/data/checkpoint/dev_ov_0.5b_16f_top5_full\n" |
| 238 | + "Loaded LLaVA model: MLAdaptiveIntelligence/LLaVAction-0.5B\n" |
239 | 239 | ] |
240 | 240 | }, |
241 | 241 | { |
|
290 | 290 | ], |
291 | 291 | "source": [ |
292 | 292 | "data_root = '/content/data/EK100_512/EK100'\n", |
293 | | - "checkpoint_path = 'MLAdaptiveIntelligence/LLaVAction-0.5B' # or MLAdaptiveIntelligence/LLaVAction-7B\n", |
| 293 | + "huggingface_checkpoint_path = 'MLAdaptiveIntelligence/LLaVAction-0.5B' # or MLAdaptiveIntelligence/LLaVAction-7B\n", |
294 | 294 | "n_frames = 16\n", |
295 | 295 | "\n", |
296 | 296 | "inferencer = SelectiveInferencer(data_root,\n", |
297 | | - " checkpoint_path,\n", |
| 297 | + " huggingface_checkpoint_path,\n", |
298 | 298 | " include_time_instruction = False,\n", |
299 | 299 | " n_frames = n_frames,\n", |
300 | 300 | " use_flash_attention = True)" |
|