|
141 | 141 | "id": "gTr6BFHuva0v" |
142 | 142 | }, |
143 | 143 | "source": [ |
144 | | - "## Grab our model weights and demo data:\n" |
| 144 | + "## Grab our demo video (from EPIC-KITCHENs-100):\n" |
145 | 145 | ] |
146 | 146 | }, |
147 | 147 | { |
|
152 | 152 | }, |
153 | 153 | "outputs": [], |
154 | 154 | "source": [ |
155 | | - "# currently, demo data and a 0.5B model is hosted here:\n", |
| 155 | + "# demo data is hosted here:\n", |
156 | 156 | "!gdown --folder https://drive.google.com/drive/folders/1ql8MSWTK-2_uGH1EzPOrifauwUNg4E6i -O ./data" |
157 | 157 | ] |
158 | 158 | }, |
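If the Drive download is interrupted, Colab can be left with a partial `./data` folder and the later cells fail with a confusing error. A minimal sanity check is sketched below; it is not part of this diff, and the `EK100_512/EK100` layout is assumed from the `data_root` used later in the notebook.

```python
import os

# Path assumed from the data_root set later in this notebook; adjust if the
# Google Drive folder layout changes.
data_root = '/content/data/EK100_512/EK100'

if not os.path.isdir(data_root):
    raise FileNotFoundError(
        f"Expected the demo video under {data_root}; "
        "re-run the gdown cell above or check the Drive folder layout."
    )

# List a few of the downloaded files as a quick confirmation.
print("Demo files found:", sorted(os.listdir(data_root))[:5])
```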
|
190 | 190 | "source": [ |
191 | 191 | "## Setting Key parameters for LLaVAction:\n", |
192 | 192 | "\n", |
193 | | - "- Specify where to load the EPIC-KITCHENS-100 videos and the LLaVAction checkpoint for the inference. If you followed our steps above, these paths do not need changed for Google Colaboratory.\n", |
| 193 | + "- Specify where to load the EPIC-KITCHENS-100 video and grab the LLaVAction checkpoint from HuggingFace for the inference. If you followed our steps above, these paths do not need changed for Google Colaboratory.\n", |
194 | 194 | "- ⚠️ You can adjust `n_frames` to a higher number for better performance (which we empirically observed), but note this uses more compute." |
195 | 195 | ] |
196 | 196 | }, |
|
235 | 235 | "output_type": "stream", |
236 | 236 | "text": [ |
237 | 237 | "overwrite_config {}\n", |
238 | | - "Loaded LLaVA model: /content/data/checkpoint/dev_ov_0.5b_16f_top5_full\n" |
| 238 | + "Loaded LLaVA model: MLAdaptiveIntelligence/LLaVAction-0.5B\n" |
239 | 239 | ] |
240 | 240 | }, |
241 | 241 | { |
|
290 | 290 | ], |
291 | 291 | "source": [ |
292 | 292 | "data_root = '/content/data/EK100_512/EK100'\n", |
293 | | - "checkpoint_path = 'MLAdaptiveIntelligence/LLaVAction-0.5B' # or MLAdaptiveIntelligence/LLaVAction-7B\n", |
| 293 | + "huggingface_checkpoint_path = 'MLAdaptiveIntelligence/LLaVAction-0.5B' # or MLAdaptiveIntelligence/LLaVAction-7B\n", |
294 | 294 | "n_frames = 16\n", |
295 | 295 | "\n", |
296 | 296 | "inferencer = SelectiveInferencer(data_root,\n", |
297 | | - " checkpoint_path,\n", |
| 297 | + " huggingface_checkpoint_path,\n", |
298 | 298 | " include_time_instruction = False,\n", |
299 | 299 | " n_frames = n_frames,\n", |
300 | 300 | " use_flash_attention = True)" |
|