|
1 | 1 | { |
2 | 2 | "cells": [ |
3 | 3 | { |
4 | | - "attachments": {}, |
5 | 4 | "cell_type": "markdown", |
6 | 5 | "id": "SwSYWR4vzk_e", |
7 | 6 | "metadata": { |
|
149 | 148 | ] |
150 | 149 | }, |
151 | 150 | { |
152 | | - "attachments": {}, |
153 | 151 | "cell_type": "markdown", |
154 | 152 | "id": "8aba34e1", |
155 | 153 | "metadata": {}, |
|
247 | 245 | "</div>" |
248 | 246 | ], |
249 | 247 | "text/plain": [ |
250 | | - " path noisy_labels_0 noisy_labels_1 \\\n", |
251 | | - "0 train/n02979186/n02979186_9036.JPEG n02979186 n02979186 \n", |
| 248 | + " path noisy_labels_0 noisy_labels_1 \n", |
| 249 | + "0 train/n02979186/n02979186_9036.JPEG n02979186 n02979186 \\\n", |
252 | 250 | "1 train/n02979186/n02979186_11957.JPEG n02979186 n02979186 \n", |
253 | 251 | "2 train/n02979186/n02979186_9715.JPEG n02979186 n02979186 \n", |
254 | 252 | "\n", |
|
269 | 267 | ] |
270 | 268 | }, |
271 | 269 | { |
272 | | - "attachments": {}, |
273 | 270 | "cell_type": "markdown", |
274 | 271 | "id": "dfc957bf", |
275 | 272 | "metadata": {}, |
|
322 | 319 | " <thead>\n", |
323 | 320 | " <tr style=\"text-align: right;\">\n", |
324 | 321 | " <th></th>\n", |
325 | | - " <th>img_filename</th>\n", |
| 322 | + " <th>filename</th>\n", |
326 | 323 | " <th>label</th>\n", |
327 | 324 | " <th>split</th>\n", |
328 | 325 | " </tr>\n", |
329 | 326 | " </thead>\n", |
330 | 327 | " <tbody>\n", |
331 | 328 | " <tr>\n", |
332 | 329 | " <th>0</th>\n", |
333 | | - " <td>train/n02979186/n02979186_9036.JPEG</td>\n", |
| 330 | + " <td>imagenette2-160/train/n02979186/n02979186_9036...</td>\n", |
334 | 331 | " <td>cassette_player</td>\n", |
335 | | - " <td>train</td>\n", |
| 332 | + " <td>imagenette2-160</td>\n", |
336 | 333 | " </tr>\n", |
337 | 334 | " <tr>\n", |
338 | 335 | " <th>1</th>\n", |
339 | | - " <td>train/n02979186/n02979186_11957.JPEG</td>\n", |
| 336 | + " <td>imagenette2-160/train/n02979186/n02979186_1195...</td>\n", |
340 | 337 | " <td>cassette_player</td>\n", |
341 | | - " <td>train</td>\n", |
| 338 | + " <td>imagenette2-160</td>\n", |
342 | 339 | " </tr>\n", |
343 | 340 | " <tr>\n", |
344 | 341 | " <th>2</th>\n", |
345 | | - " <td>train/n02979186/n02979186_9715.JPEG</td>\n", |
| 342 | + " <td>imagenette2-160/train/n02979186/n02979186_9715...</td>\n", |
346 | 343 | " <td>cassette_player</td>\n", |
347 | | - " <td>train</td>\n", |
| 344 | + " <td>imagenette2-160</td>\n", |
348 | 345 | " </tr>\n", |
349 | 346 | " <tr>\n", |
350 | 347 | " <th>3</th>\n", |
351 | | - " <td>train/n02979186/n02979186_21736.JPEG</td>\n", |
| 348 | + " <td>imagenette2-160/train/n02979186/n02979186_2173...</td>\n", |
352 | 349 | " <td>cassette_player</td>\n", |
353 | | - " <td>train</td>\n", |
| 350 | + " <td>imagenette2-160</td>\n", |
354 | 351 | " </tr>\n", |
355 | 352 | " <tr>\n", |
356 | 353 | " <th>4</th>\n", |
357 | | - " <td>train/n02979186/ILSVRC2012_val_00046953.JPEG</td>\n", |
| 354 | + " <td>imagenette2-160/train/n02979186/ILSVRC2012_val...</td>\n", |
358 | 355 | " <td>cassette_player</td>\n", |
359 | | - " <td>train</td>\n", |
| 356 | + " <td>imagenette2-160</td>\n", |
360 | 357 | " </tr>\n", |
361 | 358 | " <tr>\n", |
362 | 359 | " <th>...</th>\n", |
|
366 | 363 | " </tr>\n", |
367 | 364 | " <tr>\n", |
368 | 365 | " <th>13389</th>\n", |
369 | | - " <td>val/n03425413/n03425413_17521.JPEG</td>\n", |
| 366 | + " <td>imagenette2-160/val/n03425413/n03425413_17521....</td>\n", |
370 | 367 | " <td>gas_pump</td>\n", |
371 | | - " <td>val</td>\n", |
| 368 | + " <td>imagenette2-160</td>\n", |
372 | 369 | " </tr>\n", |
373 | 370 | " <tr>\n", |
374 | 371 | " <th>13390</th>\n", |
375 | | - " <td>val/n03425413/n03425413_20711.JPEG</td>\n", |
| 372 | + " <td>imagenette2-160/val/n03425413/n03425413_20711....</td>\n", |
376 | 373 | " <td>gas_pump</td>\n", |
377 | | - " <td>val</td>\n", |
| 374 | + " <td>imagenette2-160</td>\n", |
378 | 375 | " </tr>\n", |
379 | 376 | " <tr>\n", |
380 | 377 | " <th>13391</th>\n", |
381 | | - " <td>val/n03425413/n03425413_19050.JPEG</td>\n", |
| 378 | + " <td>imagenette2-160/val/n03425413/n03425413_19050....</td>\n", |
382 | 379 | " <td>gas_pump</td>\n", |
383 | | - " <td>val</td>\n", |
| 380 | + " <td>imagenette2-160</td>\n", |
384 | 381 | " </tr>\n", |
385 | 382 | " <tr>\n", |
386 | 383 | " <th>13392</th>\n", |
387 | | - " <td>val/n03425413/n03425413_13831.JPEG</td>\n", |
| 384 | + " <td>imagenette2-160/val/n03425413/n03425413_13831....</td>\n", |
388 | 385 | " <td>gas_pump</td>\n", |
389 | | - " <td>val</td>\n", |
| 386 | + " <td>imagenette2-160</td>\n", |
390 | 387 | " </tr>\n", |
391 | 388 | " <tr>\n", |
392 | 389 | " <th>13393</th>\n", |
393 | | - " <td>val/n03425413/n03425413_1242.JPEG</td>\n", |
| 390 | + " <td>imagenette2-160/val/n03425413/n03425413_1242.JPEG</td>\n", |
394 | 391 | " <td>gas_pump</td>\n", |
395 | | - " <td>val</td>\n", |
| 392 | + " <td>imagenette2-160</td>\n", |
396 | 393 | " </tr>\n", |
397 | 394 | " </tbody>\n", |
398 | 395 | "</table>\n", |
399 | 396 | "<p>13394 rows × 3 columns</p>\n", |
400 | 397 | "</div>" |
401 | 398 | ], |
402 | 399 | "text/plain": [ |
403 | | - " img_filename label split\n", |
404 | | - "0 train/n02979186/n02979186_9036.JPEG cassette_player train\n", |
405 | | - "1 train/n02979186/n02979186_11957.JPEG cassette_player train\n", |
406 | | - "2 train/n02979186/n02979186_9715.JPEG cassette_player train\n", |
407 | | - "3 train/n02979186/n02979186_21736.JPEG cassette_player train\n", |
408 | | - "4 train/n02979186/ILSVRC2012_val_00046953.JPEG cassette_player train\n", |
409 | | - "... ... ... ...\n", |
410 | | - "13389 val/n03425413/n03425413_17521.JPEG gas_pump val\n", |
411 | | - "13390 val/n03425413/n03425413_20711.JPEG gas_pump val\n", |
412 | | - "13391 val/n03425413/n03425413_19050.JPEG gas_pump val\n", |
413 | | - "13392 val/n03425413/n03425413_13831.JPEG gas_pump val\n", |
414 | | - "13393 val/n03425413/n03425413_1242.JPEG gas_pump val\n", |
| 400 | + " filename label \n", |
| 401 | + "0 imagenette2-160/train/n02979186/n02979186_9036... cassette_player \\\n", |
| 402 | + "1 imagenette2-160/train/n02979186/n02979186_1195... cassette_player \n", |
| 403 | + "2 imagenette2-160/train/n02979186/n02979186_9715... cassette_player \n", |
| 404 | + "3 imagenette2-160/train/n02979186/n02979186_2173... cassette_player \n", |
| 405 | + "4 imagenette2-160/train/n02979186/ILSVRC2012_val... cassette_player \n", |
| 406 | + "... ... ... \n", |
| 407 | + "13389 imagenette2-160/val/n03425413/n03425413_17521.... gas_pump \n", |
| 408 | + "13390 imagenette2-160/val/n03425413/n03425413_20711.... gas_pump \n", |
| 409 | + "13391 imagenette2-160/val/n03425413/n03425413_19050.... gas_pump \n", |
| 410 | + "13392 imagenette2-160/val/n03425413/n03425413_13831.... gas_pump \n", |
| 411 | + "13393 imagenette2-160/val/n03425413/n03425413_1242.JPEG gas_pump \n", |
| 412 | + "\n", |
| 413 | + " split \n", |
| 414 | + "0 imagenette2-160 \n", |
| 415 | + "1 imagenette2-160 \n", |
| 416 | + "2 imagenette2-160 \n", |
| 417 | + "3 imagenette2-160 \n", |
| 418 | + "4 imagenette2-160 \n", |
| 419 | + "... ... \n", |
| 420 | + "13389 imagenette2-160 \n", |
| 421 | + "13390 imagenette2-160 \n", |
| 422 | + "13391 imagenette2-160 \n", |
| 423 | + "13392 imagenette2-160 \n", |
| 424 | + "13393 imagenette2-160 \n", |
415 | 425 | "\n", |
416 | 426 | "[13394 rows x 3 columns]" |
417 | 427 | ] |
|
428 | 438 | "# rename columns to fastdup's column names\n", |
429 | 439 | "df_annot = df_annot.rename({'noisy_labels_0': 'label', 'path': 'filename'}, axis='columns')\n", |
430 | 440 | "\n", |
| 441 | + "# append datadir\n", |
| 442 | + "df_annot['filename'] = df_annot['filename'].apply(lambda x: data_dir + x)\n", |
| 443 | + "\n", |
431 | 444 | "# create split column\n", |
432 | 445 | "df_annot['split'] = df_annot['filename'].apply(lambda x: x.split(\"/\")[0])\n", |
433 | 446 | "\n", |
|
439 | 452 | ] |
440 | 453 | }, |
441 | 454 | { |
442 | | - "attachments": {}, |
443 | 455 | "cell_type": "markdown", |
444 | 456 | "id": "0c648ed1-5016-4230-9873-546eb510b764", |
445 | 457 | "metadata": { |
|
453 | 465 | }, |
454 | 466 | { |
455 | 467 | "cell_type": "code", |
456 | | - "execution_count": 1, |
| 468 | + "execution_count": 6, |
457 | 469 | "id": "7f69d8b2", |
458 | 470 | "metadata": {}, |
459 | 471 | "outputs": [ |
460 | 472 | { |
461 | 473 | "data": { |
462 | 474 | "text/plain": [ |
463 | | - "'0.918'" |
| 475 | + "'0.922'" |
464 | 476 | ] |
465 | 477 | }, |
466 | | - "execution_count": 1, |
| 478 | + "execution_count": 6, |
467 | 479 | "metadata": {}, |
468 | 480 | "output_type": "execute_result" |
469 | 481 | } |
|
522 | 534 | ] |
523 | 535 | }, |
524 | 536 | { |
525 | | - "attachments": {}, |
526 | 537 | "cell_type": "markdown", |
527 | 538 | "id": "62e35a12-fadd-4b3f-bcab-69e6e67862a4", |
528 | 539 | "metadata": {}, |
|
1662 | 1673 | ] |
1663 | 1674 | }, |
1664 | 1675 | { |
1665 | | - "attachments": {}, |
1666 | 1676 | "cell_type": "markdown", |
1667 | 1677 | "id": "67378b58", |
1668 | 1678 | "metadata": {}, |
|
3998 | 4008 | ] |
3999 | 4009 | }, |
4000 | 4010 | { |
4001 | | - "attachments": {}, |
4002 | 4011 | "cell_type": "markdown", |
4003 | 4012 | "id": "c2c393be-2b42-4814-8688-03d2be9e8998", |
4004 | 4013 | "metadata": {}, |
|
4833 | 4842 | ] |
4834 | 4843 | }, |
4835 | 4844 | { |
4836 | | - "attachments": {}, |
4837 | 4845 | "cell_type": "markdown", |
4838 | 4846 | "id": "e10989e1", |
4839 | 4847 | "metadata": {}, |
|
7707 | 7715 | ] |
7708 | 7716 | }, |
7709 | 7717 | { |
7710 | | - "attachments": {}, |
7711 | 7718 | "cell_type": "markdown", |
7712 | 7719 | "id": "569cb878", |
7713 | 7720 | "metadata": {}, |
|
7778 | 7785 | "name": "python", |
7779 | 7786 | "nbconvert_exporter": "python", |
7780 | 7787 | "pygments_lexer": "ipython3", |
7781 | | - "version": "3.10.9" |
| 7788 | + "version": "3.10.11" |
7782 | 7789 | } |
7783 | 7790 | }, |
7784 | 7791 | "nbformat": 4, |
|
0 commit comments