Fix failure to read state_dict when loading the model checkpoint

codinglabsong · codinglabsong · commit a7c75c8f01b6 · 2025-06-04T09:47:13.000+09:00
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ Supports local development, SageMaker training, flexible dataset prep, Weights &
 │   └── train.py               # Main training script
 ├── .env.example               # Example for API keys/secrets
 ├── requirements.txt           # Pip dependencies
-├── gradio_app.py              # Gradio tnterface
+├── gradio_app.py              # Gradio interface
 ├── README.md
 └── .gitignore
 ```
diff --git a/class_names.txt b/class_names.txt
@@ -0,0 +1,101 @@
+apple_pie
+baby_back_ribs
+baklava
+beef_carpaccio
+beef_tartare
+beet_salad
+beignets
+bibimbap
+bread_pudding
+breakfast_burrito
+bruschetta
+caesar_salad
+cannoli
+caprese_salad
+carrot_cake
+ceviche
+cheese_plate
+cheesecake
+chicken_curry
+chicken_quesadilla
+chicken_wings
+chocolate_cake
+chocolate_mousse
+churros
+clam_chowder
+club_sandwich
+crab_cakes
+creme_brulee
+croque_madame
+cup_cakes
+deviled_eggs
+donuts
+dumplings
+edamame
+eggs_benedict
+escargots
+falafel
+filet_mignon
+fish_and_chips
+foie_gras
+french_fries
+french_onion_soup
+french_toast
+fried_calamari
+fried_rice
+frozen_yogurt
+garlic_bread
+gnocchi
+greek_salad
+grilled_cheese_sandwich
+grilled_salmon
+guacamole
+gyoza
+hamburger
+hot_and_sour_soup
+hot_dog
+huevos_rancheros
+hummus
+ice_cream
+lasagna
+lobster_bisque
+lobster_roll_sandwich
+macaroni_and_cheese
+macarons
+miso_soup
+mussels
+nachos
+omelette
+onion_rings
+oysters
+pad_thai
+paella
+pancakes
+panna_cotta
+peking_duck
+pho
+pizza
+pork_chop
+poutine
+prime_rib
+pulled_pork_sandwich
+ramen
+ravioli
+red_velvet_cake
+risotto
+samosa
+sashimi
+scallops
+seaweed_salad
+shrimp_and_grits
+spaghetti_bolognese
+spaghetti_carbonara
+spring_rolls
+steak
+strawberry_shortcake
+sushi
+tacos
+takoyaki
+tiramisu
+tuna_tartare
+waffles
diff --git a/create_dataset_names.py b/create_dataset_names.py
@@ -0,0 +1,8 @@
+import os
+
+train_dir = "data/sample/train"
+class_names = sorted(os.listdir(train_dir))
+
+with open("class_names.txt", "w") as f:
+    for name in class_names:
+        f.write(name + "\n")
diff --git a/gradio_app.py b/gradio_app.py
@@ -1,4 +1,5 @@
-import os, torch, yaml
+from collections import OrderedDict
+import torch, yaml
 import torch.nn as nn
 import torch.nn.functional as F
 from torchvision import models, transforms
@@ -23,20 +24,30 @@ def build_model(num_classes):
     return model
 
 # 2. Load class names
-# Assuming same folder structure as the default flags for train.py's train-dir
-train_dir = "data/sample/train"
-class_names = sorted(os.listdir(train_dir))
+# Load class names from file
+with open("class_names.txt") as f:
+    class_names = [line.strip() for line in f]
 
 # 3. Build and load the model
 num_classes = len(class_names)
 model = build_model(num_classes)
-model.load_state_dict(torch.load("output/model.pth", map_location="cpu"))
+
+# Strip the '_orig_mod.' prefix from checkpoint keys if present (a compiled version of the model may have been saved during training).
+ckpt = torch.load("output/model.pth", map_location='cpu')
+new_state_dict = OrderedDict()
+for k, v in ckpt.items():
+    if k.startswith('_orig_mod.'):
+        new_state_dict[k[len('_orig_mod.'):]] = v
+    else:
+        new_state_dict[k] = v
+
+model.load_state_dict(new_state_dict)
 model.eval()
 
 # 4. Preprocessing: same as test transforms in train.py
 preprocess = transforms.Compose([
     transforms.Resize(256),
-    transforms.CenterCrop(cfg["estimator"]["hyperparameters"]["img_size"]),
+    transforms.CenterCrop(cfg["estimator"]["hyperparameters"]["img-size"]),
     transforms.ToTensor(),
     transforms.Normalize([0.485,0.456,0.406], 
                          [0.229,0.224,0.225])
diff --git a/scripts/download_full.py b/scripts/download_full.py
@@ -16,7 +16,7 @@ def main():
         shutil.rmtree(out)
     out.mkdir(parents=True)
     
-    # download MNIST into .cache
+    # download dataset into .cache
     cache = Path(".cache")
     ds_train = datasets.Food101(cache, split="train", download=True)
     ds_test = datasets.Food101(cache, split="test", download=True)