Load the initial state dict from a file path via torch.load instead of passing a pre-unpickled dict

lotif · lotif · commit 1bd5d4681805 · 2025-08-14T15:21:16.000-03:00
diff --git a/src/midst_toolkit/models/clavaddpm/model.py b/src/midst_toolkit/models/clavaddpm/model.py
@@ -283,7 +283,7 @@ def clava_clustering(tables, relation_order, save_dir, configs):
     return tables, all_group_lengths_prob_dicts
 
 
-def clava_training(tables, relation_order, save_dir, configs, device="cuda", initial_state_dict=None):
+def clava_training(tables, relation_order, save_dir, configs, device="cuda", initial_state_file_path=None):
     models = {}
     for parent, child in relation_order:
         print(f"Training {parent} -> {child} model from scratch")
@@ -298,7 +298,7 @@ def clava_training(tables, relation_order, save_dir, configs, device="cuda", ini
             child,
             configs,
             device,
-            initial_state_dict,
+            initial_state_file_path,
         )
 
         models[(parent, child)] = result
@@ -324,7 +324,7 @@ def child_training(
     child_name: str,
     configs: dict[str, Any],
     device: str = "cuda",
-    initial_state_dict: dict[str, Tensor] | None = None,
+    initial_state_file_path: Path | None = None,
 ) -> dict[str, Any]:
     if parent_name is None:
         y_col = "placeholder"
@@ -354,7 +354,7 @@ def child_training(
         configs["diffusion"]["lr"],
         configs["diffusion"]["weight_decay"],
         device=device,
-        initial_state_dict=initial_state_dict,
+        initial_state_file_path=initial_state_file_path,
     )
 
     if parent_name is None:
@@ -398,7 +398,7 @@ def train_model(
     lr: float,
     weight_decay: float,
     device: str = "cuda",
-    initial_state_dict: dict[str, Tensor] | None = None,
+    initial_state_file_path: Path | None = None,
 ) -> dict[str, Any]:
     T = Transformations(**T_dict)
     dataset, label_encoders, column_orders = make_dataset_from_df(
@@ -443,8 +443,14 @@ def train_model(
     )
     diffusion.to(device)
 
-    if initial_state_dict is not None:
-        diffusion.load_state_dict(initial_state_dict)
+    print("++++++++++++++++++++++++ BEFORE ++++++++++++++++++++++++++")
+    print(diffusion.state_dict())
+
+    if initial_state_file_path is not None:
+        diffusion.load_state_dict(torch.load(initial_state_file_path, weights_only=True))
+
+    print("++++++++++++++++++++++++ AFTER ++++++++++++++++++++++++++")
+    print(diffusion.state_dict())
 
     diffusion.train()
 
diff --git a/tests/integration/data/diffusion_initial_state.pth b/tests/integration/data/diffusion_initial_state.pth
diff --git a/tests/integration/models/clavaddpm/test_model.py b/tests/integration/models/clavaddpm/test_model.py
@@ -248,12 +248,16 @@ def test_train_single_table(tmp_path: Path):
 
     os.makedirs(tmp_path / "models")
     configs = {"clustering": CLUSTERING_CONFIG, "diffusion": DIFFUSION_CONFIG}
-    initial_state_dict = pickle.loads(Path("tests/integration/data/diffusion_initial_state.pkl").read_bytes())
 
     # Act
     tables, relation_order, _ = load_multi_table("tests/integration/data/single_table/")
     tables, models = clava_training(
-        tables, relation_order, tmp_path, configs, device="cpu", initial_state_dict=initial_state_dict
+        tables,
+        relation_order,
+        tmp_path,
+        configs,
+        device="cpu",
+        initial_state_file_path="tests/integration/data/diffusion_initial_state.pth",
     )
 
     # Assert
@@ -284,7 +288,7 @@ def test_train_single_table(tmp_path: Path):
     # if np.allclose(model_data[model_layers[0]].detach(), expected_model_data[expected_model_layers[0]].detach()):
     # if the first layer is equal with minimal tolerance, all others should be equal as well
     assert all(
-        np.allclose(model_data[layer].detach(), expected_model_data[layer].detach(), atol=0.1)
+        np.allclose(model_data[layer].detach(), expected_model_data[layer].detach(), atol=0.05)
         for layer in model_layers
     )
 
@@ -311,12 +315,16 @@ def test_train_multi_table(tmp_path: Path):
     # Act
     os.makedirs(tmp_path / "models")
     configs = {"clustering": CLUSTERING_CONFIG, "diffusion": DIFFUSION_CONFIG, "classifier": CLASSIFIER_CONFIG}
-    initial_state_dict = pickle.loads(Path("tests/integration/data/diffusion_initial_state.pkl").read_bytes())
 
     tables, relation_order, _ = load_multi_table("tests/integration/data/multi_table/")
     tables, _ = clava_clustering(tables, relation_order, tmp_path, configs)
     models = clava_training(
-        tables, relation_order, tmp_path, configs, device="cpu", initial_state_dict=initial_state_dict
+        tables,
+        relation_order,
+        tmp_path,
+        configs,
+        device="cpu",
+        initial_state_file_path="tests/integration/data/diffusion_initial_state.pth",
     )
 
     # Assert
@@ -348,7 +356,7 @@ def test_train_multi_table(tmp_path: Path):
     # if np.allclose(model_data[model_layers[0]].detach(), expected_model_data[expected_model_layers[0]].detach()):
     # if the first layer is equal with minimal tolerance, all others should be equal as well
     assert all(
-        np.allclose(model_data[layer].detach(), expected_model_data[layer].detach(), atol=0.1)
+        np.allclose(model_data[layer].detach(), expected_model_data[layer].detach(), atol=0.05)
         for layer in model_layers
     )