     load_hf_numpy,
     require_torch_accelerator,
     require_torch_accelerator_with_fp16,
-    require_torch_accelerator_with_training,
     require_torch_gpu,
     skip_mps,
     slow,
@@ -170,53 +169,14 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict

+    @unittest.skip("Not tested.")
     def test_forward_signature(self):
         pass

+    @unittest.skip("Not tested.")
     def test_training(self):
         pass

-    @require_torch_accelerator_with_training
-    def test_gradient_checkpointing(self):
-        # enable deterministic behavior for gradient checkpointing
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict)
-        model.to(torch_device)
-
-        assert not model.is_gradient_checkpointing and model.training
-
-        out = model(**inputs_dict).sample
-        # run the backwards pass on the model. For backwards pass, for simplicity purpose,
-        # we won't calculate the loss and rather backprop on out.sum()
-        model.zero_grad()
-
-        labels = torch.randn_like(out)
-        loss = (out - labels).mean()
-        loss.backward()
-
-        # re-instantiate the model now enabling gradient checkpointing
-        model_2 = self.model_class(**init_dict)
-        # clone model
-        model_2.load_state_dict(model.state_dict())
-        model_2.to(torch_device)
-        model_2.enable_gradient_checkpointing()
-
-        assert model_2.is_gradient_checkpointing and model_2.training
-
-        out_2 = model_2(**inputs_dict).sample
-        # run the backwards pass on the model. For backwards pass, for simplicity purpose,
-        # we won't calculate the loss and rather backprop on out.sum()
-        model_2.zero_grad()
-        loss_2 = (out_2 - labels).mean()
-        loss_2.backward()
-
-        # compare the output and parameters gradients
-        self.assertTrue((loss - loss_2).abs() < 1e-5)
-        named_params = dict(model.named_parameters())
-        named_params_2 = dict(model_2.named_parameters())
-        for name, param in named_params.items():
-            self.assertTrue(torch_all_close(param.grad.data, named_params_2[name].grad.data, atol=5e-5))
-
     def test_from_pretrained_hub(self):
         model, loading_info = AutoencoderKL.from_pretrained("fusing/autoencoder-kl-dummy", output_loading_info=True)
         self.assertIsNotNone(model)
@@ -329,9 +289,11 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict

+    @unittest.skip("Not tested.")
     def test_forward_signature(self):
         pass

+    @unittest.skip("Not tested.")
     def test_forward_with_norm_groups(self):
         pass

@@ -364,6 +326,7 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict

+    @unittest.skip("Not tested.")
     def test_outputs_equivalence(self):
         pass

@@ -443,56 +406,14 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict

+    @unittest.skip("Not tested.")
     def test_forward_signature(self):
         pass

+    @unittest.skip("Not tested.")
     def test_training(self):
         pass

-    @unittest.skipIf(torch_device == "mps", "Gradient checkpointing skipped on MPS")
-    def test_gradient_checkpointing(self):
-        # enable deterministic behavior for gradient checkpointing
-        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
-        model = self.model_class(**init_dict)
-        model.to(torch_device)
-
-        assert not model.is_gradient_checkpointing and model.training
-
-        out = model(**inputs_dict).sample
-        # run the backwards pass on the model. For backwards pass, for simplicity purpose,
-        # we won't calculate the loss and rather backprop on out.sum()
-        model.zero_grad()
-
-        labels = torch.randn_like(out)
-        loss = (out - labels).mean()
-        loss.backward()
-
-        # re-instantiate the model now enabling gradient checkpointing
-        model_2 = self.model_class(**init_dict)
-        # clone model
-        model_2.load_state_dict(model.state_dict())
-        model_2.to(torch_device)
-        model_2.enable_gradient_checkpointing()
-
-        assert model_2.is_gradient_checkpointing and model_2.training
-
-        out_2 = model_2(**inputs_dict).sample
-        # run the backwards pass on the model. For backwards pass, for simplicity purpose,
-        # we won't calculate the loss and rather backprop on out.sum()
-        model_2.zero_grad()
-        loss_2 = (out_2 - labels).mean()
-        loss_2.backward()
-
-        # compare the output and parameters gradients
-        self.assertTrue((loss - loss_2).abs() < 1e-5)
-        named_params = dict(model.named_parameters())
-        named_params_2 = dict(model_2.named_parameters())
-        for name, param in named_params.items():
-            if "post_quant_conv" in name:
-                continue
-
-            self.assertTrue(torch_all_close(param.grad.data, named_params_2[name].grad.data, atol=5e-5))
-

 class AutoencoderOobleckTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
     model_class = AutoencoderOobleck
@@ -522,9 +443,11 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict

+    @unittest.skip("Not tested.")
     def test_forward_signature(self):
         pass

+    @unittest.skip("Not tested.")
     def test_forward_with_norm_groups(self):
         pass