|
24 | 24 | import numpy as np
|
25 | 25 | import requests_mock
|
26 | 26 | import torch
|
| 27 | +from accelerate.utils import compute_module_sizes |
27 | 28 | from huggingface_hub import ModelCard, delete_repo
|
28 | 29 | from huggingface_hub.utils import is_jinja_available
|
29 | 30 | from requests.exceptions import HTTPError
|
|
39 | 40 | require_torch_2,
|
40 | 41 | require_torch_accelerator_with_training,
|
41 | 42 | require_torch_gpu,
|
| 43 | + require_torch_multi_gpu, |
42 | 44 | run_test_in_subprocess,
|
43 | 45 | torch_device,
|
44 | 46 | )
|
@@ -200,6 +202,21 @@ class ModelTesterMixin:
|
200 | 202 | main_input_name = None # overwrite in model specific tester class
|
201 | 203 | base_precision = 1e-3
|
202 | 204 | forward_requires_fresh_args = False
|
| 205 | + model_split_percents = [0.5, 0.7, 0.9] |
| 206 | + |
def check_device_map_is_respected(self, model, device_map):
    """Assert that every parameter of `model` sits on the device `device_map` assigns to it.

    For each parameter, the matching `device_map` entry is found by repeatedly dropping the
    last dotted component of the parameter name until a key of `device_map` is hit (the empty
    string `""` is the last candidate tried).

    Parameters mapped to `"cpu"` or `"disk"` are expected to be on the `meta` device
    (presumably because offloaded weights are only materialised on demand -- confirm against
    the offloading backend); all other parameters are expected to be on the mapped device.

    Args:
        model: object exposing `named_parameters()`.
        device_map: dict mapping (prefixes of) parameter names to a device.

    Raises:
        ValueError: if neither the parameter name nor any of its prefixes is in `device_map`.
    """
    for param_name, param in model.named_parameters():
        # Walk up the dotted path on a separate variable so that the original parameter
        # name is still available for the error message below.
        module_name = param_name
        while len(module_name) > 0 and module_name not in device_map:
            module_name = ".".join(module_name.split(".")[:-1])
        if module_name not in device_map:
            raise ValueError(f"device map is incomplete, it does not contain any device for `{param_name}`.")

        param_device = device_map[module_name]
        if param_device in ["cpu", "disk"]:
            self.assertEqual(param.device, torch.device("meta"))
        else:
            self.assertEqual(param.device, torch.device(param_device))
203 | 220 |
|
204 | 221 | def test_from_save_pretrained(self, expected_max_diff=5e-5):
|
205 | 222 | if self.forward_requires_fresh_args:
|
@@ -670,6 +687,117 @@ def test_deprecated_kwargs(self):
|
670 | 687 | " from `_deprecated_kwargs = [<deprecated_argument>]`"
|
671 | 688 | )
|
672 | 689 |
|
@require_torch_gpu
def test_cpu_offload(self):
    """Check that a model partly offloaded to CPU gives the same output as the on-device model.

    A reference output is computed with the whole model on `torch_device`. The model is then
    saved and reloaded with `device_map="auto"` under several GPU memory budgets (all but the
    first entry of `model_split_percents`), and each reloaded model must:
      * have a device map that uses exactly GPU 0 and the CPU,
      * have its parameters placed as the device map dictates,
      * reproduce the reference output within `atol=1e-5`.
    """
    init_kwargs, forward_inputs = self.prepare_init_args_and_inputs_for_common()
    reference_model = self.model_class(**init_kwargs).eval().to(torch_device)

    torch.manual_seed(0)
    reference_output = reference_model(**forward_inputs)

    total_size = compute_module_sizes(reference_model)[""]
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        reference_model.cpu().save_pretrained(checkpoint_dir)

        # Several GPU budgets are exercised to make sure different splits work.
        for split_percent in self.model_split_percents[1:]:
            gpu_budget = int(split_percent * total_size)
            memory_limits = {0: gpu_budget, "cpu": total_size * 2}
            offloaded_model = self.model_class.from_pretrained(
                checkpoint_dir, device_map="auto", max_memory=memory_limits
            )
            # The budget must force a real split between the GPU and the CPU.
            used_devices = set(offloaded_model.hf_device_map.values())
            self.assertSetEqual(used_devices, {0, "cpu"})

            self.check_device_map_is_respected(offloaded_model, offloaded_model.hf_device_map)
            torch.manual_seed(0)
            offloaded_output = offloaded_model(**forward_inputs)

            outputs_match = torch.allclose(reference_output[0], offloaded_output[0], atol=1e-5)
            self.assertTrue(outputs_match)
| 716 | + |
@require_torch_gpu
def test_disk_offload_without_safetensors(self):
    """Check offloading of a checkpoint saved with `safe_serialization=False`.

    A reference output is computed with the whole model on `torch_device`. The model is then
    saved with `safe_serialization=False` and reloaded with `device_map="auto"` under a
    memory budget (second entry of `model_split_percents`, applied to both GPU 0 and the CPU):
      * without an `offload_folder`, loading is expected to raise `ValueError`;
      * with an `offload_folder`, the parameters must be placed as the device map dictates
        and the reference output must be reproduced within `atol=1e-5`.
    """
    config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
    model = self.model_class(**config).eval()
    model = model.to(torch_device)

    torch.manual_seed(0)
    base_output = model(**inputs_dict)

    model_size = compute_module_sizes(model)[""]
    # The same budget is used for both load attempts, so compute it once.
    max_size = int(self.model_split_percents[1] * model_size)
    max_memory = {0: max_size, "cpu": max_size}
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.cpu().save_pretrained(tmp_dir, safe_serialization=False)

        # Only the load itself is inside `assertRaises`, so an unrelated `ValueError`
        # from the setup code cannot make the test pass by accident.
        with self.assertRaises(ValueError):
            # This errors out because it's missing an offload folder
            self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)

        new_model = self.model_class.from_pretrained(
            tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
        )

        self.check_device_map_is_respected(new_model, new_model.hf_device_map)
        torch.manual_seed(0)
        new_output = new_model(**inputs_dict)

        self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
| 747 | + |
@require_torch_gpu
def test_disk_offload_with_safetensors(self):
    """Check offloading of a checkpoint saved with the default `save_pretrained` settings.

    A reference output is computed with the whole model on `torch_device`. The model is then
    saved, reloaded with `device_map="auto"`, an `offload_folder` and a memory budget (second
    entry of `model_split_percents`, applied to both GPU 0 and the CPU). The reloaded model
    must have its parameters placed as the device map dictates and must reproduce the
    reference output within `atol=1e-5`.
    """
    init_kwargs, forward_inputs = self.prepare_init_args_and_inputs_for_common()
    reference_model = self.model_class(**init_kwargs).eval().to(torch_device)

    torch.manual_seed(0)
    reference_output = reference_model(**forward_inputs)

    total_size = compute_module_sizes(reference_model)[""]
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        reference_model.cpu().save_pretrained(checkpoint_dir)

        budget = int(self.model_split_percents[1] * total_size)
        memory_limits = {0: budget, "cpu": budget}
        offloaded_model = self.model_class.from_pretrained(
            checkpoint_dir, device_map="auto", offload_folder=checkpoint_dir, max_memory=memory_limits
        )

        self.check_device_map_is_respected(offloaded_model, offloaded_model.hf_device_map)
        torch.manual_seed(0)
        offloaded_output = offloaded_model(**forward_inputs)

        outputs_match = torch.allclose(reference_output[0], offloaded_output[0], atol=1e-5)
        self.assertTrue(outputs_match)
| 772 | + |
@require_torch_multi_gpu
def test_model_parallelism(self):
    """Check that a model split across two GPUs gives the same output as the single-device model.

    A reference output is computed with the whole model on `torch_device`. The model is then
    saved and reloaded with `device_map="auto"` under several budgets for GPU 0 (all but the
    first entry of `model_split_percents`), and each reloaded model must:
      * have a device map that uses exactly GPU 0 and GPU 1,
      * have its parameters placed as the device map dictates,
      * reproduce the reference output within `atol=1e-5`.
    """
    init_kwargs, forward_inputs = self.prepare_init_args_and_inputs_for_common()
    reference_model = self.model_class(**init_kwargs).eval().to(torch_device)

    torch.manual_seed(0)
    reference_output = reference_model(**forward_inputs)

    total_size = compute_module_sizes(reference_model)[""]
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        reference_model.cpu().save_pretrained(checkpoint_dir)

        # Several budgets for GPU 0 are exercised to make sure different splits work.
        for split_percent in self.model_split_percents[1:]:
            first_gpu_budget = int(split_percent * total_size)
            memory_limits = {0: first_gpu_budget, 1: total_size * 2, "cpu": total_size * 2}
            sharded_model = self.model_class.from_pretrained(
                checkpoint_dir, device_map="auto", max_memory=memory_limits
            )
            # The model must actually be spread over both GPUs (and nothing else).
            used_devices = set(sharded_model.hf_device_map.values())
            self.assertSetEqual(used_devices, {0, 1})

            self.check_device_map_is_respected(sharded_model, sharded_model.hf_device_map)

            torch.manual_seed(0)
            sharded_output = sharded_model(**forward_inputs)

            outputs_match = torch.allclose(reference_output[0], sharded_output[0], atol=1e-5)
            self.assertTrue(outputs_match)
| 800 | + |
673 | 801 |
|
674 | 802 | @is_staging_test
|
675 | 803 | class ModelPushToHubTester(unittest.TestCase):
|
|
0 commit comments