File tree Expand file tree Collapse file tree 2 files changed +11
-12
lines changed
src/llmcompressor/entrypoints Expand file tree Collapse file tree 2 files changed +11
-12
lines changed Original file line number Diff line number Diff line change 2
2
from datetime import datetime
3
3
from typing import Optional
4
4
5
+ import torch
6
+ from compressed_tensors.utils import force_cpu_offload
5
7
from loguru import logger
6
8
from torch.utils.data import DataLoader
7
9
from transformers import PreTrainedModel
@@ -123,6 +125,15 @@ def __init__(
123
125
# initialize the model and processor
124
126
pre_process(model_args)
125
127
128
+ # offload to cpu if possible
129
+ if "cuda" in str(model_args.oneshot_device) and torch.cuda.is_available():
130
+ # TODO: consider renaming function similar to "offload_dispatch_model"
131
+ # TODO: modify function to remove any hooks if they already exist (making
132
+ # sure to move to cpu when removing hook)
133
+ force_cpu_offload(model_args.model, model_args.oneshot_device)
134
+ else:
135
+ logger.warning("CUDA is not available! Compressing model on CPU instead")
136
+
126
137
# Set instance attributes
127
138
self.model = self.model_args.model
128
139
self.processor = self.model_args.processor
Original file line number Diff line number Diff line change 3
3
from pathlib import PosixPath
4
4
from typing import Optional, Tuple
5
5
6
- import torch
7
- from compressed_tensors.utils import force_cpu_offload
8
6
from loguru import logger
9
7
from torch.nn import Module
10
8
from transformers import (
@@ -64,16 +62,6 @@ def pre_process(model_args: "ModelArguments"):
64
62
# untie tie_word_embeddings weights
65
63
patch_tied_tensors_bug(model_args.model)
66
64
67
- # offload to cpu if possible
68
- if "cuda" in str(model_args.oneshot_device) and torch.cuda.is_available():
69
- # TODO: consider renaming function to something like "offload_dispatch_model"
70
- # TODO: modify function to remove any hooks if they already exist (making sure
71
- # to move to cpu when removing hook)
72
- force_cpu_offload(model_args.model, model_args.oneshot_device)
73
-
74
- else:
75
- logger.warning("CUDA is not available! Compressing model on CPU instead")
76
-
77
65
# wrap model.save_pretrained
78
66
modify_save_pretrained(model_args.model)
79
67
You can’t perform that action at this time.
0 commit comments