@@ -99,32 +99,6 @@ def choose_image_loader():
     return processed_docs or []
 
 
-def device_str_from_model(model, fallback_device=None):
-    devs = set()
-    offload = False
-    if hasattr(model, "hf_device_map") and model.hf_device_map:
-        for loc in model.hf_device_map.values():
-            if not loc:
-                continue
-            if loc == "disk":
-                offload = True
-                continue
-            devs.add(loc.split(":")[0])
-    else:
-        try:
-            devs |= {p.device.type for p in model.parameters() if p.device.type != "meta"}
-            devs |= {b.device.type for b in model.buffers() if b.device.type != "meta"}
-        except Exception:
-            dev = getattr(fallback_device, "type", fallback_device or "unknown")
-            devs.add(dev)
-
-    names = {"cuda": "CUDA", "cpu": "CPU", "mps": "MPS", "xpu": "XPU", "npu": "NPU"}
-    label = "+".join(sorted(names.get(d, d.upper()) for d in devs)) or "UNKNOWN"
-    if offload:
-        label += " (+offload)"
-    return label
-
-
 class BaseLoader:
     def __init__(self, config):
         self.config = config
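For reference, the deleted helper's job is reduced in this commit to fixed per-loader labels. A minimal sketch of the replacement pattern, with a hypothetical helper name (the commit inlines this logic rather than defining a function):

import torch

def simple_device_label(requires_cuda=False):
    # Loaders that hard-require CUDA (e.g. bitsandbytes 4-bit
    # quantization) report "CUDA" unconditionally; loaders using
    # device_map='auto' probe availability and fall back to "CPU".
    if requires_cuda:
        return "CUDA"
    return "CUDA" if torch.cuda.is_available() else "CPU"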
@@ -213,7 +187,7 @@ def initialize_model_and_tokenizer(self):
         )
 
         precision_str = "bfloat16" if use_bf16 else "float16"
-        device_str = device_str_from_model(model, fallback_device=self.device)
+        device_str = "CUDA"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, tokenizer, None
@@ -247,7 +221,7 @@ def initialize_model_and_tokenizer(self):
         source = info.get('model_path') or info['repo_id']
         cache_dir = CACHE_DIR / info.get('cache_dir', '')
         cache_dir.mkdir(parents=True, exist_ok=True)
-
+
         self.processor = AutoProcessor.from_pretrained(
             source,
             token=False,
@@ -256,14 +230,14 @@ def initialize_model_and_tokenizer(self):
             device_map='auto',
             cache_dir=cache_dir
         )
-
+
         quant_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_compute_dtype=torch.bfloat16,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_use_double_quant=True
         )
-
+
         self.model = AutoModelForCausalLM.from_pretrained(
             source,
             token=False,
@@ -273,11 +247,14 @@ def initialize_model_and_tokenizer(self):
             device_map='auto',
             cache_dir=cache_dir
         )
-
+
         self.model.model.vision_backbone = self.model.model.vision_backbone.to(torch.float32)
         self.model.eval()
-
-        device_str = device_str_from_model(self.model, fallback_device=self.device)
+
+        if torch.cuda.is_available():
+            device_str = "CUDA"
+        else:
+            device_str = "CPU"
         precision_str = "bfloat16"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
@@ -317,8 +294,12 @@ def initialize_model_and_tokenizer(self):
         if self.device == "cuda":
             use_bf16 = torch.cuda.get_device_capability()[0] >= 8
             dtype = torch.bfloat16 if use_bf16 else torch.float16
+            precision_str = "bfloat16" if use_bf16 else "float16"
+            device_str = "CUDA"
         else:
             dtype = torch.float32
+            precision_str = "float32"
+            device_str = "CPU"
 
         self.model_dtype = dtype
 
@@ -338,8 +319,6 @@ def initialize_model_and_tokenizer(self):
 
         self.model = model
 
-        precision_str = "bfloat16" if dtype == torch.bfloat16 else "float16" if dtype == torch.float16 else "float32"
-        device_str = device_str_from_model(model, fallback_device=self.device)
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, text_tokenizer, visual_tokenizer
@@ -418,6 +397,7 @@ def initialize_model_and_tokenizer(self):
                 cache_dir=cache_dir,
                 token=False
             ).eval()
+            device_str = "CUDA"
         else:
             # CPU fallback
             dtype = torch.float32
@@ -431,9 +411,9 @@ def initialize_model_and_tokenizer(self):
                 token=False,
                 device_map={"": "cpu"}
             ).eval()
+            device_str = "CPU"
 
         self.model_dtype = dtype
-        device_str = device_str_from_model(model, fallback_device=self.device)
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         tokenizer = AutoTokenizer.from_pretrained(
@@ -603,9 +583,6 @@ def initialize_model_and_tokenizer(self):
 
         use_bf16 = torch.cuda.get_device_capability()[0] >= 8
         dtype = torch.bfloat16 if use_bf16 else torch.float16
-        precision_str = "bfloat16" if use_bf16 else "float16"
-        device_str = device_str_from_model(model, fallback_device=self.device)
-        my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -640,6 +617,7 @@ def initialize_model_and_tokenizer(self):
                 "visual.blocks.31.mlp.down_proj"
             ]
         )
+
         processor = AutoProcessor.from_pretrained(
             model_id,
             use_fast=True,
@@ -649,6 +627,7 @@ def initialize_model_and_tokenizer(self):
             cache_dir=cache_dir,
             token=False
         )
+
         model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             model_id,
             quantization_config=quantization_config,
@@ -660,7 +639,9 @@ def initialize_model_and_tokenizer(self):
         )
         model = model.to(self.device)
         model.eval()
+
         precision_str = "bfloat16" if use_bf16 else "float16"
+        device_str = "CUDA"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, None, processor
@@ -693,6 +674,7 @@ def process_single_image(self, raw_image):
 
         return ' '.join(line.strip() for line in response.split('\n') if line.strip())
 
+
 class loader_glmv4_thinking(BaseLoader):
     def initialize_model_and_tokenizer(self):
         chosen_model = self.config['vision']['chosen_model']
@@ -732,7 +714,7 @@ def initialize_model_and_tokenizer(self):
         )
 
         precision_str = "bfloat16" if use_bf16 else "float16"
-        device_str = device_str_from_model(model, fallback_device=self.device)
+        device_str = "CUDA"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, None, processor
@@ -795,7 +777,10 @@ def initialize_model_and_tokenizer(self):
         if hasattr(processor, "tokenizer") and hasattr(processor.tokenizer, "add_bos_token"):
             processor.tokenizer.add_bos_token = False
 
-        device_str = device_str_from_model(model, fallback_device=self.device)
+        if torch.cuda.is_available():
+            device_str = "CUDA"
+        else:
+            device_str = "CPU"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, None, processor
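In most of the loaders above, the precision label follows one rule; a standalone sketch (hypothetical function name, assuming only torch) of that rule:

import torch

def precision_label():
    # bfloat16 needs CUDA compute capability >= 8 (Ampere or newer);
    # older GPUs fall back to float16, and CPU-only paths use float32.
    if not torch.cuda.is_available():
        return "float32"
    major = torch.cuda.get_device_capability()[0]
    return "bfloat16" if major >= 8 else "float16"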