@@ -233,7 +233,7 @@ def __init__(
233233 role : str ,
234234 djl_version : Optional [str ] = None ,
235235 task : Optional [str ] = None ,
236- data_type : str = "fp32" ,
236+ dtype : str = "fp32" ,
237237 number_of_partitions : Optional [int ] = None ,
238238 min_workers : Optional [int ] = None ,
239239 max_workers : Optional [int ] = None ,
@@ -264,7 +264,7 @@ def __init__(
264264 task (str): The HuggingFace/NLP task you want to launch this model for. Defaults to
265265 None.
266266 If not provided, the task will be inferred from the model architecture by DJL.
267- data_type (str): The data type to use for loading your model. Accepted values are
267+ dtype (str): The data type to use for loading your model. Accepted values are
268268 "fp32", "fp16", "bf16", "int8". Defaults to "fp32".
269269 number_of_partitions (int): The number of GPUs to partition the model across. The
270270 partitioning strategy is determined by the selected backend. If DeepSpeed is
@@ -322,13 +322,20 @@ def __init__(
322322 "You only need to set model_id and ensure it points to uncompressed model "
323323 "artifacts in s3, or a valid HuggingFace Hub model_id."
324324 )
325+ data_type = kwargs .pop ("data_type" , None )
326+ if data_type :
327+ logger .warning (
328+ "data_type is being deprecated in favor of dtype. Please migrate use of data_type"
329+ " to dtype. Support for data_type will be removed in a future release"
330+ )
331+ dtype = dtype or data_type
325332 super (DJLModel , self ).__init__ (
326333 None , image_uri , role , entry_point , predictor_cls = predictor_cls , ** kwargs
327334 )
328335 self .model_id = model_id
329336 self .djl_version = djl_version
330337 self .task = task
331- self .data_type = data_type
338+ self .dtype = dtype
332339 self .number_of_partitions = number_of_partitions
333340 self .min_workers = min_workers
334341 self .max_workers = max_workers
@@ -372,7 +379,7 @@ def transformer(self, **_):
372379 "DJLModels do not currently support Batch Transform inference jobs"
373380 )
374381
375- def right_size (self , checkpoint_data_type : str ):
382+ def right_size (self , ** _ ):
376383 """Not implemented.
377384
378385 DJLModels do not support SageMaker Inference Recommendation Jobs.
@@ -573,8 +580,8 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
573580 serving_properties ["option.entryPoint" ] = self .entry_point
574581 if self .task :
575582 serving_properties ["option.task" ] = self .task
576- if self .data_type :
577- serving_properties ["option.dtype" ] = self .data_type
583+ if self .dtype :
584+ serving_properties ["option.dtype" ] = self .dtype
578585 if self .min_workers :
579586 serving_properties ["minWorkers" ] = self .min_workers
580587 if self .max_workers :
@@ -779,7 +786,7 @@ def __init__(
779786 None.
780787 load_in_8bit (bool): Whether to load the model in int8 precision using bits and bytes
781788 quantization. This is only supported for select model architectures.
782- Defaults to False. If ``data_type `` is int8, then this is set to True.
789+ Defaults to False. If ``dtype `` is int8, then this is set to True.
783790 low_cpu_mem_usage (bool): Whether to limit CPU memory usage to 1x model size during
784791 model loading. This is an experimental feature in HuggingFace. This is useful when
785792 loading multiple instances of your model in parallel. Defaults to False.
@@ -832,19 +839,19 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
832839 if self .device_map :
833840 serving_properties ["option.device_map" ] = self .device_map
834841 if self .load_in_8bit :
835- if self .data_type != "int8" :
836- raise ValueError ("Set data_type ='int8' to use load_in_8bit" )
842+ if self .dtype != "int8" :
843+ raise ValueError ("Set dtype ='int8' to use load_in_8bit" )
837844 serving_properties ["option.load_in_8bit" ] = self .load_in_8bit
838- if self .data_type == "int8" :
845+ if self .dtype == "int8" :
839846 serving_properties ["option.load_in_8bit" ] = True
840847 if self .low_cpu_mem_usage :
841848 serving_properties ["option.low_cpu_mem_usage" ] = self .low_cpu_mem_usage
842849 # This is a workaround due to a bug in our built in handler for huggingface
843850 # TODO: This needs to be fixed when new dlc is published
844851 if (
845852 serving_properties ["option.entryPoint" ] == "djl_python.huggingface"
846- and self .data_type
847- and self .data_type != "auto"
853+ and self .dtype
854+ and self .dtype != "auto"
848855 ):
849856 serving_properties ["option.dtype" ] = "auto"
850857 serving_properties .pop ("option.load_in_8bit" , None )
0 commit comments