diff --git a/optillm/__init__.py b/optillm/__init__.py
index 4ae9b7ee..6fe64d92 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.2.8"
+__version__ = "0.2.9"
 
 # Import from server module
 from .server import (
diff --git a/optillm/inference.py b/optillm/inference.py
index 1ebf8a61..3b61a875 100644
--- a/optillm/inference.py
+++ b/optillm/inference.py
@@ -1029,7 +1029,7 @@ def _load_model():
         logger.info(f"Using device: {device}")
 
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=os.getenv("HF_TOKEN"))
 
         # Base kwargs for model loading
         model_kwargs = {
@@ -1076,6 +1076,7 @@
         try:
             model = AutoModelForCausalLM.from_pretrained(
                 model_id,
+                token=os.getenv("HF_TOKEN"),
                 **model_kwargs
             )
         except Exception as e:
@@ -1085,6 +1086,7 @@
                 model_kwargs.pop("attn_implementation")
                 model = AutoModelForCausalLM.from_pretrained(
                     model_id,
+                    token=os.getenv("HF_TOKEN"),
                     **model_kwargs
                 )
             elif model_kwargs["torch_dtype"] == torch.float16:
@@ -1094,6 +1096,7 @@
                 model_kwargs["torch_dtype"] = torch.float32
                 model = AutoModelForCausalLM.from_pretrained(
                     model_id,
+                    token=os.getenv("HF_TOKEN"),
                     **model_kwargs
                 )
 
@@ -1134,7 +1137,7 @@ def validate_adapter(self, adapter_id: str) -> bool:
             config = PeftConfig.from_pretrained(
                 adapter_id,
                 trust_remote_code=True,
-                use_auth_token=os.getenv("HF_TOKEN")
+                token=os.getenv("HF_TOKEN")
             )
             return True
         except Exception as e:
@@ -1159,7 +1162,7 @@ def _load_adapter():
             config = PeftConfig.from_pretrained(
                 adapter_id,
                 trust_remote_code=True,
-                use_auth_token=os.getenv("HF_TOKEN")
+                token=os.getenv("HF_TOKEN")
             )
 
             model = base_model
diff --git a/pyproject.toml b/pyproject.toml
index 23ae88d1..d1049c79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.2.8"
+version = "0.2.9"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
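For context on the change above: recent transformers and peft releases accept a `token=` kwarg for Hugging Face Hub authentication, replacing the deprecated `use_auth_token=`. A minimal sketch of the pattern this diff adopts, assuming HF_TOKEN is exported in the environment and using a placeholder model id:

import os

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder id for illustration; any gated Hub repo exercises the same path.
model_id = "your-org/your-gated-model"

# Read the token from the environment, as the diff does; from_pretrained
# falls back to cached `huggingface-cli login` credentials when this is None.
hf_token = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)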