Skip to content

Commit 6d09721

Browse files
authored
Merge pull request #251 from codelion/feat-new-cepo-release
Feat new cepo release
2 parents 74e0ae1 + d0b2784 commit 6d09721

File tree

3 files changed: +8 additions, −5 deletions

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.2.8"
+__version__ = "0.2.9"

 # Import from server module
 from .server import (

optillm/inference.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ def _load_model():
     logger.info(f"Using device: {device}")

     # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=os.getenv("HF_TOKEN"))

     # Base kwargs for model loading
     model_kwargs = {
@@ -1076,6 +1076,7 @@ def _load_model():
     try:
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
+            token=os.getenv("HF_TOKEN"),
             **model_kwargs
         )
     except Exception as e:
@@ -1085,6 +1086,7 @@ def _load_model():
         model_kwargs.pop("attn_implementation")
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
+            token=os.getenv("HF_TOKEN"),
             **model_kwargs
         )
     elif model_kwargs["torch_dtype"] == torch.float16:
@@ -1094,6 +1096,7 @@ def _load_model():
         model_kwargs["torch_dtype"] = torch.float32
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
+            token=os.getenv("HF_TOKEN"),
             **model_kwargs
         )

@@ -1134,7 +1137,7 @@ def validate_adapter(self, adapter_id: str) -> bool:
         config = PeftConfig.from_pretrained(
             adapter_id,
             trust_remote_code=True,
-            use_auth_token=os.getenv("HF_TOKEN")
+            token=os.getenv("HF_TOKEN")
         )
         return True
     except Exception as e:
@@ -1159,7 +1162,7 @@ def _load_adapter():
         config = PeftConfig.from_pretrained(
             adapter_id,
             trust_remote_code=True,
-            use_auth_token=os.getenv("HF_TOKEN")
+            token=os.getenv("HF_TOKEN")
         )

         model = base_model

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "optillm"
-version = "0.2.8"
+version = "0.2.9"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"

Comments (0)