|
15 | 15 |
|
16 | 16 | # Constants |
17 | 17 | APPROACHES = ["none", "mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", "cot_reflection", "plansearch", "leap", "re2"] |
18 | | -MAX_LENGTH = 512 |
| 18 | +MAX_LENGTH = 1024 |
19 | 19 |
|
20 | 20 | # Device selection |
21 | 21 | device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu") |
@@ -233,6 +233,18 @@ def inference(model, tokenizer, prompt, effort_levels): |
233 | 233 | return results |
234 | 234 |
|
235 | 235 | def main(args): |
| 236 | + |
| 237 | + if args.push_to_hub: |
| 238 | + base_model = AutoModel.from_pretrained(args.model_name) |
| 239 | + tokenizer = AutoTokenizer.from_pretrained(args.model_name) |
| 240 | + # best_model = OptILMClassifier(base_model, num_labels=len(APPROACHES)) |
| 241 | + # best_model.to(device) |
| 242 | + # load_model(best_model, "best_model.safetensors") |
| 243 | + # We push only the base model and tokenizer here; the trained classifier head (best_model.safetensors) must be uploaded manually, because the OptILMClassifier class doesn't provide a push_to_hub method. |
| 244 | + base_model.push_to_hub(args.hub_model_id) |
| 245 | + tokenizer.push_to_hub(args.hub_model_id) |
| 246 | + return |
| 247 | + |
236 | 248 | tokenizer = AutoTokenizer.from_pretrained(args.model_name) |
237 | 249 | dataset = load_and_preprocess_data(tokenizer) |
238 | 250 |
|
@@ -273,15 +285,6 @@ def main(args): |
273 | 285 |
|
274 | 286 | print(f"\nBest performing model was from fold {best_fold} with validation accuracy {best_val_accuracy:.4f}") |
275 | 287 |
|
276 | | - if args.push_to_hub: |
277 | | - base_model = AutoModel.from_pretrained(args.model_name) |
278 | | - # best_model = OptILMClassifier(base_model, num_labels=len(APPROACHES)) |
279 | | - # best_model.to(device) |
280 | | - # load_model(best_model, "best_model.safetensors") |
281 | | - # we just push the base model and then upload the safetensors file manually as OptILMClassifier class doesn't have a push_to_hub method. |
282 | | - base_model.push_to_hub(args.hub_model_id) |
283 | | - tokenizer.push_to_hub(args.hub_model_id) |
284 | | - |
285 | 288 | # Load the best model for inference |
286 | 289 | base_model = AutoModel.from_pretrained(args.model_name) |
287 | 290 | best_model = OptILMClassifier(base_model, num_labels=len(APPROACHES)) |
|
0 commit comments