24 | 24 | import openai |
25 | 25 | except ModuleNotFoundError: # pragma: no cover - optional dependency |
26 | 26 | openai = None |
| 27 | +try: |
| 28 | + import lmstudio as lms |
| 29 | +except ModuleNotFoundError: # pragma: no cover - optional dependency |
| 30 | + lms = None |
| 31 | +try: |
| 32 | + from mlx_lm import load, generate |
| 33 | +except ModuleNotFoundError: # pragma: no cover - optional dependency |
| 34 | + load = None |
| 35 | + generate = None |
27 | 36 | |
28 | 37 | try: |
29 | 38 | from ConfigSpace import ConfigurationSpace |
@@ -545,6 +554,173 @@ def __init__(self, api_key, model="deepseek-chat", temperature=0.8, **kwargs): |
545 | 554 | self.client = openai.OpenAI(**self._client_kwargs) |
546 | 555 | |
547 | 556 | |
| 557 | +class LMStudio_LLM(LLM): |
| 558 | + """A manager for running LLMs locally through LM Studio (e.g. MLX-optimised models).""" |
| 559 | + |
| 560 | + def __init__(self, model, config=None, **kwargs): |
| 561 | + """ |
| 562 | + Initialises the LM Studio LLM interface. |
| 563 | + |
| 564 | + :param model: Name of the model to load for interaction. |
| 565 | + :param config: Optional configuration passed to the model's respond() call. |
| 566 | + :param kwargs: Keyword arguments forwarded to the base LLM class. |
| 567 | + """ |
| 568 | + super().__init__(api_key="", model=model, **kwargs) |
| 569 | + self.llm = lms.llm(model) |
| 570 | + self.config = config |
| 571 | + |
| 572 | + def query( |
| 573 | + self, session: list[dict[str, str]], default_delay: int = 5, max_tries: int = 5 |
| 574 | + ) -> str: |
| 575 | + """ |
| 576 | + Query the LM Studio model with a chat session. |
| 577 | + |
| 578 | + ## Parameters |
| 579 | + `session: list[dict[str, str]]`: Chat history as a list of {'role': 'user'|'system', 'content': ...} messages; the last message's content is sent to the LLM. |
| 580 | + `default_delay: int`: Seconds to wait before retrying when the request raises an exception. |
| 581 | + `max_tries: int`: Maximum number of attempts to obtain a response. |
| 582 | + """ |
| 583 | + request = session[-1]["content"] |
| 584 | + for _ in range(max_tries): |
| 585 | + try: |
| 586 | + if self.config is not None: |
| 587 | + response = self.llm.respond(request, config=self.config) |
| 588 | + else: |
| 589 | + response = self.llm.respond(request) |
| 590 | + response = re.sub( # Remove thinking section, if present. |
| 591 | + r"<think>.*?</think>", "", str(response), flags=re.DOTALL |
| 592 | + ) |
| 593 | + return response |
| 594 | + except Exception: # Back off and retry on any failure. |
| 595 | + time.sleep(default_delay) |
| 596 | + continue |
| 597 | + return "" |
| 598 | + |
| 599 | + def __getstate__(self): |
| 600 | + state = self.__dict__.copy() |
| 601 | + state.pop("llm", None) |
| 602 | + return state |
| 603 | + |
| 604 | + def __setstate__(self, state): |
| 605 | + self.__dict__.update(state) |
| 606 | + self.llm = lms.llm(self.model) |
| 607 | + |
| 608 | + def __deepcopy__(self, memo): |
| 609 | + cls = self.__class__ |
| 610 | + new = cls.__new__(cls) |
| 611 | + memo[id(self)] = new |
| 612 | + for k, v in self.__dict__.items(): |
| 613 | + if k == "llm": |
| 614 | + continue |
| 615 | + setattr(new, k, copy.deepcopy(v, memo)) |
| 616 | + new.llm = self.llm |
| 617 | + return new |
| 618 | + |
| 619 | + |
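A minimal usage sketch for the new class (not part of the diff). The model identifier is a placeholder, and it assumes the `lmstudio` package is installed with the model already available in a locally running LM Studio instance:

```python
# Hypothetical example: instantiate the wrapper and send a single-turn chat.
llm = LMStudio_LLM(model="qwen2.5-7b-instruct")  # placeholder model name
session = [{"role": "user", "content": "Write a one-line Python hello world."}]
print(llm.query(session))  # returns "" if every retry fails
```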
| 620 | +class MLX_LM_LLM(LLM): |
| 621 | + """An mlx_lm-based implementation for running LLMs locally.""" |
| 622 | + |
| 623 | + def __init__( |
| 624 | + self, |
| 625 | + model, |
| 626 | + config=None, |
| 627 | + max_tokens: int = 12000, |
| 628 | + chat_template_style=None, |
| 629 | + **kwargs, |
| 630 | + ): |
| 631 | + """ |
| 632 | + Initialises the mlx_lm LLM interface. |
| 633 | + |
| 634 | + :param model: Name or path of the model to load for interaction. |
| 635 | + :param config: Optional model configuration forwarded to load() as model_config. |
| 636 | + :param max_tokens: Maximum number of tokens to generate per request. |
| 637 | + :param chat_template_style: Some models require an explicit chat template; refer to the model's Hugging Face documentation to set this parameter. |
| 638 | + :param kwargs: Keyword arguments forwarded to the base LLM class. |
| 639 | + """ |
| 640 | + super().__init__(api_key="", model=model, **kwargs) |
| 641 | + if config is not None: |
| 642 | + llm, tokenizer = load(model, model_config=config) |
| 643 | + else: |
| 644 | + llm, tokenizer = load(model) |
| 645 | + self.llm = llm |
| 646 | + self.tokenizer = tokenizer |
| 647 | + self.chat_template_style = chat_template_style |
| 648 | + print(f"Init tokeniser object: {self.tokenizer}.") |
| 649 | + |
| 650 | + self.config = config |
| 651 | + self.max_tokens = max_tokens |
| 652 | + |
| 653 | + def __getstate__(self) -> object: |
| 654 | + state = self.__dict__.copy() |
| 655 | + state.pop("tokenizer", None) |
| 656 | + state.pop("llm", None) |
| 657 | + return state |
| 658 | + |
| 659 | + def __setstate__(self, state): |
| 660 | + self.__dict__.update(state) |
| 661 | + if self.config is None: |
| 662 | + llm, tokenizer = load(self.model) |
| 663 | + else: |
| 664 | + llm, tokenizer = load(self.model, model_config=self.config) |
| 665 | + self.llm = llm |
| 666 | + self.tokenizer = tokenizer |
| 667 | + |
| 668 | + def __deepcopy__(self, memo): |
| 669 | + cls = self.__class__ |
| 670 | + new = cls.__new__(cls) |
| 671 | + memo[id(self)] = new |
| 672 | + for k, v in self.__dict__.items(): |
| 673 | + if k in ["llm", "tokenizer"]: |
| 674 | + continue |
| 675 | + setattr(new, k, copy.deepcopy(v, memo)) |
| 676 | + new.llm = self.llm # Copy by reference: the loaded model is too large to deep-copy. |
| 677 | + new.tokenizer = self.tokenizer |
| 678 | + return new |
| 679 | + |
| 680 | + def query( |
| 681 | + self, |
| 682 | + session: list, |
| 683 | + max_tries: int = 5, |
| 684 | + default_delay: int = 5, |
| 685 | + add_generation_prompt: bool = False, |
| 686 | + ): |
| 687 | + """ |
| 688 | + Query the mlx_lm model with a chat session. |
| 689 | + |
| 690 | + ## Parameters |
| 691 | + `session: list[dict[str, str]]`: Chat history as a list of {'role': 'user'|'system', 'content': ...} messages, rendered through the tokenizer's chat template. |
| 692 | + `max_tries: int`: Maximum number of attempts to obtain a response. |
| 693 | + `default_delay: int`: Seconds to wait before retrying when generation raises an exception. |
| 694 | + `add_generation_prompt: bool`: Whether to append the generation prompt when applying the chat template. |
| 695 | + """ |
| 696 | + if self.chat_template_style is not None: |
| 697 | + prompt = self.tokenizer.apply_chat_template( |
| 698 | + session, |
| 699 | + add_generation_prompt=add_generation_prompt, |
| 700 | + chat_template=self.chat_template_style, |
| 701 | + ) |
| 702 | + else: |
| 703 | + prompt = self.tokenizer.apply_chat_template( |
| 704 | + session, add_generation_prompt=add_generation_prompt |
| 705 | + ) |
| 706 | + for _ in range(max_tries): |
| 707 | + try: |
| 708 | + response = generate( |
| 709 | + self.llm, |
| 710 | + self.tokenizer, |
| 711 | + prompt, |
| 712 | + max_tokens=self.max_tokens, # Cap the number of generated tokens. |
| 713 | + ) |
| 714 | + response = re.sub( # Remove thinking section, if present. |
| 715 | + r"<think>.*?</think>", "", str(response), flags=re.DOTALL |
| 716 | + ) |
| 717 | + return response |
| 718 | + except Exception: # Back off and retry on any failure. |
| 719 | + time.sleep(default_delay) |
| 720 | + continue |
| 721 | + return "" |
| 722 | + |
| 723 | + |
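Similarly, a minimal usage sketch for MLX_LM_LLM (not part of the diff). The repository id is a placeholder, and it assumes `mlx-lm` is installed on Apple Silicon hardware:

```python
# Hypothetical example: load an MLX-converted model and query it with a short session.
llm = MLX_LM_LLM(
    model="mlx-community/Meta-Llama-3-8B-Instruct-4bit",  # placeholder repo id
    max_tokens=4096,
)
session = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Summarise MLX in one sentence."},
]
print(llm.query(session, add_generation_prompt=True))
```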
548 | 724 | class Dummy_LLM(LLM): |
549 | 725 | def __init__(self, model="DUMMY", **kwargs): |
550 | 726 | """ |