
Commit 7bc3d31

Revert "MLX-LM backend" (#50)
1 parent 7ec0c05 commit 7bc3d31

File tree: 12 files changed (+5, -586 lines)

.github/workflows/coverage.yml

Lines changed: 1 addition & 36 deletions
````diff
@@ -32,42 +32,7 @@ jobs:
       - name: Run tests
         run: |
           source venv/bin/activate
-          coverage run --source=genlm/backend -m pytest --benchmark-disable --ignore=tests/test_mlx.py
-          coverage json --omit "*/test*"
-          coverage report --omit "*/test*"
-
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
-        with:
-          fail_ci_if_error: false
-          token: ${{ secrets.CODECOV_TOKEN }}
-          files: ./coverage.json
-          slug: genlm/genlm-backend
-
-  test_mlx_coverage:
-    runs-on: macos-14
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11.5
-          cache: 'pip'
-
-      - name: Install dependencies
-        run: |
-          python -m venv venv
-          source venv/bin/activate
-          pip install -e .[mlx]
-          pip install -r requirements-dev.txt
-
-      - name: Run MLX tests
-        run: |
-          source venv/bin/activate
-          coverage run --source=genlm/backend -m pytest tests/test_mlx.py --benchmark-disable
+          coverage run --source=genlm/backend -m pytest --benchmark-disable
           coverage json --omit "*/test*"
           coverage report --omit "*/test*"
````
7338

.github/workflows/pytest.yml

Lines changed: 1 addition & 22 deletions
````diff
@@ -29,25 +29,4 @@ jobs:
           source venv/bin/activate
           pip install -e .[test]
           pip install -r requirements-dev.txt
-          python -m pytest tests --ignore=tests/test_mlx.py
-
-  test-mlx:
-    runs-on: macos-14
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11.5
-          cache: 'pip'
-
-      - name: Run MLX Tests
-        run: |
-          python -m venv venv
-          source venv/bin/activate
-          pip install -e .[mlx]
-          pip install -r requirements-dev.txt
-          python -m pytest tests/test_mlx.py
+          python -m pytest tests
````

DEVELOPING.md

Lines changed: 0 additions & 5 deletions
````diff
@@ -27,11 +27,6 @@ uv pip install -e ".[docs]"
 uv pip install -r requirements-dev.txt
 ```
 
-To build with MLX support, run:
-```bash
-uv pip install -e ".[mlx]"
-```
-
 ## Testing
 
 When test dependencies are installed, the test suite can be run via:
````

README.md

Lines changed: 0 additions & 8 deletions
````diff
@@ -18,7 +18,6 @@ See our [documentation](https://genlm.github.io/genlm-backend/).
 - Automatic batching of concurrent log-probability requests, enabling efficient large-scale inference without having to write batching logic yourself
 - Byte-level decoding of transformers tokenizers, enabling advanced token-level control
 - Support for arbitrary Hugging Face models (e.g., LLaMA, DeepSeek, etc.) with fast inference and automatic KV caching using vllm
-- NEW: support for MLX-LM library, allowing faster inference on Apple silicon devices.
 
 
 ## ⚡ Quick Start
@@ -29,13 +28,6 @@ This library supports installation via pip:
 pip install genlm-backend
 ```
 
-Or to install with MLX support, run:
-
-```bash
-pip install genlm-backend[mlx]
-```
-
-
 ## 🧪 Example: Autobatched Sequential Importance Sampling with LLMs
 
 This example demonstrates how `genlm-backend` enables concise, scalable probabilistic inference with language models. It implements a Sequential Importance Sampling (SIS) algorithm that makes asynchronous log-probability requests which get automatically batched by the language model.
````
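The SIS example itself sits outside this diff, but the autobatching pattern it relies on can be sketched. A minimal, hypothetical illustration: it assumes `AsyncLM` exposes an async `next_token_logprobs(token_ids)` method and that the `mock` backend (see `genlm/backend/llm/__init__.py` below) resolves the `gpt2` tokenizer; neither is confirmed by this commit.

```python
# Hypothetical sketch of the autobatching pattern the README describes,
# NOT the README's actual SIS example. Assumes an async
# next_token_logprobs(token_ids) method on the returned AsyncLM; the
# "mock" backend keeps the sketch lightweight.
import asyncio

from genlm.backend.llm import load_model_by_name


async def main():
    llm = load_model_by_name("gpt2", backend="mock")
    prefix = [464, 6193, 318]  # illustrative token ids
    # Concurrent requests for three continuations; the backend may fold
    # them into a single batched forward pass instead of running three.
    logprob_vectors = await asyncio.gather(
        *(llm.next_token_logprobs(prefix + [t]) for t in (262, 281, 257))
    )
    print(len(logprob_vectors))  # 3


asyncio.run(main())
```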

benchmark/benchmark_mlx.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

genlm/backend/cache.py

Lines changed: 0 additions & 27 deletions
````diff
@@ -43,33 +43,6 @@ def clear(self):
         self.cache.clear()
 
 
-class OutputMLXCache(OutputCache):
-    """A cache for storing tensor outputs with MLX.
-
-    Since MLX uses unified memory, we don't need to move tensors between CPU and GPU.
-
-    Args:
-        maxsize (int): Maximum number of items to store in the cache
-    """
-
-    def __init__(self, maxsize):
-        super().__init__(maxsize, move_to_cpu=False)
-
-    def __getitem__(self, key):
-        if key in self.cache:
-            value = self.cache.pop(key)
-            self.cache[key] = value
-            return value
-        raise KeyError(key)
-
-    def __setitem__(self, key, value):
-        if len(self.cache) >= self.maxsize:
-            _, old_tensor = self.cache.popitem(last=False)
-            del old_tensor
-
-        self.cache[key] = value
-
-
 class TokenTrie:
     """Class used internally to cache language model results.
````
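For readers skimming the revert: the deleted class is a plain LRU cache over an ordered mapping, minus the CPU offloading that `OutputCache` applies to CUDA tensors. A standalone sketch of that LRU behavior follows; the `OrderedDict`-backed `self.cache` attribute is inferred from the deleted methods, not confirmed by this diff, and the class name is illustrative.

```python
# Standalone sketch of the LRU behavior the deleted OutputMLXCache used.
# Assumption: the OutputCache base class stores entries in an OrderedDict
# attribute named `cache`. Names here are illustrative, not library internals.
from collections import OrderedDict


class LRUCacheSketch:
    def __init__(self, maxsize):
        self.maxsize = maxsize
        self.cache = OrderedDict()

    def __getitem__(self, key):
        if key in self.cache:
            value = self.cache.pop(key)
            self.cache[key] = value  # re-insert so the key becomes most recently used
            return value
        raise KeyError(key)

    def __setitem__(self, key, value):
        if len(self.cache) >= self.maxsize:
            self.cache.popitem(last=False)  # drop the least recently used entry
        self.cache[key] = value


cache = LRUCacheSketch(maxsize=2)
cache["a"] = 1
cache["b"] = 2
_ = cache["a"]   # touch "a" so "b" becomes least recently used
cache["c"] = 3   # evicts "b"
assert "b" not in cache.cache and "a" in cache.cache
```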

genlm/backend/llm/__init__.py

Lines changed: 0 additions & 4 deletions
````diff
@@ -1,7 +1,6 @@
 from genlm.backend.llm.vllm import AsyncVirtualLM
 from genlm.backend.llm.hf import AsyncTransformer
 from genlm.backend.llm.base import AsyncLM, MockAsyncLM
-from genlm.backend.llm.mlx import AsyncMlxLM
 
 import torch
 
@@ -34,8 +33,6 @@ def load_model_by_name(name, backend=None, llm_opts=None):
         return AsyncTransformer.from_name(name, **llm_opts)
     elif backend == "mock":
         return MockAsyncLM.from_name(name, **llm_opts)
-    elif backend == "mlx":
-        return AsyncMlxLM.from_name(name, **llm_opts)
     else:
         raise ValueError(f"Invalid backend: {backend}")
 
@@ -45,6 +42,5 @@
     "AsyncLM",
     "AsyncVirtualLM",
     "AsyncTransformer",
-    "AsyncMlxLM",
     "MockAsyncLM",
 ]
````
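A short usage sketch of the dispatch after the revert. Only the `"mock"` branch and the `ValueError` fallback are visible in this hunk; `"gpt2"` is an illustrative model name.

```python
from genlm.backend.llm import load_model_by_name

# Confirmed by the hunk: "mock" dispatches to MockAsyncLM.from_name.
mock_llm = load_model_by_name("gpt2", backend="mock")

# The removed "mlx" branch now falls through to the ValueError fallback.
try:
    load_model_by_name("gpt2", backend="mlx")
except ValueError as err:
    print(err)  # Invalid backend: mlx
```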
