33import asyncio
44import logging
55
6+ from contextlib import suppress
7+
68import pytest
79from pytest import LogCaptureFixture as LogCap
810from pytest_subtests import SubTests
1315from ..support import (
1416 LLM_LOAD_CONFIG ,
1517 EXPECTED_LLM ,
16- EXPECTED_LLM_DEFAULT_ID ,
1718 EXPECTED_LLM_ID ,
1819 EXPECTED_EMBEDDING ,
19- EXPECTED_EMBEDDING_DEFAULT_ID ,
2020 EXPECTED_EMBEDDING_ID ,
2121 EXPECTED_VLM_ID ,
22+ SMALL_LLM_ID ,
2223 TOOL_LLM_ID ,
2324 check_sdk_error ,
2425)
@@ -291,16 +292,17 @@ async def test_get_or_load_when_unloaded_llm_async(caplog: LogCap) -> None:
291292 caplog .set_level (logging .DEBUG )
292293 async with AsyncClient () as client :
293294 llm = client .llm
294- await llm .unload (EXPECTED_LLM_ID )
295- model = await llm .model (EXPECTED_LLM_DEFAULT_ID , config = LLM_LOAD_CONFIG )
296- assert model .identifier == EXPECTED_LLM_DEFAULT_ID
295+ with suppress (LMStudioModelNotFoundError ):
296+ await llm .unload (EXPECTED_LLM_ID )
297+ model = await llm .model (EXPECTED_LLM_ID , config = LLM_LOAD_CONFIG )
298+ assert model .identifier == EXPECTED_LLM_ID
297299 # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
298300 # so ensure we restore a regular non-JIT instance with no TTL set
299- await llm .unload (EXPECTED_LLM_ID )
301+ await model .unload ()
300302 model = await llm .load_new_instance (
301- EXPECTED_LLM_DEFAULT_ID , config = LLM_LOAD_CONFIG , ttl = None
303+ EXPECTED_LLM_ID , config = LLM_LOAD_CONFIG , ttl = None
302304 )
303- assert model .identifier == EXPECTED_LLM_DEFAULT_ID
305+ assert model .identifier == EXPECTED_LLM_ID
304306
305307
306308@pytest .mark .asyncio
@@ -310,13 +312,83 @@ async def test_get_or_load_when_unloaded_embedding_async(caplog: LogCap) -> None
310312 caplog .set_level (logging .DEBUG )
311313 async with AsyncClient () as client :
312314 embedding = client .embedding
313- await embedding .unload (EXPECTED_EMBEDDING_ID )
314- model = await embedding .model (EXPECTED_EMBEDDING_DEFAULT_ID )
315- assert model .identifier == EXPECTED_EMBEDDING_DEFAULT_ID
315+ with suppress (LMStudioModelNotFoundError ):
316+ await embedding .unload (EXPECTED_EMBEDDING_ID )
317+ model = await embedding .model (EXPECTED_EMBEDDING_ID )
318+ assert model .identifier == EXPECTED_EMBEDDING_ID
316319 # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
317320 # so ensure we restore a regular non-JIT instance with no TTL set
318- await embedding .unload (EXPECTED_EMBEDDING_ID )
319- model = await embedding .load_new_instance (
320- EXPECTED_EMBEDDING_DEFAULT_ID , ttl = None
321+ await model .unload ()
322+ model = await embedding .load_new_instance (EXPECTED_EMBEDDING_ID , ttl = None )
323+ assert model .identifier == EXPECTED_EMBEDDING_ID
324+
325+
@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.lmstudio
async def test_jit_unloading_async(caplog: LogCap) -> None:
    """Check cross-namespace JIT auto-unloading and the log messages it emits.

    For the time being, only test the embedding vs LLM cross-namespace
    JIT unloading (since that ensures the info type mixing is handled).
    Assuming LM Studio eventually switches to per-namespace JIT unloading,
    this can be split into separate LLM and embedding test cases at that time.
    """
    caplog.set_level(logging.DEBUG)
    async with AsyncClient() as client:
        # Unload the non-JIT instance of the embedding model
        # (it may not be loaded, so tolerate "model not found")
        with suppress(LMStudioModelNotFoundError):
            await client.embedding.unload(EXPECTED_EMBEDDING_ID)
        # Load a JIT instance of the embedding model
        model1 = await client.embedding.model(EXPECTED_EMBEDDING_ID, ttl=300)
        assert model1.identifier == EXPECTED_EMBEDDING_ID
        model1_info = await model1.get_info()
        assert model1_info.identifier == model1.identifier
        # Load a JIT instance of the small testing LLM
        # This will unload the JIT instance of the testing embedding model
        model2 = await client.llm.model(SMALL_LLM_ID, ttl=300)
        assert model2.identifier == SMALL_LLM_ID
        model2_info = await model2.get_info()
        assert model2_info.identifier == model2.identifier
        # Attempting to query the now unloaded JIT embedding model will fail
        with pytest.raises(LMStudioModelNotFoundError):
            await model1.get_info()
        # Restore things to the way other test cases expect them to be
        await model2.unload()
        model = await client.embedding.load_new_instance(
            EXPECTED_EMBEDDING_ID, ttl=None
        )
        assert model.identifier == EXPECTED_EMBEDDING_ID

    # Check for expected log messages: exactly one JIT-unload notification
    # at each of the DEBUG and INFO levels
    jit_unload_event = "Unloading other JIT model"
    jit_unload_messages_debug: list[str] = []
    jit_unload_messages_info: list[str] = []
    jit_unload_messages = {
        logging.DEBUG: jit_unload_messages_debug,
        logging.INFO: jit_unload_messages_info,
    }
    for _logger_name, log_level, message in caplog.record_tuples:
        if jit_unload_event not in message:
            continue
        jit_unload_messages[log_level].append(message)

    assert len(jit_unload_messages_info) == 1
    assert len(jit_unload_messages_debug) == 1

    info_message = jit_unload_messages_info[0]
    debug_message = jit_unload_messages_debug[0]
    # Ensure info message omits model info, but includes config guidance
    unload_notice = f'"event": "{jit_unload_event}"'
    assert unload_notice in info_message
    loading_model_notice = f'"model_key": "{SMALL_LLM_ID}"'
    assert loading_model_notice in info_message
    unloaded_model_notice = f'"unloaded_model_key": "{EXPECTED_EMBEDDING_ID}"'
    assert unloaded_model_notice in info_message
    assert '"suggestion": ' in info_message
    assert "disable this behavior" in info_message
    assert '"unloaded_model": ' not in info_message
    # Ensure debug message includes model info, but omits config guidance
    assert unload_notice in debug_message
    # Fixed copy-paste bug: this previously re-checked info_message, leaving
    # the debug message's "model_key" field unverified
    assert loading_model_notice in debug_message
    assert unloaded_model_notice in debug_message
    assert '"suggestion": ' not in debug_message
    assert "disable this behavior" not in debug_message
    assert '"unloaded_model": ' in debug_message
0 commit comments