diff --git a/src/lmstudio/json_api.py b/src/lmstudio/json_api.py
index 1218eb4..e438323 100644
--- a/src/lmstudio/json_api.py
+++ b/src/lmstudio/json_api.py
@@ -862,6 +862,27 @@ def iter_message_events(
                 yield from self._update_progress(0.0)
             case {"type": "loadProgress" | "progress", "progress": progress}:
                 yield from self._update_progress(progress)
+            case {"type": "unloadingOtherJITModel", "info": other_model_info} if (
+                "modelKey" in other_model_info
+            ):
+                jit_unload_event = "Unloading other JIT model"
+                unloaded_model_key = other_model_info["modelKey"]
+                suggestion = (
+                    "You can disable this behavior by going to "
+                    "LM Studio -> Settings -> Developer -> Turn OFF JIT models auto-evict"
+                )
+                # Report the JIT unload
+                self._logger.info(
+                    jit_unload_event,
+                    unloaded_model_key=unloaded_model_key,
+                    suggestion=suggestion,
+                )
+                # Report further details on the unloaded model if debug messages are enabled
+                self._logger.debug(
+                    jit_unload_event,
+                    unloaded_model_key=unloaded_model_key,
+                    unloaded_model=other_model_info,
+                )
             case {
                 "type": "success" | "alreadyLoaded" | "loadSuccess",
                 "info": {
diff --git a/tests/README.md b/tests/README.md
index 92ebf5e..e15419a 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -13,18 +13,24 @@ conditions must also be met for the test suite to pass:
 - the API server must be enabled and running on port 1234
 - the following models must be loaded with their default identifiers
   - `text-embedding-nomic-embed-text-v1.5` (text embedding model)
-  - `llama-3.2-1b-instruct` (chat oriented text LLM)
+  - `llama-3.2-1b-instruct` (text LLM)
   - `ZiangWu/MobileVLM_V2-1.7B-GGUF` (visual LLM)
   - `qwen2.5-7b-instruct-1m` (tool using LLM)
 
 Additional models should NOT be loaded when running the test suite,
 as some model querying tests may fail in that case.
-However, there's no problem with having additional models downloaded.
+There are also some JIT model loading/unloading test cases which
+expect `smollm2-135m` (small text LLM) to already be downloaded.
+A full test run will download this model (since it is also the
+model used for the end-to-end search-and-download test case).
+
+There's no problem with having additional models downloaded.
 The only impact is that the test that checks all of the expected
 models can be found in the list of downloaded models will take
 a little longer to run.
 
+
 # Loading and unloading the required models
 
 The `load-test-models` `tox` environment can be used to ensure the required
@@ -44,6 +50,12 @@ explicitly unload the test models:
 $ tox -m unload-test-models
 ```
 
+The model downloading test cases can be specifically run with:
+
+```console
+$ tox -m test -- -k test_download_model
+```
+
 
 ## Adding new tests
 
diff --git a/tests/async/test_model_catalog_async.py b/tests/async/test_model_catalog_async.py
index a8db3cf..af493a6 100644
--- a/tests/async/test_model_catalog_async.py
+++ b/tests/async/test_model_catalog_async.py
@@ -3,6 +3,8 @@
 import asyncio
 import logging
 
+from contextlib import suppress
+
 import pytest
 from pytest import LogCaptureFixture as LogCap
 from pytest_subtests import SubTests
@@ -13,12 +15,11 @@
 from ..support import (
     LLM_LOAD_CONFIG,
     EXPECTED_LLM,
-    EXPECTED_LLM_DEFAULT_ID,
     EXPECTED_LLM_ID,
     EXPECTED_EMBEDDING,
-    EXPECTED_EMBEDDING_DEFAULT_ID,
     EXPECTED_EMBEDDING_ID,
     EXPECTED_VLM_ID,
+    SMALL_LLM_ID,
     TOOL_LLM_ID,
     check_sdk_error,
 )
@@ -291,16 +292,17 @@ async def test_get_or_load_when_unloaded_llm_async(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     async with AsyncClient() as client:
         llm = client.llm
-        await llm.unload(EXPECTED_LLM_ID)
-        model = await llm.model(EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG)
-        assert model.identifier == EXPECTED_LLM_DEFAULT_ID
+        with suppress(LMStudioModelNotFoundError):
+            await llm.unload(EXPECTED_LLM_ID)
+        model = await llm.model(EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG)
+        assert model.identifier == EXPECTED_LLM_ID
         # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
         # so ensure we restore a regular non-JIT instance with no TTL set
-        await llm.unload(EXPECTED_LLM_ID)
+        await model.unload()
         model = await llm.load_new_instance(
-            EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG, ttl=None
+            EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG, ttl=None
         )
-        assert model.identifier == EXPECTED_LLM_DEFAULT_ID
+        assert model.identifier == EXPECTED_LLM_ID
 
 
 @pytest.mark.asyncio
@@ -310,13 +312,83 @@ async def test_get_or_load_when_unloaded_embedding_async(caplog: LogCap) -> None
     caplog.set_level(logging.DEBUG)
     async with AsyncClient() as client:
         embedding = client.embedding
-        await embedding.unload(EXPECTED_EMBEDDING_ID)
-        model = await embedding.model(EXPECTED_EMBEDDING_DEFAULT_ID)
-        assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
+        with suppress(LMStudioModelNotFoundError):
+            await embedding.unload(EXPECTED_EMBEDDING_ID)
+        model = await embedding.model(EXPECTED_EMBEDDING_ID)
+        assert model.identifier == EXPECTED_EMBEDDING_ID
         # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
         # so ensure we restore a regular non-JIT instance with no TTL set
-        await embedding.unload(EXPECTED_EMBEDDING_ID)
-        model = await embedding.load_new_instance(
-            EXPECTED_EMBEDDING_DEFAULT_ID, ttl=None
+        await model.unload()
+        model = await embedding.load_new_instance(EXPECTED_EMBEDDING_ID, ttl=None)
+        assert model.identifier == EXPECTED_EMBEDDING_ID
+
+
+@pytest.mark.asyncio
+@pytest.mark.slow
+@pytest.mark.lmstudio
+async def test_jit_unloading_async(caplog: LogCap) -> None:
+    # For the time being, only test the embedding vs LLM cross-namespace
+    # JIT unloading (since that ensures the info type mixing is handled).
+    # Assuming LM Studio eventually switches to per-namespace JIT unloading,
+    # this can be split into separate LLM and embedding test cases at that time.
+    caplog.set_level(logging.DEBUG)
+    async with AsyncClient() as client:
+        # Unload the non-JIT instance of the embedding model
+        with suppress(LMStudioModelNotFoundError):
+            await client.embedding.unload(EXPECTED_EMBEDDING_ID)
+        # Load a JIT instance of the embedding model
+        model1 = await client.embedding.model(EXPECTED_EMBEDDING_ID, ttl=300)
+        assert model1.identifier == EXPECTED_EMBEDDING_ID
+        model1_info = await model1.get_info()
+        assert model1_info.identifier == model1.identifier
+        # Load a JIT instance of the small testing LLM
+        # This will unload the JIT instance of the testing embedding model
+        model2 = await client.llm.model(SMALL_LLM_ID, ttl=300)
+        assert model2.identifier == SMALL_LLM_ID
+        model2_info = await model2.get_info()
+        assert model2_info.identifier == model2.identifier
+        # Attempting to query the now unloaded JIT embedding model will fail
+        with pytest.raises(LMStudioModelNotFoundError):
+            await model1.get_info()
+        # Restore things to the way other test cases expect them to be
+        await model2.unload()
+        model = await client.embedding.load_new_instance(
+            EXPECTED_EMBEDDING_ID, ttl=None
         )
-        assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
+        assert model.identifier == EXPECTED_EMBEDDING_ID
+
+    # Check for expected log messages
+    jit_unload_event = "Unloading other JIT model"
+    jit_unload_messages_debug: list[str] = []
+    jit_unload_messages_info: list[str] = []
+    jit_unload_messages = {
+        logging.DEBUG: jit_unload_messages_debug,
+        logging.INFO: jit_unload_messages_info,
+    }
+    for _logger_name, log_level, message in caplog.record_tuples:
+        if jit_unload_event not in message:
+            continue
+        jit_unload_messages[log_level].append(message)
+
+    assert len(jit_unload_messages_info) == 1
+    assert len(jit_unload_messages_debug) == 1
+
+    info_message = jit_unload_messages_info[0]
+    debug_message = jit_unload_messages_debug[0]
+    # Ensure info message omits model info, but includes config guidance
+    unload_notice = f'"event": "{jit_unload_event}"'
+    assert unload_notice in info_message
+    loading_model_notice = f'"model_key": "{SMALL_LLM_ID}"'
+    assert loading_model_notice in info_message
+    unloaded_model_notice = f'"unloaded_model_key": "{EXPECTED_EMBEDDING_ID}"'
+    assert unloaded_model_notice in info_message
+    assert '"suggestion": ' in info_message
+    assert "disable this behavior" in info_message
+    assert '"unloaded_model": ' not in info_message
+    # Ensure debug message includes model info, but omits config guidance
+    assert unload_notice in debug_message
+    assert loading_model_notice in debug_message
+    assert unloaded_model_notice in debug_message
+    assert '"suggestion": ' not in debug_message
+    assert "disable this behavior" not in debug_message
+    assert '"unloaded_model": ' in debug_message
diff --git a/tests/async/test_repository_async.py b/tests/async/test_repository_async.py
index 606994c..ae6337e 100644
--- a/tests/async/test_repository_async.py
+++ b/tests/async/test_repository_async.py
@@ -7,7 +7,7 @@
 
 from lmstudio import AsyncClient, LMStudioClientError
 
-from ..support import EXPECTED_DOWNLOAD_SEARCH_TERM
+from ..support import SMALL_LLM_SEARCH_TERM
 
 
 # N.B. We can maybe provide a reference list for what should be available
@@ -21,7 +21,7 @@
 async def test_download_model_async(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     async with AsyncClient() as client:
-        models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         logging.info(f"Models: {models}")
         assert models
         assert isinstance(models, list)
@@ -45,7 +45,7 @@
 async def test_get_options_out_of_session_async(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     async with AsyncClient() as client:
-        models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         assert models
         assert isinstance(models, list)
         assert len(models) > 0
@@ -60,7 +60,7 @@
 async def test_download_out_of_session_async(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     async with AsyncClient() as client:
-        models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         logging.info(f"Models: {models}")
         assert models
         assert isinstance(models, list)
diff --git a/tests/support/__init__.py b/tests/support/__init__.py
index a3f93a2..1bd4a3c 100644
--- a/tests/support/__init__.py
+++ b/tests/support/__init__.py
@@ -24,14 +24,12 @@
 THIS_DIR = Path(__file__).parent
 
 LOCAL_API_HOST = "localhost:1234"
-EXPECTED_DOWNLOAD_SEARCH_TERM = "smollm2-135m"
 
 ####################################################
 # Embedding model testing
 ####################################################
 EXPECTED_EMBEDDING = "nomic-ai/nomic-embed-text-v1.5"
 EXPECTED_EMBEDDING_ID = "text-embedding-nomic-embed-text-v1.5"
-EXPECTED_EMBEDDING_DEFAULT_ID = EXPECTED_EMBEDDING_ID  # the same for now
 EXPECTED_EMBEDDING_LENGTH = 768  # nomic has embedding dimension 768
 EXPECTED_EMBEDDING_CONTEXT_LENGTH = 2048  # nomic accepts a 2048 token context
 
@@ -40,7 +38,6 @@
 ####################################################
 EXPECTED_LLM = "hugging-quants/llama-3.2-1b-instruct"
 EXPECTED_LLM_ID = "llama-3.2-1b-instruct"
-EXPECTED_LLM_DEFAULT_ID = EXPECTED_LLM_ID  # the same for now
 PROMPT = "Hello"
 MAX_PREDICTED_TOKENS = 50
 # Use a dict here to ensure dicts are accepted in all config APIs,
@@ -68,6 +65,12 @@
 ####################################################
 TOOL_LLM_ID = "qwen2.5-7b-instruct-1m"
 
+####################################################
+# Other specific models needed for testing
+####################################################
+SMALL_LLM_SEARCH_TERM = "smollm2-135m"
+SMALL_LLM_ID = "smollm2-135m-instruct"
+
 ####################################################
 # Structured LLM responses
 ####################################################
diff --git a/tests/sync/test_model_catalog_sync.py b/tests/sync/test_model_catalog_sync.py
index b0887e0..77ffecd 100644
--- a/tests/sync/test_model_catalog_sync.py
+++ b/tests/sync/test_model_catalog_sync.py
@@ -10,6 +10,8 @@
 import logging
 
 from contextlib import nullcontext
+from contextlib import suppress
+
 import pytest
 from pytest import LogCaptureFixture as LogCap
 from pytest_subtests import SubTests
@@ -20,12 +22,11 @@
 from ..support import (
     LLM_LOAD_CONFIG,
     EXPECTED_LLM,
-    EXPECTED_LLM_DEFAULT_ID,
     EXPECTED_LLM_ID,
     EXPECTED_EMBEDDING,
-    EXPECTED_EMBEDDING_DEFAULT_ID,
     EXPECTED_EMBEDDING_ID,
     EXPECTED_VLM_ID,
+    SMALL_LLM_ID,
     TOOL_LLM_ID,
     check_sdk_error,
 )
@@ -278,16 +279,15 @@ def test_get_or_load_when_unloaded_llm_sync(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     with Client() as client:
         llm = client.llm
-        llm.unload(EXPECTED_LLM_ID)
-        model = llm.model(EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG)
-        assert model.identifier == EXPECTED_LLM_DEFAULT_ID
+        with suppress(LMStudioModelNotFoundError):
+            llm.unload(EXPECTED_LLM_ID)
+        model = llm.model(EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG)
+        assert model.identifier == EXPECTED_LLM_ID
         # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
         # so ensure we restore a regular non-JIT instance with no TTL set
-        llm.unload(EXPECTED_LLM_ID)
-        model = llm.load_new_instance(
-            EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG, ttl=None
-        )
-        assert model.identifier == EXPECTED_LLM_DEFAULT_ID
+        model.unload()
+        model = llm.load_new_instance(EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG, ttl=None)
+        assert model.identifier == EXPECTED_LLM_ID
 
 
 @pytest.mark.slow
@@ -296,11 +296,80 @@ def test_get_or_load_when_unloaded_embedding_sync(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     with Client() as client:
         embedding = client.embedding
-        embedding.unload(EXPECTED_EMBEDDING_ID)
-        model = embedding.model(EXPECTED_EMBEDDING_DEFAULT_ID)
-        assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
+        with suppress(LMStudioModelNotFoundError):
+            embedding.unload(EXPECTED_EMBEDDING_ID)
+        model = embedding.model(EXPECTED_EMBEDDING_ID)
+        assert model.identifier == EXPECTED_EMBEDDING_ID
         # LM Studio may default to JIT handling for models loaded with `getOrLoad`,
         # so ensure we restore a regular non-JIT instance with no TTL set
-        embedding.unload(EXPECTED_EMBEDDING_ID)
-        model = embedding.load_new_instance(EXPECTED_EMBEDDING_DEFAULT_ID, ttl=None)
-        assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
+        model.unload()
+        model = embedding.load_new_instance(EXPECTED_EMBEDDING_ID, ttl=None)
+        assert model.identifier == EXPECTED_EMBEDDING_ID
+
+
+@pytest.mark.slow
+@pytest.mark.lmstudio
+def test_jit_unloading_sync(caplog: LogCap) -> None:
+    # For the time being, only test the embedding vs LLM cross-namespace
+    # JIT unloading (since that ensures the info type mixing is handled).
+    # Assuming LM Studio eventually switches to per-namespace JIT unloading,
+    # this can be split into separate LLM and embedding test cases at that time.
+    caplog.set_level(logging.DEBUG)
+    with Client() as client:
+        # Unload the non-JIT instance of the embedding model
+        with suppress(LMStudioModelNotFoundError):
+            client.embedding.unload(EXPECTED_EMBEDDING_ID)
+        # Load a JIT instance of the embedding model
+        model1 = client.embedding.model(EXPECTED_EMBEDDING_ID, ttl=300)
+        assert model1.identifier == EXPECTED_EMBEDDING_ID
+        model1_info = model1.get_info()
+        assert model1_info.identifier == model1.identifier
+        # Load a JIT instance of the small testing LLM
+        # This will unload the JIT instance of the testing embedding model
+        model2 = client.llm.model(SMALL_LLM_ID, ttl=300)
+        assert model2.identifier == SMALL_LLM_ID
+        model2_info = model2.get_info()
+        assert model2_info.identifier == model2.identifier
+        # Attempting to query the now unloaded JIT embedding model will fail
+        with pytest.raises(LMStudioModelNotFoundError):
+            model1.get_info()
+        # Restore things to the way other test cases expect them to be
+        model2.unload()
+        model = client.embedding.load_new_instance(EXPECTED_EMBEDDING_ID, ttl=None)
+        assert model.identifier == EXPECTED_EMBEDDING_ID
+
+    # Check for expected log messages
+    jit_unload_event = "Unloading other JIT model"
+    jit_unload_messages_debug: list[str] = []
+    jit_unload_messages_info: list[str] = []
+    jit_unload_messages = {
+        logging.DEBUG: jit_unload_messages_debug,
+        logging.INFO: jit_unload_messages_info,
+    }
+    for _logger_name, log_level, message in caplog.record_tuples:
+        if jit_unload_event not in message:
+            continue
+        jit_unload_messages[log_level].append(message)
+
+    assert len(jit_unload_messages_info) == 1
+    assert len(jit_unload_messages_debug) == 1
+
+    info_message = jit_unload_messages_info[0]
+    debug_message = jit_unload_messages_debug[0]
+    # Ensure info message omits model info, but includes config guidance
+    unload_notice = f'"event": "{jit_unload_event}"'
+    assert unload_notice in info_message
+    loading_model_notice = f'"model_key": "{SMALL_LLM_ID}"'
+    assert loading_model_notice in info_message
+    unloaded_model_notice = f'"unloaded_model_key": "{EXPECTED_EMBEDDING_ID}"'
+    assert unloaded_model_notice in info_message
+    assert '"suggestion": ' in info_message
+    assert "disable this behavior" in info_message
+    assert '"unloaded_model": ' not in info_message
+    # Ensure debug message includes model info, but omits config guidance
+    assert unload_notice in debug_message
+    assert loading_model_notice in debug_message
+    assert unloaded_model_notice in debug_message
+    assert '"suggestion": ' not in debug_message
+    assert "disable this behavior" not in debug_message
+    assert '"unloaded_model": ' in debug_message
diff --git a/tests/sync/test_repository_sync.py b/tests/sync/test_repository_sync.py
index e166180..55dd6f2 100644
--- a/tests/sync/test_repository_sync.py
+++ b/tests/sync/test_repository_sync.py
@@ -14,7 +14,7 @@
 
 from lmstudio import Client, LMStudioClientError
 
-from ..support import EXPECTED_DOWNLOAD_SEARCH_TERM
+from ..support import SMALL_LLM_SEARCH_TERM
 
 
 # N.B. We can maybe provide a reference list for what should be available
@@ -27,7 +27,7 @@
 def test_download_model_sync(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     with Client() as client:
-        models = client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         logging.info(f"Models: {models}")
         assert models
         assert isinstance(models, list)
@@ -50,7 +50,7 @@
 def test_get_options_out_of_session_sync(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     with Client() as client:
-        models = client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         assert models
         assert isinstance(models, list)
         assert len(models) > 0
@@ -64,7 +64,7 @@
 def test_download_out_of_session_sync(caplog: LogCap) -> None:
     caplog.set_level(logging.DEBUG)
     with Client() as client:
-        models = client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
+        models = client.repository.search_models(SMALL_LLM_SEARCH_TERM)
         logging.info(f"Models: {models}")
         assert models
         assert isinstance(models, list)
diff --git a/tests/test_kv_config.py b/tests/test_kv_config.py
index e0535c0..d493491 100644
--- a/tests/test_kv_config.py
+++ b/tests/test_kv_config.py
@@ -35,9 +35,14 @@
     "mainGpu": 0,
     "ratio": 0.5,
     "splitStrategy": "evenly",
-    "disabledGpus": [1, 2]
+    "disabledGpus": [1, 2],
+}
+SC_GPU_CONFIG = {
+    "main_gpu": 0,
+    "ratio": 0.5,
+    "split_strategy": "evenly",
+    "disabled_gpus": [1, 2],
 }
-SC_GPU_CONFIG = {"main_gpu": 0, "ratio": 0.5, "split_strategy": "evenly", "disabled_gpus": [1, 2]}
 
 LOAD_CONFIG_EMBEDDING: EmbeddingLoadModelConfigDict = {
     "contextLength": 1978,
diff --git a/tox.ini b/tox.ini
index f7f9091..ee399b0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -18,6 +18,7 @@ groups = dev
 allowlist_externals = pytest
 passenv =
     CI
+    LMS_*
 commands =
     # Even the "slow" tests aren't absurdly slow, so default to running them
     pytest {posargs} tests/