21 changes: 21 additions & 0 deletions src/lmstudio/json_api.py
@@ -862,6 +862,27 @@ def iter_message_events(
yield from self._update_progress(0.0)
case {"type": "loadProgress" | "progress", "progress": progress}:
yield from self._update_progress(progress)
case {"type": "unloadingOtherJITModel", "info": other_model_info} if (
"modelKey" in other_model_info
):
jit_unload_event = "Unloading other JIT model"
unloaded_model_key = other_model_info["modelKey"]
suggestion = (
"You can disable this behavior by going to "
"LM Studio -> Settings -> Developer -> Turn OFF JIT models auto-evict"
)
# Report the JIT unload
self._logger.info(
jit_unload_event,
unloaded_model_key=unloaded_model_key,
suggestion=suggestion,
)
# Report further details on the unloaded model if debug messages are enabled
self._logger.debug(
jit_unload_event,
unloaded_model_key=unloaded_model_key,
unloaded_model=other_model_info,
)
case {
"type": "success" | "alreadyLoaded" | "loadSuccess",
"info": {
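For context, here is a minimal standalone sketch of the message shape the new `case` branch matches. Only the `type`, `info`, and `modelKey` fields are taken from the pattern above; the sample model key and the `print` call are illustrative stand-ins for the SDK's structured logging.

```python
# Minimal sketch of handling an "unloadingOtherJITModel" message.
# The "type" / "info" / "modelKey" fields mirror the pattern added above;
# the sample model key used here is an illustrative assumption.
message = {
    "type": "unloadingOtherJITModel",
    "info": {"modelKey": "text-embedding-nomic-embed-text-v1.5"},
}

match message:
    case {"type": "unloadingOtherJITModel", "info": other_model_info} if (
        "modelKey" in other_model_info
    ):
        # The real handler reports this at INFO (with a config suggestion)
        # and again at DEBUG (with the full model info).
        print("Unloading other JIT model:", other_model_info["modelKey"])
    case _:
        pass
```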
16 changes: 14 additions & 2 deletions tests/README.md
@@ -13,18 +13,24 @@ conditions must also be met for the test suite to pass:
- the API server must be enabled and running on port 1234
- the following models must be loaded with their default identifiers
- `text-embedding-nomic-embed-text-v1.5` (text embedding model)
- `llama-3.2-1b-instruct` (chat oriented text LLM)
- `llama-3.2-1b-instruct` (text LLM)
- `ZiangWu/MobileVLM_V2-1.7B-GGUF` (visual LLM)
- `qwen2.5-7b-instruct-1m` (tool using LLM)

Additional models should NOT be loaded when running the test suite,
as some model querying tests may fail in that case.

However, there's no problem with having additional models downloaded.
There are also some JIT model loading/unloading test cases which
expect `smollm2-135m` (small text LLM) to already be downloaded.
A full test run will download this model (since it is also the
model used for the end-to-end search-and-download test case).

There's no problem with having additional models downloaded.
The only impact is that the test which checks that all of the expected
models can be found in the list of downloaded models will take a
little longer to run.


# Loading and unloading the required models

The `load-test-models` `tox` environment can be used to ensure the required
@@ -44,6 +50,12 @@ explicitly unload the test models:
$ tox -m unload-test-models
```

The model downloading test cases can be specifically run with:

```console
$ tox -m test -- -k test_download_model
```


## Adding new tests

102 changes: 87 additions & 15 deletions tests/async/test_model_catalog_async.py
@@ -3,6 +3,8 @@
import asyncio
import logging

from contextlib import suppress

import pytest
from pytest import LogCaptureFixture as LogCap
from pytest_subtests import SubTests
@@ -13,12 +15,11 @@
from ..support import (
LLM_LOAD_CONFIG,
EXPECTED_LLM,
EXPECTED_LLM_DEFAULT_ID,
EXPECTED_LLM_ID,
EXPECTED_EMBEDDING,
EXPECTED_EMBEDDING_DEFAULT_ID,
EXPECTED_EMBEDDING_ID,
EXPECTED_VLM_ID,
SMALL_LLM_ID,
TOOL_LLM_ID,
check_sdk_error,
)
@@ -291,16 +292,17 @@ async def test_get_or_load_when_unloaded_llm_async(caplog: LogCap) -> None:
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
llm = client.llm
await llm.unload(EXPECTED_LLM_ID)
model = await llm.model(EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG)
assert model.identifier == EXPECTED_LLM_DEFAULT_ID
with suppress(LMStudioModelNotFoundError):
await llm.unload(EXPECTED_LLM_ID)
model = await llm.model(EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG)
assert model.identifier == EXPECTED_LLM_ID
# LM Studio may default to JIT handling for models loaded with `getOrLoad`,
# so ensure we restore a regular non-JIT instance with no TTL set
await llm.unload(EXPECTED_LLM_ID)
await model.unload()
model = await llm.load_new_instance(
EXPECTED_LLM_DEFAULT_ID, config=LLM_LOAD_CONFIG, ttl=None
EXPECTED_LLM_ID, config=LLM_LOAD_CONFIG, ttl=None
)
assert model.identifier == EXPECTED_LLM_DEFAULT_ID
assert model.identifier == EXPECTED_LLM_ID


@pytest.mark.asyncio
@@ -310,13 +312,83 @@ async def test_get_or_load_when_unloaded_embedding_async(caplog: LogCap) -> None
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
embedding = client.embedding
await embedding.unload(EXPECTED_EMBEDDING_ID)
model = await embedding.model(EXPECTED_EMBEDDING_DEFAULT_ID)
assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
with suppress(LMStudioModelNotFoundError):
await embedding.unload(EXPECTED_EMBEDDING_ID)
model = await embedding.model(EXPECTED_EMBEDDING_ID)
assert model.identifier == EXPECTED_EMBEDDING_ID
# LM Studio may default to JIT handling for models loaded with `getOrLoad`,
# so ensure we restore a regular non-JIT instance with no TTL set
await embedding.unload(EXPECTED_EMBEDDING_ID)
model = await embedding.load_new_instance(
EXPECTED_EMBEDDING_DEFAULT_ID, ttl=None
await model.unload()
model = await embedding.load_new_instance(EXPECTED_EMBEDDING_ID, ttl=None)
assert model.identifier == EXPECTED_EMBEDDING_ID


@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.lmstudio
async def test_jit_unloading_async(caplog: LogCap) -> None:
# For the time being, only test the embedding vs LLM cross-namespace
# JIT unloading (since that ensures the info type mixing is handled).
# Assuming LM Studio eventually switches to per-namespace JIT unloading,
# this can be split into separate LLM and embedding test cases at that time.
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
# Unload the non-JIT instance of the embedding model
with suppress(LMStudioModelNotFoundError):
await client.embedding.unload(EXPECTED_EMBEDDING_ID)
# Load a JIT instance of the embedding model
model1 = await client.embedding.model(EXPECTED_EMBEDDING_ID, ttl=300)
assert model1.identifier == EXPECTED_EMBEDDING_ID
model1_info = await model1.get_info()
assert model1_info.identifier == model1.identifier
# Load a JIT instance of the small testing LLM
# This will unload the JIT instance of the testing embedding model
model2 = await client.llm.model(SMALL_LLM_ID, ttl=300)
assert model2.identifier == SMALL_LLM_ID
model2_info = await model2.get_info()
assert model2_info.identifier == model2.identifier
# Attempting to query the now unloaded JIT embedding model will fail
with pytest.raises(LMStudioModelNotFoundError):
await model1.get_info()
# Restore things to the way other test cases expect them to be
await model2.unload()
model = await client.embedding.load_new_instance(
EXPECTED_EMBEDDING_ID, ttl=None
)
assert model.identifier == EXPECTED_EMBEDDING_DEFAULT_ID
assert model.identifier == EXPECTED_EMBEDDING_ID

# Check for expected log messages
jit_unload_event = "Unloading other JIT model"
jit_unload_messages_debug: list[str] = []
jit_unload_messages_info: list[str] = []
jit_unload_messages = {
logging.DEBUG: jit_unload_messages_debug,
logging.INFO: jit_unload_messages_info,
}
for _logger_name, log_level, message in caplog.record_tuples:
if jit_unload_event not in message:
continue
jit_unload_messages[log_level].append(message)

assert len(jit_unload_messages_info) == 1
assert len(jit_unload_messages_debug) == 1

info_message = jit_unload_messages_info[0]
debug_message = jit_unload_messages_debug[0]
# Ensure info message omits model info, but includes config guidance
unload_notice = f'"event": "{jit_unload_event}"'
assert unload_notice in info_message
loading_model_notice = f'"model_key": "{SMALL_LLM_ID}"'
assert loading_model_notice in info_message
unloaded_model_notice = f'"unloaded_model_key": "{EXPECTED_EMBEDDING_ID}"'
assert unloaded_model_notice in info_message
assert '"suggestion": ' in info_message
assert "disable this behavior" in info_message
assert '"unloaded_model": ' not in info_message
# Ensure debug message includes model info, but omits config guidance
assert unload_notice in debug_message
assert loading_model_notice in debug_message
assert unloaded_model_notice in debug_message
assert '"suggestion": ' not in debug_message
assert "disable this behavior" not in debug_message
assert '"unloaded_model": ' in debug_message
8 changes: 4 additions & 4 deletions tests/async/test_repository_async.py
@@ -7,7 +7,7 @@

from lmstudio import AsyncClient, LMStudioClientError

from ..support import EXPECTED_DOWNLOAD_SEARCH_TERM
from ..support import SMALL_LLM_SEARCH_TERM


# N.B. We can maybe provide a reference list for what should be available
@@ -21,7 +21,7 @@
async def test_download_model_async(caplog: LogCap) -> None:
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
logging.info(f"Models: {models}")
assert models
assert isinstance(models, list)
Expand All @@ -45,7 +45,7 @@ async def test_download_model_async(caplog: LogCap) -> None:
async def test_get_options_out_of_session_async(caplog: LogCap) -> None:
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
assert models
assert isinstance(models, list)
assert len(models) > 0
@@ -60,7 +60,7 @@ async def test_get_options_out_of_session_async(caplog: LogCap) -> None:
async def test_download_out_of_session_async(caplog: LogCap) -> None:
caplog.set_level(logging.DEBUG)
async with AsyncClient() as client:
models = await client.repository.search_models(EXPECTED_DOWNLOAD_SEARCH_TERM)
models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
logging.info(f"Models: {models}")
assert models
assert isinstance(models, list)
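As a quick usage illustration, here is a minimal standalone sketch of the search call these tests start from, using the renamed constant. Only `AsyncClient` and `repository.search_models` appear in the diff; the script scaffolding around them is an assumption.

```python
import asyncio

from lmstudio import AsyncClient

# Mirrors SMALL_LLM_SEARCH_TERM from tests/support/__init__.py
SMALL_LLM_SEARCH_TERM = "smollm2-135m"


async def main() -> None:
    async with AsyncClient() as client:
        # The same search the download test cases begin with
        models = await client.repository.search_models(SMALL_LLM_SEARCH_TERM)
        for model in models:
            print(model)


asyncio.run(main())
```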
9 changes: 6 additions & 3 deletions tests/support/__init__.py
@@ -24,14 +24,12 @@
THIS_DIR = Path(__file__).parent

LOCAL_API_HOST = "localhost:1234"
EXPECTED_DOWNLOAD_SEARCH_TERM = "smollm2-135m"

####################################################
# Embedding model testing
####################################################
EXPECTED_EMBEDDING = "nomic-ai/nomic-embed-text-v1.5"
EXPECTED_EMBEDDING_ID = "text-embedding-nomic-embed-text-v1.5"
EXPECTED_EMBEDDING_DEFAULT_ID = EXPECTED_EMBEDDING_ID # the same for now
EXPECTED_EMBEDDING_LENGTH = 768 # nomic has embedding dimension 768
EXPECTED_EMBEDDING_CONTEXT_LENGTH = 2048 # nomic accepts a 2048 token context

@@ -40,7 +38,6 @@
####################################################
EXPECTED_LLM = "hugging-quants/llama-3.2-1b-instruct"
EXPECTED_LLM_ID = "llama-3.2-1b-instruct"
EXPECTED_LLM_DEFAULT_ID = EXPECTED_LLM_ID # the same for now
PROMPT = "Hello"
MAX_PREDICTED_TOKENS = 50
# Use a dict here to ensure dicts are accepted in all config APIs,
@@ -68,6 +65,12 @@
####################################################
TOOL_LLM_ID = "qwen2.5-7b-instruct-1m"

####################################################
# Other specific models needed for testing
####################################################
SMALL_LLM_SEARCH_TERM = "smollm2-135m"
SMALL_LLM_ID = "smollm2-135m-instruct"

####################################################
# Structured LLM responses
####################################################