@@ -57,24 +57,25 @@ def test_find_array(monkeypatch: pytest.MonkeyPatch):
57
57
def server_embedding ():
58
58
# GritLM embedding implementation is only supported by XFormers backend.
59
59
args = ["--task" , "embed" , "--max_model_len" , str (MAX_MODEL_LEN )]
60
- with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
61
- yield remote_server
60
+ with pytest .MonkeyPatch .context () as m :
61
+ m .setenv (STR_BACKEND_ENV_VAR , "XFORMERS" )
62
+ with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
63
+ yield remote_server
62
64
63
65
64
66
@pytest .fixture (scope = "module" )
65
67
def server_generate ():
66
68
args = ["--task" , "generate" , "--max_model_len" , str (MAX_MODEL_LEN )]
67
- with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
68
- yield remote_server
69
+ with pytest .MonkeyPatch .context () as m :
70
+ m .setenv (STR_BACKEND_ENV_VAR , "XFORMERS" )
71
+ with RemoteOpenAIServer (MODEL_NAME , args ) as remote_server :
72
+ yield remote_server
69
73
70
74
71
75
@pytest_asyncio .fixture
72
- async def client_embedding (monkeypatch : pytest .MonkeyPatch ,
73
- server_embedding : RemoteOpenAIServer ):
74
- with monkeypatch .context () as m :
75
- m .setenv ("VLLM_ATTENTION_BACKEND" , "XFORMERS" )
76
- async with server_embedding .get_async_client () as async_client :
77
- yield async_client
76
+ async def client_embedding (server_embedding : RemoteOpenAIServer ):
77
+ async with server_embedding .get_async_client () as async_client :
78
+ yield async_client
78
79
79
80
80
81
@pytest_asyncio .fixture
0 commit comments