
Commit 44af848

[Build] Various fixes (#936)
The builds have broken in a number of interesting ways:
- The required CMake configuration for `llama-cpp-python` has changed slightly, so update the various workflow files (not to mention the README).
- A new version of Phi3 also appears to have started giving trouble with another test, so fix that as well.
- One of the LlamaCpp tests is consistently failing on Windows (and only Windows), so add a conditional xfail (see the sketch below).
1 parent 6e4ee06 commit 44af848
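
As a quick illustration of the conditional-xfail pattern referenced in the last bullet, here is a minimal, hypothetical sketch (the test name and assertion are placeholders, not code from this commit); the marker only takes effect when its condition is true, so a failure on any other platform still fails the build:

```python
import platform

import pytest


@pytest.mark.xfail(
    condition=platform.system() == "Windows",
    reason="illustrative: this check is expected to fail on Windows only",
)
def test_platform_sensitive_behaviour():
    # Reported as XFAIL on Windows instead of breaking the run;
    # on Linux/macOS the marker is inert and a failure is a real failure.
    assert platform.system() != "Windows"
```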

File tree

6 files changed: +19 −19


.github/workflows/action_gpu_basic_tests.yml

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ jobs:
      run: |
        pip install accelerate
        pip uninstall -y llama-cpp-python
-       CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
+       CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
    - name: Check GPU available
      run: |
        python -c "import torch; assert torch.cuda.is_available()"

.github/workflows/ci_tests.yml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ jobs:
    - name: GPU pip installs
      run: |
        pip install accelerate
-       CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
+       CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
    - name: Check GPU available
      run: |
        python -c "import torch; assert torch.cuda.is_available()"

.github/workflows/notebook_tests.yml

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ jobs:
    - name: GPU pip installs
      run: |
        pip install accelerate
-       CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
+       CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
    - name: Check GPU available
      run: |
        python -c "import torch; assert torch.cuda.is_available()"

README.md

Lines changed: 1 addition & 1 deletion
@@ -648,7 +648,7 @@ time.time() - a
### llama.cpp
Install the python bindings:
```bash
-CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
```
Loading the model:
```python
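
The hunk above stops just short of the README's model-loading snippet. As a rough, hypothetical sketch of that step (the GGUF path and `n_gpu_layers` value are placeholders, and passing `n_gpu_layers` through `guidance.models.LlamaCpp` to llama-cpp-python is an assumption, not something this commit shows):

```python
from guidance import gen, models

# Placeholder path to a local GGUF model; n_gpu_layers=-1 asks llama-cpp-python
# (built with GGML_CUDA) to offload every layer to the GPU.
llama2 = models.LlamaCpp("path/to/model.gguf", n_gpu_layers=-1)

lm = llama2 + "How much is 2 + 2? " + gen(name="answer", max_tokens=10)
print(lm["answer"])
```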

tests/model_specific/test_llama_cpp.py

Lines changed: 6 additions & 1 deletion
@@ -1,3 +1,5 @@
+import platform
+
import numpy as np
import pytest

@@ -49,8 +51,11 @@ def test_llama_cpp_select2(llamacpp_model: guidance.models.Model):
    ]


+@pytest.mark.xfail(
+    condition=platform.system() == "Windows",
+    reason="llama-cpp-python >=0.2.79 appears to have made models non-deterministic on Windows",
+)
def test_repeat_calls(llamacpp_model: guidance.models.Model):
-    # llama-cpp-python 0.2.79 appears to have made models non-deterministic on Windows
    llama2 = llamacpp_model
    a = []
    lm = llama2 + "How much is 2 + 2? " + gen(name="test", max_tokens=10)

tests/model_specific/test_transformers.py

Lines changed: 9 additions & 14 deletions
@@ -15,10 +15,7 @@ def phi3_model(selected_model, selected_model_name):

@pytest.fixture(scope="module")
def llama3_model(selected_model, selected_model_name):
-    if (
-        selected_model_name in ["transformers_llama3cpu_8b"]
-        and selected_model is not None
-    ):
+    if selected_model_name in ["transformers_llama3cpu_8b"] and selected_model is not None:
        return selected_model
    else:
        pytest.skip("Requires Llama3 model (needs HF_TOKEN to be set)")
@@ -27,7 +24,7 @@ def llama3_model(selected_model, selected_model_name):
def test_gpt2():
    gpt2 = get_model("transformers:gpt2")
    lm = gpt2 + "this is a test" + gen("test", max_tokens=10)
-
+
    assert len(str(lm)) > len("this is a test")


@@ -42,9 +39,7 @@ def test_recursion_error():
    {gen('verse', max_tokens=2)}
    """
    )
-    assert len(str(lm)) > len(
-        "Tweak this proverb to apply to model instructions instead.\n\n"
-    )
+    assert len(str(lm)) > len("Tweak this proverb to apply to model instructions instead.\n\n")


TRANSFORMER_MODELS = {
@@ -81,6 +76,7 @@ def test_transformer_smoke_select(model_name, model_kwargs):

# Phi-3 specific tests

+
@pytest.mark.skip("Don't overload the build machines")
def test_phi3_transformers_orig():
    import torch
@@ -116,11 +112,10 @@ def test_phi3_transformers_orig():
def test_phi3_chat_basic(phi3_model: models.Model):
    lm = phi3_model

-    lm += "You are a counting bot. Just keep counting numbers."
    with user():
-        lm += "1,2,3,4"
+        lm += "You are a counting bot. Just keep counting numbers."
    with assistant():
-        lm += gen(name="five", max_tokens=10)
+        lm += "1,2,3,4," + gen(name="five", max_tokens=20)

    assert "5" in lm["five"]

@@ -143,7 +138,7 @@ def test_phi3_newline_chat(phi3_model: models.Model):
    with assistant():
        lm += "\n" + gen(name="five", max_tokens=1)
        lm += "\n" + gen(name="six", max_tokens=1)
-
+
    # This test would raise an exception earlier if we didn't fix the tokenizer.
    assert True

@@ -155,7 +150,7 @@ def test_phi3_unstable_tokenization(phi3_model: models.Model):
    with user():
        lm += "1,2,3,4,"
    with assistant():
-        lm += "\n" # comment and uncomment this line to get the error
+        lm += "\n"  # comment and uncomment this line to get the error
        lm += gen(name="five", max_tokens=1)
        lm += "," + gen(name="six", max_tokens=1)

@@ -168,4 +163,4 @@ def test_phi3_basic_completion_badtokens(phi3_model: models.Model):
    lm += f"""<|use\n\nYou are a counting bot. Just keep counting numbers.<|end|><|assistant|>1,2,3,4,"""
    lm += gen("five", max_tokens=10)

-    assert len(lm["five"]) > 0
+    assert len(lm["five"]) > 0
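
Restating the reworked Phi-3 chat test as a standalone, hypothetical helper (the function name is illustrative, not from the repository): the counting instruction now lives inside the user turn and the assistant turn is primed with "1,2,3,4,", which is the prompt layout the newer Phi3 release appears to handle correctly:

```python
from guidance import assistant, gen, user


def count_to_five(phi3_model):
    # Mirrors the fixed test: the instruction goes in the user turn, then the
    # assistant turn is primed with "1,2,3,4," before generating.
    lm = phi3_model
    with user():
        lm += "You are a counting bot. Just keep counting numbers."
    with assistant():
        lm += "1,2,3,4," + gen(name="five", max_tokens=20)
    return lm["five"]
```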

0 commit comments
