Skip to content

Commit 8851626

Browse files
authored
Merge pull request #1 from thomasantony/make-torch-optional
Make torch an optional dependency and fix the CI workflows
2 parents b03974b + dcc54f2 commit 8851626

File tree

6 files changed

+120
-43
lines changed

6 files changed

+120
-43
lines changed

.github/workflows/wheels.yml

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,34 +16,59 @@ on:
1616

1717
jobs:
1818
build_wheels:
19-
name: ${{ matrix.type }} ${{ matrix.arch }} on ${{ matrix.os }}
19+
name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.p_ver }}
2020
runs-on: ${{ matrix.os }}
21+
env:
22+
CIBW_BUILD: ${{ matrix.cibw_build }}
23+
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
24+
CIBW_ARCHS_MACOS: ${{ matrix.arch }}
2125
strategy:
22-
fail-fast: false
2326
matrix:
24-
os: [macos-latest, ubuntu-latest]
27+
os: [ubuntu-latest, windows-latest, macos-latest]
2528
arch: [auto64]
29+
cibw_build: ["cp3{9,10,11}-*"]
30+
p_ver: ["3.9-3.11"]
31+
exclude:
32+
- arch: arm64
33+
os: macos-latest
34+
include:
35+
- arch: aarch64
36+
os: ubuntu-latest
37+
cibw_build: "cp37*"
38+
p_ver: "3.7"
39+
- arch: aarch64
40+
os: ubuntu-latest
41+
cibw_build: "cp38*"
42+
p_ver: "3.8"
43+
- arch: aarch64
44+
os: ubuntu-latest
45+
cibw_build: "cp39*"
46+
p_ver: "3.9"
47+
- arch: aarch64
48+
os: ubuntu-latest
49+
cibw_build: "cp310*"
50+
p_ver: "3.10"
51+
- arch: aarch64
52+
os: ubuntu-latest
53+
cibw_build: "cp311*"
54+
p_ver: "3.11"
2655

2756
steps:
2857
- uses: actions/checkout@v3
2958
with:
3059
fetch-depth: 0
3160
submodules: true
3261

33-
# Used to host cibuildwheel
34-
- uses: actions/setup-python@v3
35-
# Installs poetry
36-
- uses: Gr1N/setup-poetry@v8
62+
- name: Set up QEMU
63+
if: matrix.os == 'ubuntu-latest' && matrix.arch == 'aarch64'
64+
uses: docker/setup-qemu-action@v1
3765
with:
38-
poetry-version: "1.4.0"
39-
- name: Install cibuildwheel
40-
run: python -m pip install cibuildwheel==2.12.1
66+
platforms: arm64
4167

4268
- name: Build wheels
43-
run: python -m cibuildwheel --output-dir wheelhouse
69+
uses: pypa/cibuildwheel@v2.12.1
4470
env:
45-
CIBW_ARCHS_MACOS: "x86_64 universal2 arm64"
46-
CIBW_TEST_SKIP: '*_arm64 *_universal2:arm64'
71+
CIBW_ARCHS_MACOS: "x86_64"
4772

4873
- uses: actions/upload-artifact@v3
4974
with:

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ llamacpp-quantize ./models/7B/
3232
llamacpp-cli
3333
```
3434

35+
**Note that running `llamacpp-convert` requires `torch`, `sentencepiece` and `numpy` to be installed. These packages are not installed by default when you install `llamacpp`.**
36+
3537
## Command line interface
3638

3739
The package installs the command line entry point `llamacpp-cli` that points to `llamacpp/cli.py` and should provide about the same functionality as the `main` program in the original C++ repository. There is also an experimental `llamacpp-chat` that is supposed to bring up a chat interface but this is not working correctly yet.
@@ -40,6 +42,41 @@ The package installs the command line entry point `llamacpp-cli` that points to
4042

4143
See `llamacpp/cli.py` for a detailed example. The simplest demo would be something like the following:
4244

45+
```python
46+
47+
params = llamacpp.gpt_params(
48+
'./models/7B/ggml_model_q4_0.bin', # model,
49+
"A llama is a ", # prompt
50+
"", # reverse_prompt
51+
512, # ctx_size
52+
100, # n_predict
53+
40, # top_k
54+
0.95, # top_p
55+
0.85, # temp
56+
1.30, # repeat_penalty
57+
-1, # seed
58+
8, # threads
59+
64, # repeat_last_n
60+
8, # batch_size
61+
False, # color
62+
False, # interactive or args.interactive_start
63+
False, # interactive_start
64+
)
65+
model = llamacpp.PyLLAMA(params)
66+
model.add_bos() # Adds "beginning of string" token
67+
model.update_input(params.prompt)
68+
model.print_startup_stats()
69+
model.prepare_context()
70+
71+
model.ingest_all_pending_input(True)
72+
while not model.is_finished():
73+
model.ingest_all_pending_input(not input_noecho)
74+
text, is_finished = model.infer_text()
75+
print(text, end="")
76+
if is_finished:
77+
break
78+
```
79+
4380
## ToDo
4481

4582
- [x] Use poetry to build package

build.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from setuptools_cpp import CMakeExtension, ExtensionBuilder
23
from typing import Any, Dict
34

@@ -12,5 +13,13 @@ def build(setup_kwargs: Dict[str, Any]) -> None:
1213
"ext_modules": ext_modules,
1314
"cmdclass": dict(build_ext=ExtensionBuilder),
1415
"zip_safe": False,
16+
"options": {
17+
'bdist_wheel': {
18+
'plat_name': os.getenv('PP_PYTHON_TARGET', 'any')
19+
},
20+
'egg_info': {
21+
'egg_base': './build/'
22+
}
23+
}
1524
}
1625
)

llamacpp/convert.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,21 @@
1717
# and vocabulary.
1818
#
1919

20+
# Check if torch is installed; show an error and exit if not
2021
import sys
2122
import json
2223
import struct
23-
import numpy as np
24-
import torch
2524

26-
from sentencepiece import SentencePieceProcessor
25+
try:
26+
import torch
27+
import numpy as np
28+
from sentencepiece import SentencePieceProcessor
29+
except ImportError:
30+
print("Error: torch, sentencepiece and numpy are required to run this script.")
31+
print("Please install using the following command:")
32+
print(" pip install torch sentencepiece numpy")
33+
sys.exit(1)
34+
2735

2836
def main():
2937
if len(sys.argv) < 3:
@@ -35,7 +43,7 @@ def main():
3543
# output in the same directory as the model
3644
dir_model = sys.argv[1]
3745

38-
fname_hparams = sys.argv[1] + "/params.json"
46+
fname_hparams = sys.argv[1] + "/params.json"
3947
fname_tokenizer = sys.argv[1] + "/../tokenizer.model"
4048

4149
def get_n_parts(dim):
@@ -76,35 +84,35 @@ def get_n_parts(dim):
7684
n_parts = get_n_parts(hparams["dim"])
7785

7886
print(hparams)
79-
print('n_parts = ', n_parts)
87+
print("n_parts = ", n_parts)
8088

8189
for p in range(n_parts):
82-
print('Processing part ', p)
90+
print("Processing part ", p)
8391

84-
#fname_model = sys.argv[1] + "/consolidated.00.pth"
92+
# fname_model = sys.argv[1] + "/consolidated.00.pth"
8593
fname_model = sys.argv[1] + "/consolidated.0" + str(p) + ".pth"
8694
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
87-
if (p > 0):
95+
if p > 0:
8896
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" + "." + str(p)
8997

9098
# weights_only requires torch 1.13.1, remove this param or update if you get an "invalid keyword argument" error
9199
model = torch.load(fname_model, map_location="cpu", weights_only=True)
92100

93101
fout = open(fname_out, "wb")
94102

95-
fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
103+
fout.write(struct.pack("i", 0x67676D6C)) # magic: ggml in hex
96104
fout.write(struct.pack("i", hparams["vocab_size"]))
97105
fout.write(struct.pack("i", hparams["dim"]))
98106
fout.write(struct.pack("i", hparams["multiple_of"]))
99107
fout.write(struct.pack("i", hparams["n_heads"]))
100108
fout.write(struct.pack("i", hparams["n_layers"]))
101-
fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete)
109+
fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete)
102110
fout.write(struct.pack("i", ftype))
103111

104112
# Is this correct??
105113
for i in range(32000):
106114
# TODO: this is probably wrong - not sure how this tokenizer works
107-
text = tokenizer.decode([29889, i]).encode('utf-8')
115+
text = tokenizer.decode([29889, i]).encode("utf-8")
108116
# remove the first byte (it's always '.')
109117
text = text[1:]
110118
fout.write(struct.pack("i", len(text)))
@@ -120,16 +128,16 @@ def get_n_parts(dim):
120128

121129
print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
122130

123-
#data = tf.train.load_variable(dir_model, name).squeeze()
131+
# data = tf.train.load_variable(dir_model, name).squeeze()
124132
data = v.numpy().squeeze()
125-
n_dims = len(data.shape);
133+
n_dims = len(data.shape)
126134

127135
# for efficiency - transpose some matrices
128136
# "model/h.*/attn/c_attn/w"
129137
# "model/h.*/attn/c_proj/w"
130138
# "model/h.*/mlp/c_fc/w"
131139
# "model/h.*/mlp/c_proj/w"
132-
#if name[-14:] == "/attn/c_attn/w" or \
140+
# if name[-14:] == "/attn/c_attn/w" or \
133141
# name[-14:] == "/attn/c_proj/w" or \
134142
# name[-11:] == "/mlp/c_fc/w" or \
135143
# name[-13:] == "/mlp/c_proj/w":
@@ -146,11 +154,11 @@ def get_n_parts(dim):
146154
ftype_cur = 0
147155

148156
# header
149-
sname = name.encode('utf-8')
157+
sname = name.encode("utf-8")
150158
fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
151159
for i in range(n_dims):
152160
fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
153-
fout.write(sname);
161+
fout.write(sname)
154162

155163
# data
156164
data.tofile(fout)
@@ -163,5 +171,6 @@ def get_n_parts(dim):
163171
print("Done. Output file: " + fname_out + ", (part ", p, ")")
164172
print("")
165173

166-
if __name__ == '__main__':
174+
175+
if __name__ == "__main__":
167176
main()

pyproject.toml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "llamacpp"
3-
version = "0.1.5"
3+
version = "0.1.6"
44
description = "Python bindings for @ggerganov's llama.cpp"
55
authors = ["Thomas Antony <[email protected]>"]
66
license = "MIT"
@@ -11,8 +11,12 @@ packages = [{ include = "llamacpp", from = "." }]
1111

1212
[tool.poetry.dependencies]
1313
python = "^3.6"
14+
15+
[tool.poetry.group.dev.dependencies]
16+
# Require torch and sentencepiece for running the convert script
1417
torch = "^1.13.1"
1518
sentencepiece = "^0.1.97"
19+
setuptools-cpp = "^0.1.0"
1620

1721
[build-system]
1822
requires = ["poetry>=0.12", "setuptools", "wheel", "setuptools-cpp"]
@@ -29,9 +33,11 @@ llamacpp-cli = 'llamacpp.cli:run'
2933
llamacpp-chat = 'llamacpp.chat:run'
3034

3135
[tool.cibuildwheel]
36+
# Install pybind and poetry
37+
before-build = "pip install -U \"pybind11[global]\" poetry"
3238

33-
# Install something required for the build
34-
# (you might want to use build-system.requires instead)
35-
before-build = "pip install -U \"pybind11[global]\""
3639
# Skip PyPy and 32-bit builds
3740
skip = ["pp*", "*-win32", "*-manylinux_i686", "*-musllinux_i686"]
41+
42+
build-verbosity = 3
43+
test-skip = "*macosx*arm64*"

src/PyLlama.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,8 @@
22
#include "llama.h"
33
#include "utils.h"
44
#include <pybind11/pybind11.h>
5-
#include <csignal>
65

76

8-
9-
void catch_signals() {
10-
auto handler = [](int code) { throw std::runtime_error("SIGNAL " + std::to_string(code)); };
11-
signal(SIGINT, handler);
12-
signal(SIGTERM, handler);
13-
signal(SIGKILL, handler);
14-
}
15-
167
namespace py = pybind11;
178

189
class PyLLAMA {

0 commit comments

Comments
 (0)