Update to use vendored llama.cpp submodule

thomasantony · thomasantony · commit 4b2a140c0c76 · 2023-03-18T11:23:39.000-07:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -10,11 +10,11 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 set(GGML_USE_ACCELERATE 1)
 find_package(pybind11 REQUIRED)
 
-add_library(llamacpp MODULE src/PyLlama.cpp ../llama.cpp ../ggml.c ../utils.cpp)
+add_subdirectory(vendor/llama.cpp)
+add_library(llamacpp MODULE src/PyLlama.cpp)
 
-target_include_directories(llamacpp PRIVATE ../)
-
-target_link_libraries(llamacpp PRIVATE pybind11::module pybind11::lto pybind11::windows_extras)
+target_include_directories(llamacpp PRIVATE vendor/llama.cpp)
+target_link_libraries(llamacpp PRIVATE pybind11::module pybind11::lto pybind11::windows_extras llamalib)
 add_link_options(-no_fixup_chains)
 pybind11_extension(llamacpp)
 
diff --git a/README.md b/README.md
@@ -1,21 +1,18 @@
+## Python bindings for llama.cpp
+
 ## Building the Python bindings
 
 ### macOS
 
-`brew install pybind11`
-
-## Install python package
-
-### From PyPI
-
 ```
-pip install llamacpp
+brew install pybind11  # Installs dependency
+git submodule init && git submodule update
+poetry install
 ```
-
-### From source
+### From PyPI
 
 ```
-poetry install
+pip install llamacpp
 ```
 
 ## Get the model weights
@@ -32,25 +29,16 @@ Convert the weights to GGML format using `llamacpp-convert`. Then use `llamacpp-
 ```
 llamacpp-convert ./models/7B/ 1
 llamacpp-quantize ./models/7B/
+llamacpp-cli
 ```
 
-## Run this demo script
+## Command line interface
 
-```
-import llamacpp
-import os
+The package installs the command-line entry point `llamacpp-cli`, which points to `llamacpp/cli.py` and provides roughly the same functionality as the `main` program in the original C++ repository. There is also an experimental `llamacpp-chat` entry point that is intended to bring up a chat interface, but it does not work correctly yet.
 
-model_path = "./models/7B/ggml-model-q4_0.bin"
-params = llamacpp.gpt_params(model_path,
-"Hi, I'm a llama.",
-4096,
-40,
-0.1,
-0.7,
-2.0)
-model = llamacpp.PyLLAMA(model_path, params)
-model.predict("Hello, I'm a llama.", 10)
-```
+## Demo script
+
+See `llamacpp/cli.py` for a detailed example. The simplest demo would be something like the following:
 
 ## ToDo
 
diff --git a/build.py b/build.py
@@ -1,10 +1,10 @@
-from setuptools_cpp import CMakeExtension, ExtensionBuilder, Pybind11Extension
+from setuptools_cpp import CMakeExtension, ExtensionBuilder
 from typing import Any, Dict
 
 
 def build(setup_kwargs: Dict[str, Any]) -> None:
     ext_modules = [
-        CMakeExtension(f"llamacpp.llamacpp", sourcedir="./python"),
+        CMakeExtension("llamacpp.llamacpp", sourcedir="."),
     ]
 
     setup_kwargs.update(
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,8 +4,10 @@ version = "0.1.5"
 description = "Python bindings for @ggerganov's llama.cpp"
 authors = ["Thomas Antony <mail@thomasantony.com>"]
 license = "MIT"
-readme = "python/README.md"
-packages = [{ include = "llamacpp", from = "python" }]
+readme = "README.md"
+homepage = "https://github.com/thomasantony/llamacpp-python"
+repository = "https://github.com/thomasantony/llamacpp-python"
+packages = [{ include = "llamacpp", from = "." }]
 
 [tool.poetry.dependencies]
 python = "^3.10"