Skip to content

Commit 8565b7c

Browse files
authored
feat: v3.0 stable release (#331)
1 parent c35fcad commit 8565b7c

File tree

4 files changed

+165
-53
lines changed

4 files changed

+165
-53
lines changed

.github/workflows/build.yml

Lines changed: 30 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ jobs:
2323
- name: Download latest llama.cpp release
2424
env:
2525
CI: true
26-
run: node ./dist/cli/cli.js source download --release latest --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle
26+
# Switched to `b3808` instead of `latest` due to a build failure on the latest version. `b3808` is the previous release.
27+
run: node ./dist/cli/cli.js source download --release b3808 --skipBuild --noBundle --noUsageExample --updateBinariesReleaseMetadataAndSaveGitBundle
2728
- name: Upload build artifact
2829
uses: actions/upload-artifact@v4
2930
with:
@@ -640,7 +641,7 @@ jobs:
640641
if: |
641642
always() &&
642643
github.event_name == 'push' &&
643-
(github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') &&
644+
github.ref == 'refs/heads/master' &&
644645
needs.build.result == 'success' &&
645646
needs.resolve-next-release.result == 'success' &&
646647
needs.resolve-next-release.outputs.next-version != '' &&
@@ -654,7 +655,7 @@ jobs:
654655
concurrency: update-documentation-website-${{ github.ref }}
655656
environment:
656657
name: Documentation website
657-
# url: "https://node-llama-cpp.withcat.ai"
658+
url: "https://node-llama-cpp.withcat.ai"
658659
needs:
659660
- build
660661
- resolve-next-release
@@ -704,40 +705,33 @@ jobs:
704705
705706
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
706707
npm run docs:build
707-
- name: Upload docs
708-
uses: actions/upload-artifact@v4
708+
- name: Upload docs to GitHub Pages
709+
uses: actions/upload-pages-artifact@v3
709710
with:
710-
include-hidden-files: true
711-
retention-days: 2
712-
name: "docs-site"
711+
name: pages-docs
713712
path: docs-site
714-
# - name: Upload docs to GitHub Pages
715-
# uses: actions/upload-pages-artifact@v3
716-
# with:
717-
# name: pages-docs
718-
# path: docs-site
719-
# - name: Deploy docs to GitHub Pages
720-
# uses: actions/deploy-pages@v4
721-
# with:
722-
# artifact_name: pages-docs
723-
# - name: Update feed
724-
# run: |
725-
# curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
713+
- name: Deploy docs to GitHub Pages
714+
uses: actions/deploy-pages@v4
715+
with:
716+
artifact_name: pages-docs
717+
- name: Update feed
718+
run: |
719+
curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
726720
727721
update-documentation-website-no-release:
728722
name: Update documentation website - no version release
729723
if: |
730724
always() &&
731725
github.event_name == 'push' &&
732-
(github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta') &&
726+
github.ref == 'refs/heads/master' &&
733727
needs.build.result == 'success' &&
734728
needs.resolve-next-release.result == 'success' &&
735729
needs.resolve-next-release.outputs.next-version == 'false'
736730
runs-on: ubuntu-latest
737731
concurrency: update-documentation-website-${{ github.ref }}
738732
environment:
739733
name: Documentation website
740-
# url: "https://node-llama-cpp.withcat.ai"
734+
url: "https://node-llama-cpp.withcat.ai"
741735
needs:
742736
- build
743737
- resolve-next-release
@@ -760,12 +754,12 @@ jobs:
760754
- name: Move artifacts
761755
run: |
762756
mv artifacts/build dist/
763-
757+
764758
cp -r artifacts/llama.cpp/llama.cpp llama/llama.cpp
765-
759+
766760
rm -f ./llama/binariesGithubRelease.json
767761
mv artifacts/llama.cpp/binariesGithubRelease.json ./llama/binariesGithubRelease.json
768-
762+
769763
rm -f ./llama/llama.cpp.info.json
770764
mv artifacts/llama.cpp/llama.cpp.info.json ./llama/llama.cpp.info.json
771765
- name: Resolve docs version
@@ -783,25 +777,18 @@ jobs:
783777
run: |
784778
export DOCS_PACKAGE_VERSION="$(cat ./docsVersion.txt)"
785779
echo "Package version: $DOCS_PACKAGE_VERSION"
786-
780+
787781
git apply --ignore-whitespace ./scripts/patches/vitepress+1.3.4.patch
788782
npm run docs:build
789-
- name: Upload docs
790-
uses: actions/upload-artifact@v4
783+
- name: Upload docs to GitHub Pages
784+
uses: actions/upload-pages-artifact@v3
791785
with:
792-
include-hidden-files: true
793-
retention-days: 2
794-
name: "docs-site"
786+
name: pages-docs
795787
path: docs-site
796-
# - name: Upload docs to GitHub Pages
797-
# uses: actions/upload-pages-artifact@v3
798-
# with:
799-
# name: pages-docs
800-
# path: docs-site
801-
# - name: Deploy docs to GitHub Pages
802-
# uses: actions/deploy-pages@v4
803-
# with:
804-
# artifact_name: pages-docs
805-
# - name: Update feed
806-
# run: |
807-
# curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"
788+
- name: Deploy docs to GitHub Pages
789+
uses: actions/deploy-pages@v4
790+
with:
791+
artifact_name: pages-docs
792+
- name: Update feed
793+
run: |
794+
curl -X POST "https://pubsubhubbub.appspot.com/" -H "Content-Type: application/x-www-form-urlencoded" --data-urlencode "hub.mode=publish" --data-urlencode "hub.url=https://node-llama-cpp.withcat.ai/blog/feed.atom"

.vitepress/theme/index.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@ import type {EnhanceAppContext} from "vitepress";
1717
export default {
1818
extends: Theme,
1919
Layout: () => {
20-
const text = "v3.0.0 is here!";
20+
const text = "v3.0 is here!";
2121
const link = "/blog/v3";
2222
const hideDate = new Date("2025-01-01T00:00:00Z");
2323

2424
return h(LayoutContainer, null, h(Theme.Layout, null, {
25-
// "home-hero-info-before": () => h(LatestVersionHomeBadge, {
26-
// type: "desktop",
27-
// text, link, hideDate
28-
// }),
29-
// "home-hero-actions-after": () => h(LatestVersionHomeBadge, {
30-
// type: "mobile",
31-
// text, link, hideDate
32-
// }),
25+
"home-hero-info-before": () => h(LatestVersionHomeBadge, {
26+
type: "desktop",
27+
text, link, hideDate
28+
}),
29+
"home-hero-actions-after": () => h(LatestVersionHomeBadge, {
30+
type: "mobile",
31+
text, link, hideDate
32+
}),
3333
"doc-after": () => h(CommentsSection)
3434
}));
3535
},

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
</div>
1717

18-
New! [`v3.0.0` is here!](https://github.com/withcatai/node-llama-cpp/pull/105) (included: function calling, automatic chat wrapper detection, embedding support, and more)
18+
[`v3.0` is here!](https://node-llama-cpp.withcat.ai/blog/v3)
1919

2020
## Features
2121
* Run LLMs locally on your machine

docs/blog/v3.md

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
---
2+
title: node-llama-cpp v3.0
3+
date: 2024-09-23T22:00:00Z
4+
author:
5+
name: Gilad S.
6+
github: giladgd
7+
category: Release
8+
description: Learn more about the new features in node-llama-cpp v3.0!
9+
image:
10+
url: https://github.com/user-attachments/assets/c7ed2eab-fb50-426d-9019-aed40147f30e
11+
alt: Celebrate
12+
width: 3072
13+
height: 1536
14+
---
15+
[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) 3.0 is finally here.
16+
17+
With [`node-llama-cpp`](https://node-llama-cpp.withcat.ai), you can run large language models locally on your machine using the power of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) with a simple and easy-to-use API.
18+
19+
It includes everything you need, from downloading models, to running them in the most optimized way for your hardware, and integrating them in your projects.
20+
21+
---
22+
23+
## Why `node-llama-cpp`?
24+
You might be wondering: why choose `node-llama-cpp` over using the OpenAI-compatible API of a service running on your machine?
25+
26+
The answer is simple: simplicity, performance, and flexibility.
27+
28+
Let's break it down:
29+
30+
### Simplicity
31+
To use `node-llama-cpp`, you install it like any other npm package, and you're good to go.
32+
33+
To run your project, all you have to do is `npm install` and `npm start`. That's it.
34+
35+
No installing additional software on your machine, no setting up API keys or environment variables, no setup process at all.
36+
Everything is self-contained in your project, giving you complete control over it.
37+
38+
With `node-llama-cpp`, you can run large language models on your machine using Node.js and TypeScript, _without_ any Python at all.
39+
Say goodbye to setup headaches, "it works on my machine" issues, and all other Python-related problems.
40+
41+
While `llama.cpp` is an amazing project, it's also highly technical and can be challenging for beginners.
42+
`node-llama-cpp` bridges that gap, making `llama.cpp` accessible to everyone, regardless of their experience level.
43+
44+
### Performance
45+
[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) is built on top of [`llama.cpp`](https://github.com/ggerganov/llama.cpp), a highly optimized C++ library for running large language models.
46+
47+
`llama.cpp` supports many compute backends, including Metal, CUDA, and Vulkan. It also uses [Accelerate](https://developer.apple.com/accelerate/) on Mac.
48+
49+
`node-llama-cpp` automatically adapts to your hardware and adjusts the default settings to give you the best performance,
50+
so you don't _have_ to configure anything to use it.
51+
52+
By using `node-llama-cpp` you are essentially running models _inside_ your project.
53+
With no overhead from network calls or data serialization,
54+
you can more effectively take advantage of the stateful nature of inference operations.
55+
56+
For example, you can prompt a model on top of an existing conversation inference state,
57+
without re-evaluating the entire history just to process the new prompt.
58+
<br/>
59+
This reduces the time it takes to start generating a response, and makes more efficient use of your resources.
60+
61+
If you were using an API, you would have to re-evaluate the entire history every time you prompt the model,
62+
or have the API store the state for you, which can use huge amounts of disk space.
63+
64+
### Flexibility
65+
Since `node-llama-cpp` runs inside your project, you can also deploy it together with your project.
66+
<br/>
67+
You can run models in your [Electron](../guide/electron.md) app without requiring any additional setup on the user's machine.
68+
69+
You can build libraries that use large language models and distribute them as npm packages,
70+
<br/>
71+
or deploy self-contained Docker images and run them on any hardware you want.
72+
73+
You can use [any model you want](../guide/choosing-a-model.md), or even create your own and use it with `node-llama-cpp`.
74+
<br/>
75+
Download models [as part of `npm install`](../guide/downloading-models.md) or [on-demand from your code](../guide/downloading-models.md#programmatic).
76+
77+
[Tweak inference settings](../guide/chat-session.md#repeat-penalty) to get better results for your particular use case.
78+
79+
`node-llama-cpp` is regularly updated with the latest `llama.cpp` release,
80+
but you can also [download and build the latest release](../guide/building-from-source.md#download-new-release) at any time with a single command.
81+
82+
The possibilities are endless.
83+
You have full control over the models you use, how you use them, and where you use them.
84+
You can tailor `node-llama-cpp` to your needs in ways that aren't possible with an OpenAI API (at least not efficiently or easily).
85+
86+
## Powerful Features
87+
`node-llama-cpp` includes a complete suite of everything you need to use large language models in your projects,
88+
with convenient wrappers for popular tasks, such as:
89+
* [Enforcing a JSON schema](../guide/chat-session.md#response-json-schema) on the output the model generates
90+
* Providing the model with [functions it can call on demand](../guide/chat-session.md#function-calling) to retrieve information or perform actions, even with some models that don't officially support it
91+
* [Generating completion](../guide/text-completion.md) for a given text
92+
* [Embedding text](../guide/embedding.md) for similarity searches or other tasks
93+
* Much more
94+
95+
## Why Node.js?
96+
JavaScript is the most popular programming language in the world, and Node.js is the most popular runtime for JavaScript server-side applications.
97+
Developers choose Node.js for its versatility, reliability, ease of use, forward compatibility, and the vast ecosystem of npm packages.
98+
99+
While Python is currently the go-to language for data science and machine learning,
100+
the needs of data scientists differ from those of developers building services and applications.
101+
102+
`node-llama-cpp` bridges this gap, making it easier to integrate large language models into Node.js and Electron projects,
103+
while focusing on the needs of developers building services and applications.
104+
105+
## Try It Out
106+
`node-llama-cpp` comes with comprehensive documentation, covering everything from installation to advanced usage.
107+
It's beginner-friendly, with explanations for every step of the way for those who are new to the world of large language models,
108+
while still being flexible enough to allow advanced usage for those who are more experienced and knowledgeable.
109+
110+
Experience the ease of running models on your machine with this single command:
111+
```shell
112+
npx -y node-llama-cpp chat
113+
```
114+
115+
Check out the [getting started guide](../guide/index.md) to learn how to use `node-llama-cpp`.
116+
117+
## Thank You
118+
`node-llama-cpp` is only possible thanks to the amazing work done on [`llama.cpp`](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov), [Slaren](https://github.com/slaren) and all the contributors from the community.
119+
120+
## What's next?
121+
Version 3.0 is a major milestone, but there's plenty more planned for the future.
122+
123+
Check out the [roadmap](https://github.com/orgs/withcatai/projects/1) to see what's coming next,
124+
<br />
125+
and [give `node-llama-cpp` a star on GitHub](https://github.com/withcatai/node-llama-cpp) to support the project.

0 commit comments

Comments
 (0)