---
title: node-llama-cpp v3.0
date: 2024-09-23T22:00:00Z
author:
  name: Gilad S.
  github: giladgd
category: Release
description: Learn more about the new features in node-llama-cpp v3.0!
image:
  url: https://github.com/user-attachments/assets/c7ed2eab-fb50-426d-9019-aed40147f30e
  alt: Celebrate
  width: 3072
  height: 1536
---
[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) 3.0 is finally here.

With [`node-llama-cpp`](https://node-llama-cpp.withcat.ai), you can run large language models locally on your machine using the power of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) with a simple and easy-to-use API.

It includes everything you need, from downloading models, to running them in the most optimized way for your hardware, to integrating them into your projects.

---

## Why `node-llama-cpp`?
You might be wondering: why choose `node-llama-cpp` over calling the OpenAI-compatible API of a service running on your machine?

The answer is simple: simplicity, performance, and flexibility.

Let's break it down:

### Simplicity
To use `node-llama-cpp`, you install it like any other npm package, and you're good to go.

To run your project, all you have to do is `npm install` and `npm start`. That's it.

No installing additional software on your machine, no setting up API keys or environment variables, no setup process at all.
Everything is self-contained in your project, giving you complete control over it.

With `node-llama-cpp`, you can run large language models on your machine using Node.js and TypeScript, _without_ any Python at all.
Say goodbye to setup headaches, "it works on my machine" issues, and all other Python-related problems.

While `llama.cpp` is an amazing project, it's also highly technical and can be challenging for beginners.
`node-llama-cpp` bridges that gap, making `llama.cpp` accessible to everyone, regardless of their experience level.

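Getting started really is just an `npm install node-llama-cpp` away. Here's a minimal sketch of prompting a model with a chat session; the model file name is a placeholder for whichever GGUF model you've put in your project:

```typescript
import path from "path";
import {fileURLToPath} from "url";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// getLlama() picks the best available compute backend for this machine
const llama = await getLlama();

// "my-model.gguf" is a placeholder - use any GGUF model file you've downloaded
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "models", "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const answer = await session.prompt("Hi there, how are you?");
console.log("AI: " + answer);
```
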
### Performance
[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) is built on top of [`llama.cpp`](https://github.com/ggerganov/llama.cpp), a highly optimized C++ library for running large language models.

`llama.cpp` supports many compute backends, including Metal, CUDA, and Vulkan. It also uses [Accelerate](https://developer.apple.com/accelerate/) on Mac.

`node-llama-cpp` automatically adapts to your hardware and adjusts the default settings to give you the best performance,
so you don't _have_ to configure anything to use it.

By using `node-llama-cpp`, you are essentially running models _inside_ your project.
With no overhead of network calls or data serialization,
you can take fuller advantage of the stateful nature of inference operations.

For example, you can prompt a model on top of an existing conversation's inference state,
without re-evaluating the entire history just to process the new prompt.
<br/>
This reduces the time it takes to start generating a response and makes more efficient use of your resources.

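For example, with a chat session set up (a sketch; the model path is a placeholder), each successive `prompt()` call continues from the already-evaluated conversation state instead of re-processing the whole transcript:

```typescript
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "models/my-model.gguf" // placeholder path
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// The first prompt evaluates the system prompt and the user message
const first = await session.prompt("Explain the benefits of local inference.");

// This follow-up only evaluates the new message - the earlier
// conversation state is already in the context and isn't re-evaluated
const followUp = await session.prompt("Now shorten that to one sentence.");
```
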
If you were using an API, you would have to re-evaluate the entire history every time you prompt the model,
or have the API store the state for you, which can consume huge amounts of disk space.

### Flexibility
Since `node-llama-cpp` runs inside your project, you can also deploy it together with your project.
<br/>
You can run models in your [Electron](../guide/electron.md) app without requiring any additional setup on the user's machine.

You can build libraries that use large language models and distribute them as npm packages,
<br/>
or deploy self-contained Docker images and run them on any hardware you want.

You can use [any model you want](../guide/choosing-a-model.md), or even create your own and use it with `node-llama-cpp`.
<br/>
Download models [as part of `npm install`](../guide/downloading-models.md) or [on-demand from your code](../guide/downloading-models.md#programmatic).

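Fetching a model on-demand from your code can look roughly like this sketch (the model URL below is a placeholder; point it at the GGUF model you actually want):

```typescript
import path from "path";
import {fileURLToPath} from "url";
import {createModelDownloader} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// The URL below is a placeholder - replace it with a real GGUF model URL
const downloader = await createModelDownloader({
    modelUrl: "https://example.com/models/my-model.Q4_K_M.gguf",
    dirPath: path.join(__dirname, "models")
});

// Downloads the file and resolves with the local path to the model
const modelPath = await downloader.download();
console.log("Model saved to", modelPath);
```
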
[Tweak inference settings](../guide/chat-session.md#repeat-penalty) to get better results for your particular use case.

`node-llama-cpp` is regularly updated with the latest `llama.cpp` release,
but you can also [download and build the latest release](../guide/building-from-source.md#download-new-release) at any time with a single command.

The possibilities are endless.
You have full control over the models you use, how you use them, and where you use them.
You can tailor `node-llama-cpp` to your needs in ways that aren't possible with an OpenAI API (at least not efficiently or easily).

## Powerful Features
`node-llama-cpp` includes a complete suite of everything you need to use large language models in your projects,
with convenient wrappers for popular tasks, such as:
* [Enforcing a JSON schema](../guide/chat-session.md#response-json-schema) on the output the model generates
* Providing the model with [functions it can call on demand](../guide/chat-session.md#function-calling) to retrieve information or perform actions, even with some models that don't officially support it
* [Generating completion](../guide/text-completion.md) for a given text
* [Embedding text](../guide/embedding.md) for similarity searches or other tasks
* Much more

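As a taste of the JSON schema enforcement, here's a rough sketch (the model path and the schema are placeholders for your own):

```typescript
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "models/my-model.gguf" // placeholder path
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// The grammar constrains generation so the output always matches the schema
const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        title: {type: "string"},
        positivityScoreFromOneToTen: {type: "number"}
    }
} as const);

const response = await session.prompt("Summarize this review: 'Great product!'", {
    grammar
});
const result = grammar.parse(response); // an object matching the schema
console.log(result.title, result.positivityScoreFromOneToTen);
```
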
## Why Node.js?
JavaScript is the most popular programming language in the world, and Node.js is the most popular runtime for JavaScript server-side applications.
Developers choose Node.js for its versatility, reliability, ease of use, forward compatibility, and the vast ecosystem of npm packages.

While Python is currently the go-to language for data science and machine learning,
the needs of data scientists differ from those of developers building services and applications.

`node-llama-cpp` bridges this gap, making it easier to integrate large language models into Node.js and Electron projects,
while focusing on the needs of developers building services and applications.

## Try It Out
`node-llama-cpp` comes with comprehensive documentation, covering everything from installation to advanced usage.
It's beginner-friendly, with explanations for every step of the way for those who are new to the world of large language models,
while still being flexible enough to allow advanced usage for those who are more experienced and knowledgeable.

Experience the ease of running models on your machine with this single command:
```shell
npx -y node-llama-cpp chat
```

Check out the [getting started guide](../guide/index.md) to learn how to use `node-llama-cpp`.

## Thank You
`node-llama-cpp` is only possible thanks to the amazing work done on [`llama.cpp`](https://github.com/ggerganov/llama.cpp) by [Georgi Gerganov](https://github.com/ggerganov), [Slaren](https://github.com/slaren), and all the contributors from the community.

## What's Next?
Version 3.0 is a major milestone, but there's plenty more planned for the future.

Check out the [roadmap](https://github.com/orgs/withcatai/projects/1) to see what's coming next,
<br />
and [give `node-llama-cpp` a star on GitHub](https://github.com/withcatai/node-llama-cpp) to support the project.