Commit 9d32c3a

Merge pull request #22 from kpouget/rebase-b7356
Rebase on top of b7356

2 parents e39502e + e55205d

159 files changed: +28072 -5208 lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions

```diff
@@ -243,7 +243,7 @@ jobs:
           echo "Fetch llama2c model"
           wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
           ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
-          ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+          ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
 
       - name: Test llama2c (s390x)
         id: llama2c_test_s390x
@@ -252,7 +252,7 @@ jobs:
           cd build
           echo "Fetch llama2c big-endian model"
           wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
-          ./bin/llama-cli -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+          ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
 
   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -1770,7 +1770,7 @@ jobs:
           echo "Fetch llama2c model"
           wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
           ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
-          ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+          ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
 
   ubuntu-cmake-sanitizer-riscv64-native:
     runs-on: RISCV64
```

CMakePresets.json

Lines changed: 2 additions & 0 deletions

```diff
@@ -30,6 +30,8 @@
     { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
     { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
     { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
+    { "name": "remoting_frontend", "hidden": true, "cacheVariables": { "GGML_REMOTING_FRONTEND": "ON" } },
+    { "name": "remoting_backend", "hidden": true, "cacheVariables": { "GGML_REMOTING_BACKEND": "ON" } },
 
     {
         "name": "x64-windows-llvm", "hidden": true,
```
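Both new entries are marked `"hidden": true`, so they are building-block presets meant to be composed into a concrete configure preset rather than invoked directly. The equivalent direct configuration is a one-flag CMake invocation; this is a sketch, with the out-of-tree build directory names borrowed from the build scripts elsewhere in this commit:

```shell
# Enable the remoting frontend via the cache variable the preset declares
# (the build directory names are assumptions, not mandated by the preset):
cmake -B ../build.remoting-frontend -DGGML_REMOTING_FRONTEND=ON

# ... or the backend side:
cmake -B ../build.remoting-backend -DGGML_REMOTING_BACKEND=ON
```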

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -15,6 +15,7 @@ The project differentiates between 3 levels of contributors:
 - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
 - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
 - Create separate PRs for each feature or fix. Avoid combining unrelated changes in a single PR
+- When adding support for a new model or feature, focus on **CPU support only** in the initial PR unless you have a good reason not to. Add support for other backends like CUDA in follow-up PRs
 - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
 - If your PR becomes stale, rebase it on top of latest `master` to get maintainers attention
 - Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
```

OWNERS

Lines changed: 13 additions & 0 deletions

```diff
@@ -0,0 +1,13 @@
+approvers:
+  - kpouget
+  - cfergeau
+  - praveenkumar
+  - vyasgun
+  - gbraad
+options: {}
+reviewers:
+  - kpouget
+  - cfergeau
+  - praveenkumar
+  - vyasgun
+  - gbraad
```

README.md

Lines changed: 0 additions & 13 deletions

````diff
@@ -347,19 +347,6 @@ To learn more about model quantization, [read this documentation](tools/quantize
 
   </details>
 
-- <details>
-    <summary>Run simple text completion</summary>
-
-    To disable conversation mode explicitly, use `-no-cnv`
-
-    ```bash
-    llama-cli -m model.gguf -p "I believe the meaning of life is" -n 128 -no-cnv
-
-    # I believe the meaning of life is to find your own truth and to live in accordance with it. For me, this means being true to myself and following my passions, even if they don't align with societal expectations. I think that's what I love about yoga – it's not just a physical practice, but a spiritual one too. It's about connecting with yourself, listening to your inner voice, and honoring your own unique journey.
-    ```
-
-  </details>
-
 - <details>
     <summary>Constrain the output with a custom grammar</summary>
````

build.backend.sh

Lines changed: 37 additions & 0 deletions

```diff
@@ -0,0 +1,37 @@
+# force isatty-->true, so that $0 |& head -50 has colors ...
+rm -f READY_backend FAILED_backend
+
+echo "int isatty(int fd) { return 1; }" | gcc -O2 -fpic -shared -ldl -o /tmp/isatty.so -xc -
+export LD_PRELOAD=/tmp/isatty.so
+
+if [[ "${PERF_MODE:-}" ]]; then
+    FLAVOR="-prod"
+else
+    FLAVOR=""
+fi
+
+export SDKROOT=$(xcrun --sdk macosx --show-sdk-path)
+
+if [[ "$FLAVOR" == "-prod" ]]; then
+    cat <<EOF
+###
+### Building the prod flavor
+###
+EOF
+fi
+
+TARGETS="llama-run"
+if [[ "${BENCH_MODE:-}" == "bench" ]]; then
+    TARGETS="$TARGETS llama-bench"
+elif [[ "${BENCH_MODE:-}" == "perf" ]]; then
+    TARGETS="$TARGETS test-backend-ops"
+fi
+
+cmake --build ../build.remoting-backend$FLAVOR --target $TARGETS "$@" --parallel 8
+
+if [[ $? == 0 ]]; then
+    touch READY_backend
+else
+    touch FAILED_backend
+    exit 1
+fi
```

build.linux.sh

Lines changed: 10 additions & 0 deletions

```diff
@@ -0,0 +1,10 @@
+rm -f READY FAILED
+
+cmake --build ../build.vulkan-linux --parallel 8 --target llama-run llama-server
+
+if [[ $? == 0 ]]; then
+    touch READY
+else
+    touch FAILED
+    exit 1
+fi
```

build.remoting.sh

Lines changed: 26 additions & 0 deletions

Note: as committed, line 9 read `TARGETS="$BUILD_TARGET llama-run"`, but `BUILD_TARGET` is never set, which silently drops the `ggml-remotingfrontend` target assigned on line 7; it should append to `$TARGETS`:

```diff
@@ -0,0 +1,26 @@
+# force isatty-->true, so that $0 |& head -50 has colors ...
+rm -f READY FAILED
+
+echo "int isatty(int fd) { return 1; }" | gcc -O2 -fpic -shared -ldl -o /tmp/isatty.so -xc -
+export LD_PRELOAD=/tmp/isatty.so
+
+TARGETS="ggml-remotingfrontend"
+
+TARGETS="$TARGETS llama-run"
+set -x
+if [[ "${BENCH_MODE:-}" == "bench" ]]; then
+    TARGETS="$TARGETS llama-bench"
+elif [[ "${BENCH_MODE:-}" == "server" ]]; then
+    TARGETS="$TARGETS llama-server"
+elif [[ "${BENCH_MODE:-}" == "perf" ]]; then
+    TARGETS="$TARGETS test-backend-ops"
+fi
+
+cmake --build ../build.remoting-frontend$FLAVOR --parallel 8 --target $TARGETS "$@"
+
+if [[ $? == 0 ]]; then
+    touch READY
+else
+    touch FAILED
+    exit 1
+fi
```

build.sh

Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+cmake --build ./build/ --parallel 8
```

build.vulkan.sh

Lines changed: 10 additions & 0 deletions

```diff
@@ -0,0 +1,10 @@
+rm -f READY FAILED
+
+cmake --build ../build.vulkan --parallel 8 --target llama-run
+
+if [[ $? == 0 ]]; then
+    touch READY
+else
+    touch FAILED
+    exit 1
+fi
```
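All four build scripts in this commit share the same sentinel protocol: delete the READY/FAILED marker files, run the build, then touch whichever marker matches the outcome. That pattern could be consolidated into a helper; this is a sketch with an invented function name (`run_build`) and demo paths, not part of the commit:

```shell
# run_build READY_FILE FAILED_FILE command [args...]
# Removes both sentinel files, runs the command, then touches the sentinel
# matching the outcome, mirroring the build scripts above.
run_build() {
    local ready="$1" failed="$2"
    shift 2
    rm -f "$ready" "$failed"
    if "$@"; then
        touch "$ready"
    else
        touch "$failed"
        return 1
    fi
}

# Usage sketch (mirroring build.vulkan.sh):
# run_build READY FAILED cmake --build ../build.vulkan --parallel 8 --target llama-run
run_build /tmp/demo_READY /tmp/demo_FAILED true
```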
