ggml-org
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 28 additions & 3 deletions b/‎.github/workflows/build.yml‎
Lines changed: 28 additions & 3 deletions
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 28 additions & 3 deletions b/‎.github/workflows/release.yml‎
Lines changed: 28 additions & 3 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎common/json-schema-to-grammar.cpp‎
Lines changed: 21 additions & 1 deletion b/‎common/json-schema-to-grammar.cpp‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎convert_hf_to_gguf.py‎
Lines changed: 2 additions & 0 deletions b/‎convert_hf_to_gguf.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/backend/CANN.md‎
Lines changed: 4 additions & 0 deletions b/‎docs/backend/CANN.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/eval-callback/eval-callback.cpp‎
Lines changed: 11 additions & 0 deletions b/‎examples/eval-callback/eval-callback.cpp‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎examples/json_schema_to_grammar.py‎
Lines changed: 14 additions & 1 deletion b/‎examples/json_schema_to_grammar.py‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎examples/model-conversion/requirements.txt‎
Lines changed: 5 additions & 4 deletions b/‎examples/model-conversion/requirements.txt‎
Lines changed: 5 additions & 4 deletions
@@ -1063,21 +1063,46 @@ jobs:
         run: |
           git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
 
-      - name: Install
+      - name: Cache ROCm Installation
+        id: cache-rocm
+        uses: actions/cache@v4
+        with:
+          path: C:\Program Files\AMD\ROCm
+          key: rocm-6.1-${{ runner.os }}-v1
+          restore-keys: |
+            rocm-6.1-${{ runner.os }}-
+
+      - name: Install ROCm
+        if: steps.cache-rocm.outputs.cache-hit != 'true'
         id: depends
         run: |
           $ErrorActionPreference = "Stop"
           write-host "Downloading AMD HIP SDK Installer"
           Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
           write-host "Installing AMD HIP SDK"
           $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
-          $proc.WaitForExit(600000)
+          $completed = $proc.WaitForExit(600000)
+          if (-not $completed) {
+              # Kill before reporting: with "Stop", Write-Error ends the script, so nothing after it runs
+              $proc.Kill()
+              Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
+          }
+          if ($proc.ExitCode -ne 0) {
+              Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
+              exit 1
+          }
           write-host "Completed AMD HIP SDK installation"
 
       - name: Verify ROCm
         id: verify
         run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+          # Find and test ROCm installation
+          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
+          if (-not $clangPath) {
+            Write-Error "ROCm installation not found"
+            exit 1
+          }
+          & $clangPath.FullName --version
 
       - name: Install ccache
         uses: ggml-org/[email protected]
 
@@ -544,27 +544,52 @@ jobs:
         run: |
           git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
 
+      - name: Cache ROCm Installation
+        id: cache-rocm
+        uses: actions/cache@v4
+        with:
+          path: C:\Program Files\AMD\ROCm
+          key: rocm-6.1-${{ runner.os }}-v1
+          restore-keys: |
+            rocm-6.1-${{ runner.os }}-
+
       - name: ccache
         uses: ggml-org/[email protected]
         with:
           key: windows-latest-cmake-hip-${{ matrix.name }}-x64
           evict-old-files: 1d
 
-      - name: Install
+      - name: Install ROCm
+        if: steps.cache-rocm.outputs.cache-hit != 'true'
         id: depends
         run: |
           $ErrorActionPreference = "Stop"
           write-host "Downloading AMD HIP SDK Installer"
           Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
           write-host "Installing AMD HIP SDK"
           $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
-          $proc.WaitForExit(600000)
+          $completed = $proc.WaitForExit(600000)
+          if (-not $completed) {
+              # Kill before reporting: with "Stop", Write-Error ends the script, so nothing after it runs
+              $proc.Kill()
+              Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
+          }
+          if ($proc.ExitCode -ne 0) {
+              Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
+              exit 1
+          }
           write-host "Completed AMD HIP SDK installation"
 
       - name: Verify ROCm
         id: verify
         run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+          # Find and test ROCm installation
+          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
+          if (-not $clangPath) {
+            Write-Error "ROCm installation not found"
+            exit 1
+          }
+          & $clangPath.FullName --version
 
       - name: Build
         id: cmake_build
 
@@ -16,6 +16,9 @@
 - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
 - Optionally pick a `<module>` from here: https://github.com/ggml-org/llama.cpp/wiki/Modules
 - Consider adding yourself to [CODEOWNERS](CODEOWNERS)
+- Let authors who are also collaborators merge their own PRs
+- When merging a PR by a contributor, make sure you have a good understanding of the changes
+- Be mindful of maintenance: most of the work going into a feature happens after the PR is merged. If the PR author is not committed to contributing long-term, someone else needs to take responsibility (you)
 
 # Coding guidelines
 
 
@@ -843,9 +843,10 @@ class SchemaConverter {
                 _build_object_rule(
                     properties, required, name,
                     schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
-        } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
+        } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
             std::unordered_set<std::string> required;
             std::vector<std::pair<std::string, json>> properties;
+            std::map<std::string, size_t> enum_values;
             std::string hybrid_name = name;
             std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
                 if (comp_schema.contains("$ref")) {
@@ -857,6 +858,14 @@ class SchemaConverter {
                             required.insert(prop.key());
                         }
                     }
+                } else if (comp_schema.contains("enum")) {
+                    for (const auto & v : comp_schema["enum"]) {
+                        const auto rule = _generate_constant_rule(v);
+                        if (enum_values.find(rule) == enum_values.end()) {
+                            enum_values[rule] = 0;
+                        }
+                        enum_values[rule] += 1;
+                    }
                 } else {
                   // todo warning
                 }
@@ -870,6 +879,17 @@ class SchemaConverter {
                     add_component(t, true);
                 }
             }
+            if (!enum_values.empty()) {
+                std::vector<std::string> enum_intersection;
+                for (const auto & p : enum_values) {
+                    if (p.second == schema["allOf"].size()) {
+                        enum_intersection.push_back(p.first);
+                    }
+                }
+                if (!enum_intersection.empty()) {
+                    return _add_rule(rule_name, "(" + string_join(enum_intersection, " | ") + ") space");
+                }
+            }
             return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
         } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
             json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
 
@@ -6701,6 +6701,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
         self.gguf_writer.add_block_count(self.hparams["num_layers"])
+        if (dec_n_layer := self.hparams.get("num_decoder_layers")) is not None:
+            self.gguf_writer.add_decoder_block_count(dec_n_layer)
         self.gguf_writer.add_head_count(self.hparams["num_heads"])
         self.gguf_writer.add_key_length(self.hparams["d_kv"])
         self.gguf_writer.add_value_length(self.hparams["d_kv"])
 
@@ -314,3 +314,7 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable
 ### GGML_CANN_ACL_GRAPH
 
 Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
+
+### GGML_CANN_GRAPH_CACHE_CAPACITY
+
+Maximum number of compiled CANN graphs kept in the LRU cache (default: 12). When the number of cached graphs exceeds this capacity, the least recently used graph is evicted.
@@ -28,6 +28,15 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
     return str;
 }
 
+static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {  // convert a bf16 value to float
+    union {  // lets the 32-bit integer pattern below be read back as a float
+        float f;
+        uint32_t i;
+    } u;
+    u.i = (uint32_t)h.bits << 16;  // bf16 bits become the upper 16 bits; the lower 16 bits are zero
+    return u.f;
+}
+
 static float ggml_get_float_value(uint8_t * data, ggml_type type, const size_t * nb, size_t i0, size_t i1, size_t i2, size_t i3) {
     size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
     float v;
@@ -43,6 +52,8 @@ static float ggml_get_float_value(uint8_t * data, ggml_type type, const size_t *
         v = (float) *(int16_t *) &data[i];
     } else if (type == GGML_TYPE_I8) {
         v = (float) *(int8_t *) &data[i];
+    } else if (type == GGML_TYPE_BF16) {
+        v = ggml_compute_bf16_to_fp32(*(ggml_bf16_t *) &data[i]);
     } else {
         GGML_ABORT("fatal error");
     }
 
@@ -586,9 +586,10 @@ def visit(self, schema, name):
             properties = list(schema.get('properties', {}).items())
             return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
 
-        elif schema_type in (None, 'object') and 'allOf' in schema:
+        elif schema_type in (None, 'object', 'string') and 'allOf' in schema:
             required = set()
             properties = []
+            enum_sets = []
             hybrid_name = name
             def add_component(comp_schema, is_required):
                 if (ref := comp_schema.get('$ref')) is not None:
@@ -600,13 +601,25 @@ def add_component(comp_schema, is_required):
                         if is_required:
                             required.add(prop_name)
 
+                if 'enum' in comp_schema:
+                    enum_sets.append(set(comp_schema['enum']))
+
             for t in schema['allOf']:
                 if 'anyOf' in t:
                     for tt in t['anyOf']:
                         add_component(tt, is_required=False)
                 else:
                     add_component(t, is_required=True)
 
+            if enum_sets:
+                enum_intersection = enum_sets[0]
+                for s in enum_sets[1:]:
+                    enum_intersection &= s
+
+                if enum_intersection:
+                    rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in sorted(enum_intersection))) + ') space'
+                    return self._add_rule(rule_name, rule)
+
             return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None))
 
         elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
 
@@ -1,5 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-torch~=2.6.0
-torchvision~=0.21.0
-transformers~=4.55.0
-huggingface-hub~=0.34.0
+torch
+torchvision
+transformers
+huggingface-hub
+accelerate