Commit 8d6feac

Merge branch 'ggerganov:master' into embed_yolo_files
2 parents: bcf4ec8 + 9d562d7


45 files changed (+8253, -6455 lines)

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OF
 option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
 option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF)
 option(GGML_RPC "ggml: use RPC" OFF)
+option(GGML_VULKAN "ggml: use Vulkan" OFF)
 
 option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
 option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
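
For reference, enabling the new option is a one-flag change at configure time. A minimal sketch, assuming an out-of-tree `build/` directory and a working Vulkan SDK; only the `GGML_VULKAN` flag itself comes from this commit, the surrounding steps are illustrative:

```bash
# Hypothetical out-of-tree configure; only -DGGML_VULKAN=ON is defined by this commit.
mkdir -p build && cd build
cmake -DGGML_VULKAN=ON ..
cmake --build . --config Release
```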

README.md

Lines changed: 6 additions & 0 deletions
@@ -122,6 +122,12 @@ cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off ..
 cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc ..
 ```
 
+## Using hipBLAS
+
+```bash
+cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" -DGGML_HIPBLAS=ON
+```
+
 ## Using clBLAST
 
 ```bash
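
The new README section only shows the configure line; a fuller invocation might look like the sketch below. It assumes ROCm's `hipconfig` is on `PATH`; the build directory, source-dir argument, and `cmake --build` step are illustrative additions, not part of the documented command:

```bash
# Sketch: configure and build with hipBLAS, assuming a ROCm install with hipconfig on PATH.
mkdir -p build && cd build
cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" \
      -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" \
      -DGGML_HIPBLAS=ON ..
cmake --build . --config Release
```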

build.zig

Lines changed: 36 additions & 34 deletions
@@ -4,14 +4,9 @@ const builtin = @import("builtin");
 // Zig Version: 0.11.0
 // Zig Build Command: zig build
 // Zig Run Command: zig build -h
-// zig build run_dolly-v2
-// zig build run_gpt-2
 // zig build run_gpt-j
-// zig build run_gpt-neox
 // zig build run_mnist
-// zig build run_mpt
-// zig build run_replit
-// zig build run_starcoder
+// zig build run_magika
 // zig build run_test-grad0
 // zig build run_test-mul-mat0
 // zig build run_test-mul-mat2
@@ -25,33 +20,35 @@ const builtin = @import("builtin");
 // zig build run_zig_test1
 // zig build run_zig_test2
 // zig build run_zig_test3
-pub fn build(b: *std.build.Builder) void {
+pub fn build(b: *std.Build) void {
     const target = b.standardTargetOptions(.{});
     const optimize = b.standardOptimizeOption(.{});
     const lib = b.addStaticLibrary(.{
         .name = "ggml",
         .target = target,
         .optimize = optimize,
     });
-    lib.addIncludePath(.{ .path = "./include" });
-    lib.addIncludePath(.{ .path = "./include/ggml" });
-    lib.addCSourceFiles(&.{
+    lib.addIncludePath(b.path("./include"));
+    lib.addIncludePath(b.path("./include/ggml"));
+    lib.addCSourceFiles(.{ .files = &.{
         "src/ggml.c",
-    }, &.{"-std=c11"});
+        "src/ggml-alloc.c",
+        "src/ggml-backend.c",
+        "src/ggml-quants.c",
+    }, .flags = &.{
+        "-std=c11",
+        "-D_GNU_SOURCE",
+        "-D_XOPEN_SOURCE=600",
+    } });
     lib.linkLibC();
     lib.linkLibCpp();
     b.installArtifact(lib);
 
     // examples
     const examples = .{
-        "dolly-v2",
-        "gpt-2",
         "gpt-j",
-        "gpt-neox",
+        "magika",
         "mnist",
-        "mpt",
-        "replit",
-        "starcoder",
         // "whisper",
     };
     inline for (examples) |name| {
@@ -60,16 +57,19 @@ pub fn build(b: *std.build.Builder) void {
             .target = target,
             .optimize = optimize,
         });
-        exe.addIncludePath(.{ .path = "./include" });
-        exe.addIncludePath(.{ .path = "./include/ggml" });
-        exe.addIncludePath(.{ .path = "./examples" });
+        exe.addIncludePath(b.path("./include"));
+        exe.addIncludePath(b.path("./include/ggml"));
+        exe.addIncludePath(b.path("./examples"));
         // exe.addIncludePath("./examples/whisper");
-        exe.addCSourceFiles(&.{
-            std.fmt.comptimePrint("examples/{s}/main.cpp", .{name}),
-            "examples/common.cpp",
-            "examples/common-ggml.cpp",
-            // "examples/whisper/whisper.cpp",
-        }, &.{"-std=c++11"});
+        exe.addCSourceFiles(.{
+            .files = &.{
+                std.fmt.comptimePrint("examples/{s}/main.cpp", .{name}),
+                "examples/common.cpp",
+                "examples/common-ggml.cpp",
+                // "examples/whisper/whisper.cpp",
+            },
+            .flags = &.{"-std=c++11"},
+        });
         exe.linkLibrary(lib);
         b.installArtifact(exe);
         const run_cmd = b.addRunArtifact(exe);
@@ -88,7 +88,7 @@ pub fn build(b: *std.build.Builder) void {
         "test-mul-mat2",
         // "test-opt",
         // "test-svd0",
-        // "test-vec0",
+        "test-vec0",
         "test-vec1",
         // "test-vec2",
         "test0",
@@ -117,11 +117,13 @@ pub fn build(b: *std.build.Builder) void {
             .target = target,
             .optimize = optimize,
         });
-        exe.addIncludePath(.{ .path = "./include" });
-        exe.addIncludePath(.{ .path = "./include/ggml" });
-        exe.addCSourceFiles(&.{
+        exe.addIncludePath(b.path("./include"));
+        exe.addIncludePath(b.path("./include/ggml"));
+        exe.addCSourceFiles(.{ .files = &.{
            std.fmt.comptimePrint("tests/{s}.c", .{name}),
-        }, &.{"-std=c11"});
+        }, .flags = &.{
+            "-std=c11",
+        } });
         exe.linkLibrary(lib);
         b.installArtifact(exe);
         const run_cmd = b.addRunArtifact(exe);
@@ -141,12 +143,12 @@ pub fn build(b: *std.build.Builder) void {
     inline for (zig_tests) |name| {
         const exe = b.addExecutable(.{
             .name = name,
-            .root_source_file = .{ .path = std.fmt.comptimePrint("tests/{s}.zig", .{name}) },
+            .root_source_file = b.path(std.fmt.comptimePrint("tests/{s}.zig", .{name})),
             .target = target,
             .optimize = optimize,
         });
-        exe.addIncludePath(.{ .path = "./include" });
-        exe.addIncludePath(.{ .path = "./include/ggml" });
+        exe.addIncludePath(b.path("./include"));
+        exe.addIncludePath(b.path("./include/ggml"));
         exe.linkLibrary(lib);
         b.installArtifact(exe);
         const run_cmd = b.addRunArtifact(exe);
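
To sanity-check the updated build script, the run steps listed in its header comments can be exercised directly. A sketch, assuming a Zig toolchain new enough for the `b.path`/`addCSourceFiles(.{ ... })` API used above; the step names come from the comments in the diff:

```bash
zig build              # builds the ggml static library, examples, and tests
zig build -h           # lists all available steps, including the run_* targets
zig build run_magika   # runs the newly listed magika example
```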

docs/gguf.md

Lines changed: 57 additions & 6 deletions
@@ -20,40 +20,91 @@ The key difference between GGJT and GGUF is the use of a key-value structure for
 
 ### GGUF Naming Convention
 
-GGUF follow a naming convention of `<Model>-<Version>-<ExpertsCount>x<Parameters>-<EncodingScheme>.gguf`
+GGUF follow a naming convention of `<Model>(-<Version>)-(<ExpertsCount>x)<Parameters>-<EncodingScheme>(-<Shard>).gguf`
 
 The components are:
 1. **Model**: A descriptive name for the model type or architecture.
+    - This can be derived from gguf metadata `general.name` substituting spaces for dashes.
 2. **Version**: (Optional) Denotes the model version number, formatted as `v<Major>.<Minor>`
     - If model is missing a version number then assume `v0.0` (Prerelease)
-3. **ExpertsCount**: Indicates the number of experts found in a Mixture of Experts based model.
+    - This can be derived from gguf metadata `general.version`
+3. **ExpertsCount**: (Optional) Indicates the number of experts found in a Mixture of Experts based model.
+    - This can be derived from gguf metadata `llama.expert_count`
 4. **Parameters**: Indicates the number of parameters and their scale, represented as `<count><scale-prefix>`:
     - `Q`: Quadrillion parameters.
     - `T`: Trillion parameters.
     - `B`: Billion parameters.
     - `M`: Million parameters.
    - `K`: Thousand parameters.
 5. **EncodingScheme**: Indicates the weights encoding scheme that was applied to the model. Content, type mixture and arrangement however are determined by user code and can vary depending on project needs.
+6. **Shard**: (Optional) Indicates and denotes that the model has been split into multiple shards, formatted as `<ShardNum>-of-<ShardTotal>`.
+    - *ShardNum* : Shard position in this model. Must be 5 digits padded by zeros.
+      - Shard number always starts from `00001` onwards (e.g. First shard always starts at `00001-of-XXXXX` rather than `00000-of-XXXXX`).
+    - *ShardTotal* : Total number of shards in this model. Must be 5 digits padded by zeros.
 
 #### Parsing Above Naming Convention
 
 To correctly parse a well formed naming convention based gguf filename, it is recommended to read from right to left using `-` as the delimiter. This strategy allow for the most flexibility in model name to include dashes if they so choose, while at the same time allowing for version string to be optional. This approach also gives some future proofing to extend the format if needed in the future.
 
 For example:
 
-* `mixtral-v0.1-8x7B-KQ2.gguf`:
+* `Mixtral-v0.1-8x7B-Q2_K.gguf`:
   - Model Name: Mixtral
   - Version Number: v0.1
   - Expert Count: 8
   - Parameter Count: 7B
-  - Weight Encoding Scheme: KQ2
+  - Weight Encoding Scheme: Q2_K
+  - Shard: N/A
 
 * `Hermes-2-Pro-Llama-3-8B-F16.gguf`:
   - Model Name: Hermes 2 Pro Llama 3
-  - Version Number: v0.0 (`<Version>-` missing)
-  - Expert Count: 0 (`<ExpertsCount>x` missing)
+  - Version Number: v0.0
+  - Expert Count: 0
   - Parameter Count: 8B
   - Weight Encoding Scheme: F16
+  - Shard: N/A
+
+* `Grok-v1.0-100B-Q4_0-00003-of-00009.gguf"`
+  - Model Name: Grok
+  - Version Number: v1.0
+  - Expert Count: 0
+  - Parameter Count: 100B
+  - Weight Encoding Scheme: Q4_0
+  - Shard: 3 out of 9 total shards
+
+You can also try using `/^(?<model_name>[A-Za-z0-9\s-]+)(?:-v(?<major>\d+)\.(?<minor>\d+))?-(?:(?<experts_count>\d+)x)?(?<parameters>\d+[A-Za-z]?)-(?<encoding_scheme>[\w_]+)(?:-(?<shard>\d{5})-of-(?<shardTotal>\d{5}))?\.gguf$/` regular expression to extract all the values above as well. Just don't forget to convert `-` to ` ` for the model name.
+
+<details><summary>Example Node.js Regex Function</summary>
+
+```js
+#!/usr/bin/env node
+const ggufRegex = /^(?<model_name>[A-Za-z0-9\s-]+)(?:-v(?<major>\d+)\.(?<minor>\d+))?-(?:(?<experts_count>\d+)x)?(?<parameters>\d+[A-Za-z]?)-(?<encoding_scheme>[\w_]+)(?:-(?<shard>\d{5})-of-(?<shardTotal>\d{5}))?\.gguf$/;
+
+function parseGGUFFilename(filename) {
+  const match = ggufRegex.exec(filename);
+  if (!match)
+    return null;
+  const {model_name, major = '0', minor = '0', experts_count = null, parameters, encoding_scheme, shard = null, shardTotal = null} = match.groups;
+  return {modelName: model_name.trim().replace(/-/g, ' '), version: `v${major}.${minor}`, expertsCount: experts_count ? +experts_count : null, parameters, encodingScheme: encoding_scheme, shard: shard ? +shard : null, shardTotal: shardTotal ? +shardTotal : null};
+}
+
+const testCases = [
+  {filename: 'Mixtral-v0.1-8x7B-Q2_K.gguf', expected: { modelName: 'Mixtral', version: 'v0.1', expertsCount: 8, parameters: '7B', encodingScheme: 'Q2_K', shard: null, shardTotal: null }},
+  {filename: 'Grok-v1.0-100B-Q4_0-00003-of-00009.gguf', expected: { modelName: 'Grok', version: 'v1.0', expertsCount: null, parameters: '100B', encodingScheme: 'Q4_0', shard: 3, shardTotal: 9 }},
+  {filename: 'Hermes-2-Pro-Llama-3-8B-F16.gguf', expected: { modelName: 'Hermes 2 Pro Llama 3', version: 'v0.0', expertsCount: null, parameters: '8B', encodingScheme: 'F16', shard: null, shardTotal: null }},
+  {filename: 'Hermes-2-Pro-Llama-3-v32.33-8Q-F16.gguf', expected: { modelName: 'Hermes 2 Pro Llama 3', version: 'v32.33', expertsCount: null, parameters: '8Q', encodingScheme: 'F16', shard: null, shardTotal: null }},
+  {filename: 'not-a-known-arrangement.gguf', expected: null},
+];
+
+testCases.forEach(({ filename, expected }) => {
+  const result = parseGGUFFilename(filename);
+  const passed = JSON.stringify(result) === JSON.stringify(expected);
+  console.log(`${filename}: ${passed ? "PASS" : "FAIL"}`);
+});
+```
+
+</details>
+
 
 ### File Structure
 
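
To try the Node.js parser from the new docs section outside the document, the JS block above can be saved to a standalone file and run directly; a sketch with a hypothetical filename:

```bash
# Hypothetical: the JS block from the diff above saved as parse-gguf-name.js
node parse-gguf-name.js
# Each test-case filename should print PASS if the parser output matches the expected values.
```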

examples/whisper/whisper.cpp

Lines changed: 1 addition & 1 deletion
@@ -2588,7 +2588,7 @@ static struct ggml_cgraph * whisper_build_graph_decoder(
                 if (aheads_cross_QKs == NULL) {
                     aheads_cross_QKs = aheads_KQs;
                 } else {
-                    aheads_cross_QKs = ggml_concat(ctx0, aheads_cross_QKs, aheads_KQs);
+                    aheads_cross_QKs = ggml_concat(ctx0, aheads_cross_QKs, aheads_KQs, 2);
                 }
             }
         }

scripts/sync-llama-am.sh

Lines changed: 1 addition & 0 deletions
@@ -156,6 +156,7 @@ if [ -f $SRC_GGML/llama-src.patch ]; then
     -e 's/\/ggml-sycl\.h/\/src\/ggml-sycl.h/g' \
     -e 's/\/ggml-vulkan\.cpp/\/src\/ggml-vulkan.cpp/g' \
     -e 's/\/ggml-vulkan\.h/\/src\/ggml-vulkan.h/g' \
+    -e 's/\/ggml_vk_generate_shaders\.py/\/src\/ggml_vk_generate_shaders.py/g' \
     -e 's/\/ggml\.h/\/include\/ggml\/ggml.h/g' \
     -e 's/\/ggml-alloc\.h/\/include\/ggml\/ggml-alloc.h/g' \
     -e 's/\/ggml-backend\.h/\/include\/ggml\/ggml-backend.h/g' \

scripts/sync-llama.last

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-e8a7fd4fb06d82f663850c21fcf86c0fb98ad9b4
+0e8d8bfd6caf1d0a8cbdf9d3d5c06fbbb9dfced8

scripts/sync-llama.sh

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ cp -rpv ../llama.cpp/ggml-sycl.cpp src/ggml-sycl.cpp
 cp -rpv ../llama.cpp/ggml-sycl.h src/ggml-sycl.h
 cp -rpv ../llama.cpp/ggml-vulkan.cpp src/ggml-vulkan.cpp
 cp -rpv ../llama.cpp/ggml-vulkan.h src/ggml-vulkan.h
+cp -rpv ../llama.cpp/ggml_vk_generate_shaders.py src/ggml_vk_generate_shaders.py
 cp -rpv ../llama.cpp/ggml.h include/ggml/ggml.h
 cp -rpv ../llama.cpp/ggml-alloc.h include/ggml/ggml-alloc.h
 cp -rpv ../llama.cpp/ggml-backend.h include/ggml/ggml-backend.h
