Skip to content

Commit 4c44e3d

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents 39940e5 + d5ac8cf commit 4c44e3d

File tree

7 files changed

+585
-335
lines changed

7 files changed

+585
-335
lines changed

CONTRIBUTING.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
11
# Pull requests (for contributors)
22

33
- Test your changes:
4-
- Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
4+
- Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the `ggml` library
55
- Execute [the full CI locally on your machine](ci/README.md) before publishing
6-
- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
7-
- The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
8-
- Consider allowing write access to your branch for faster review
6+
- Optionally rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs
7+
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
98
- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
109

1110
# Pull requests (for collaborators)
1211

1312
- Squash-merge PRs
1413
- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
15-
- Optionally, pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
14+
- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
1615

1716
# Coding guidelines
1817

1918
- Avoid adding third-party dependencies, extra files, extra headers, etc.
2019
- Always consider cross-compatibility with other operating systems and architectures
21-
- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
20+
- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
2221
- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
2322
- Clean up any trailing whitespace, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
2423
- Naming usually optimizes for a common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)

ggml/include/ggml-backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ extern "C" {
127127
bool async;
128128
// pinned host buffer
129129
bool host_buffer;
130+
// creating buffers from host ptr
131+
bool buffer_from_host_ptr;
130132
// event synchronization
131133
bool events;
132134
};

ggml/include/ggml-metal.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
4343

4444
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
4545

46-
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
46+
GGML_DEPRECATED(
47+
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48+
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
4749

4850
GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
4951

@@ -57,6 +59,8 @@ GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int fam
5759
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
5860
GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
5961

62+
GGML_API ggml_backend_reg_t ggml_backend_metal_reg(void);
63+
6064
#ifdef __cplusplus
6165
}
6266
#endif

ggml/src/ggml-backend.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device) {
463463
}
464464

465465
void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props) {
466+
memset(props, 0, sizeof(*props));
466467
device->iface.get_props(device, props);
467468
}
468469

@@ -479,6 +480,10 @@ ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t devic
479480
}
480481

481482
ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) {
483+
if (device->iface.get_host_buffer_type == NULL) {
484+
return NULL;
485+
}
486+
482487
return device->iface.get_host_buffer_type(device);
483488
}
484489

@@ -525,6 +530,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
525530
#include "ggml-cuda.h"
526531
#endif
527532

533+
#ifdef GGML_USE_METAL
534+
#include "ggml-metal.h"
535+
#endif
536+
528537
struct ggml_backend_registry {
529538
std::vector<ggml_backend_reg_t> backends;
530539
std::vector<ggml_backend_dev_t> devices;
@@ -533,10 +542,13 @@ struct ggml_backend_registry {
533542
#ifdef GGML_USE_CUDA
534543
register_backend(ggml_backend_cuda_reg());
535544
#endif
545+
#ifdef GGML_USE_METAL
546+
register_backend(ggml_backend_metal_reg());
547+
#endif
536548

537549
register_backend(ggml_backend_cpu_reg());
538550

539-
// TODO: sycl, metal, vulkan, kompute, cann
551+
// TODO: sycl, vulkan, kompute, cann
540552
}
541553

542554
void register_backend(ggml_backend_reg_t reg) {
@@ -1118,9 +1130,10 @@ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggm
11181130
props->type = ggml_backend_cpu_device_get_type(dev);
11191131
ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
11201132
props->caps = {
1121-
/* async */ false,
1122-
/* host_buffer */ false,
1123-
/* events */ false,
1133+
/* .async = */ false,
1134+
/* .host_buffer = */ false,
1135+
/* .buffer_from_host_ptr = */ true,
1136+
/* .events = */ false,
11241137
};
11251138
}
11261139

ggml/src/ggml-cuda.cu

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2920,9 +2920,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
29202920
#endif
29212921

29222922
props->caps = {
2923-
/* async */ true,
2924-
/* host_buffer */ host_buffer,
2925-
/* events */ events,
2923+
/* .async = */ true,
2924+
/* .host_buffer = */ host_buffer,
2925+
/* .buffer_from_host_ptr = */ false,
2926+
/* .events = */ events,
29262927
};
29272928
}
29282929

0 commit comments

Comments
 (0)