
Commit fa6fb88

1 parent bce3613

17 files changed: +147,115 −147,034 lines

base/ggml/ggml-aarch64.c

Lines changed: 6 additions & 6 deletions
@@ -392,7 +392,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
         "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -501,7 +501,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
         "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
     }
 #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -613,7 +613,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
     UNUSED(ncols_interleaved);
     UNUSED(blocklen);

-#if defined(__ARM_FEATURE_SVE)
+#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     if (svcntw() == 8) {
         const void * b_ptr = vx;
         const void * a_ptr = vy;
@@ -753,7 +753,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
 #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
     GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
         "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -1271,7 +1271,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
         "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
     }
 #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     const void * b_ptr = vx;
     const void * a_ptr = vy;
     float * res_ptr = s;
@@ -1727,7 +1727,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
     UNUSED(ncols_interleaved);
     UNUSED(blocklen);

-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
     if (svcntw() == 8) {
         const void * b_ptr = vx;
         const void * a_ptr = vy;
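
Note on the guard added throughout this file: plain MSVC (cl.exe) defines _MSC_VER but not __clang__, while clang-cl defines both, so the extra term `! ((defined(_MSC_VER)) && ! defined(__clang__))` keeps these NEON/SVE paths, which use GCC-style inline assembly that cl.exe does not accept, enabled under GCC, clang and clang-cl, and lets plain MSVC fall through to the generic fallback instead. A minimal sketch of the same check factored into a helper macro; the name GGML_COMPILER_IS_MSVC_ONLY is hypothetical and not part of ggml:

/* Hypothetical helper illustrating the compiler check used in this commit.
 * cl.exe defines _MSC_VER but not __clang__; clang-cl defines both. */
#if defined(_MSC_VER) && !defined(__clang__)
#    define GGML_COMPILER_IS_MSVC_ONLY 1   /* no GCC-style inline asm available */
#else
#    define GGML_COMPILER_IS_MSVC_ONLY 0   /* GCC, clang or clang-cl */
#endif

/* The amended #elif above is then equivalent to:
 *   #elif defined(__ARM_NEON) && defined(__aarch64__) && !GGML_COMPILER_IS_MSVC_ONLY
 */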

base/ggml/ggml-alloc.c

Lines changed: 18 additions & 24 deletions
@@ -91,8 +91,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
         fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
-        GGML_ASSERT(!"not enough space in the buffer");
-        return;
+        GGML_ABORT("not enough space in the buffer");
     }

     void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset;
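
The `GGML_ASSERT(!"...")` idiom replaced throughout this file is a roundabout way of saying "always fail here"; `GGML_ABORT(...)` states that intent directly and, as the remove_allocated_tensor hunk below shows, takes a printf-style format string, so a preceding fprintf can be folded into the abort call. A minimal sketch of what such an always-aborting, format-capable macro can look like; the name MY_ABORT and the exact output format are assumptions, and ggml's real GGML_ABORT may differ:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: an unconditional, printf-style abort macro. */
#define MY_ABORT(...)                                                   \
    do {                                                                \
        fprintf(stderr, "%s:%d: fatal error: ", __FILE__, __LINE__);    \
        fprintf(stderr, __VA_ARGS__);                                   \
        fputc('\n', stderr);                                            \
        abort();                                                        \
    } while (0)

/* Usage mirroring the changes in this file:
 *   MY_ABORT("not enough space in the buffer");
 *   MY_ABORT("tried to free tensor %s not found", name);
 */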
@@ -133,7 +132,7 @@ static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset,
             return;
         }
     }
-    GGML_ASSERT(!"out of allocated_tensors");
+    GGML_ABORT("out of allocated_tensors");
 }
 static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) {
     for (int i = 0; i < 1024; i++) {
@@ -142,8 +141,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs
             return;
         }
     }
-    fprintf(stderr, "tried to free tensor %s not found\n", tensor->name);
-    GGML_ASSERT(!"tensor not found");
+    GGML_ABORT("tried to free tensor %s not found\n", tensor->name);
 }
 #endif

@@ -176,8 +174,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
         // this should never happen
         fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
                 __func__, size, max_avail);
-        GGML_ASSERT(!"not enough space in the buffer");
-        GGML_UNREACHABLE();
+        GGML_ABORT("not enough space in the buffer");
     }
 }

@@ -443,7 +440,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
         }
     }

-    free(galloc->hash_set.keys);
+    ggml_hash_set_free(&galloc->hash_set);
     free(galloc->hash_values);
     free(galloc->bufts);
     free(galloc->buffers);
@@ -456,7 +453,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
 typedef struct ggml_gallocr * ggml_gallocr_t;

 static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    size_t i = ggml_hash_find_or_insert(galloc->hash_set, t);
+    size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
     return &galloc->hash_values[i];
 }

@@ -565,8 +562,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {

 static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     // clear hash tables
-    memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
-    memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
+    ggml_hash_set_reset(&galloc->hash_set);
+    memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);

     // allocate leafs
     // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
@@ -671,21 +668,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
 }

 bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
-    size_t hash_size = graph->visited_hash_table.size;
+    size_t min_hash_size = graph->n_nodes + graph->n_leafs;
+    // add 25% margin to avoid hash collisions
+    min_hash_size += min_hash_size / 4;

     // initialize hash table
-    if (galloc->hash_set.size < hash_size) {
-        free(galloc->hash_set.keys);
-        free(galloc->hash_values);
-        galloc->hash_set.size = hash_size;
-        galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
-        galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
+    if (galloc->hash_set.size < min_hash_size) {
+        ggml_hash_set_free(&galloc->hash_set);
+        galloc->hash_set = ggml_hash_set_new(min_hash_size);
         GGML_ASSERT(galloc->hash_set.keys != NULL);
+
+        free(galloc->hash_values);
+        galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
         GGML_ASSERT(galloc->hash_values != NULL);
-    } else {
-        // reset hash table
-        memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
-        memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
     }

     // reset allocators
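
The reserve path above now sizes the hash table from the graph itself (n_nodes + n_leafs) plus a 25% head-room margin, and only reallocates when the existing set is smaller than that; for example, a graph with 1200 nodes and 40 leafs requests 1240 + 310 = 1550 slots. A standalone sketch of the same sizing rule, using plain size_t arithmetic and no ggml types:

#include <stddef.h>

/* Sketch of the sizing rule: required entries plus a 25% margin so the
 * hash set stays sparse enough to keep collisions rare. */
static size_t graph_hash_size(size_t n_nodes, size_t n_leafs) {
    size_t min_hash_size = n_nodes + n_leafs;
    min_hash_size += min_hash_size / 4;   /* add 25% margin */
    return min_hash_size;
}

/* e.g. graph_hash_size(1200, 40) == 1550 */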
@@ -817,8 +812,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
 }

 static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
-    ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL;
-    size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
+    size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
     return talloc->size_max >= node_size;
 }
