Skip to content

Commit b622066

Browse files
committed
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/docker.yml
#	Makefile
#	examples/CMakeLists.txt
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-sycl/common.hpp
#	ggml/src/ggml-sycl/convert.cpp
#	ggml/src/ggml-sycl/convert.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	scripts/sync-ggml.last
2 parents: 7c5d47f + 8733e0c; commit: b622066

File tree

14 files changed

+410
-117
lines changed

14 files changed

+410
-117
lines changed

common/arg.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,7 +1284,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
12841284
[](common_params & params) {
12851285
params.use_color = true;
12861286
}
1287-
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
1287+
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
12881288
add_opt(common_arg(
12891289
{"-t", "--threads"}, "N",
12901290
string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
@@ -1417,7 +1417,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
14171417
add_opt(common_arg(
14181418
{"-n", "--predict", "--n-predict"}, "N",
14191419
string_format(
1420-
ex == LLAMA_EXAMPLE_MAIN || ex == LLAMA_EXAMPLE_INFILL
1420+
ex == LLAMA_EXAMPLE_MAIN
14211421
? "number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)"
14221422
: "number of tokens to predict (default: %d, -1 = infinity)",
14231423
params.n_predict),
@@ -1656,15 +1656,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
16561656
params.input_prefix = value;
16571657
params.enable_chat_template = false;
16581658
}
1659-
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
1659+
).set_examples({LLAMA_EXAMPLE_MAIN}));
16601660
add_opt(common_arg(
16611661
{"--in-suffix"}, "STRING",
16621662
"string to suffix after user inputs with (default: empty)",
16631663
[](common_params & params, const std::string & value) {
16641664
params.input_suffix = value;
16651665
params.enable_chat_template = false;
16661666
}
1667-
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
1667+
).set_examples({LLAMA_EXAMPLE_MAIN}));
16681668
add_opt(common_arg(
16691669
{"--no-warmup"},
16701670
"skip warming up the model with an empty run",
@@ -1681,7 +1681,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
16811681
[](common_params & params) {
16821682
params.spm_infill = true;
16831683
}
1684-
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_INFILL}));
1684+
).set_examples({LLAMA_EXAMPLE_SERVER}));
16851685
add_opt(common_arg(
16861686
{"--samplers"}, "SAMPLERS",
16871687
string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()),
@@ -2893,7 +2893,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
28932893
[](common_params & params) {
28942894
params.simple_io = true;
28952895
}
2896-
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
2896+
).set_examples({LLAMA_EXAMPLE_MAIN}));
28972897
add_opt(common_arg(
28982898
{"--positive-file"}, "FNAME",
28992899
string_format("positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str()),

common/common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ enum llama_example {
6262
LLAMA_EXAMPLE_COMMON,
6363
LLAMA_EXAMPLE_SPECULATIVE,
6464
LLAMA_EXAMPLE_MAIN,
65-
LLAMA_EXAMPLE_INFILL,
6665
LLAMA_EXAMPLE_EMBEDDING,
6766
LLAMA_EXAMPLE_PERPLEXITY,
6867
LLAMA_EXAMPLE_RETRIEVAL,

common/sampling.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "sampling.h"
22

33
#include "common.h"
4+
#include "log.h"
45

56
#include <cmath>
67
#include <unordered_map>
@@ -534,14 +535,16 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
534535
auto sampler = sampler_canonical_name_map.find(name);
535536
if (sampler != sampler_canonical_name_map.end()) {
536537
samplers.push_back(sampler->second);
537-
} else {
538-
if (allow_alt_names) {
539-
sampler = sampler_alt_name_map.find(name);
540-
if (sampler != sampler_alt_name_map.end()) {
541-
samplers.push_back(sampler->second);
542-
}
538+
continue;
539+
}
540+
if (allow_alt_names) {
541+
sampler = sampler_alt_name_map.find(name);
542+
if (sampler != sampler_alt_name_map.end()) {
543+
samplers.push_back(sampler->second);
544+
continue;
543545
}
544546
}
547+
LOG_WRN("%s: unable to match sampler by name '%s'\n", __func__, name.c_str());
545548
}
546549

547550
return samplers;
@@ -568,6 +571,8 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
568571
const auto sampler = sampler_name_map.find(c);
569572
if (sampler != sampler_name_map.end()) {
570573
samplers.push_back(sampler->second);
574+
} else {
575+
LOG_WRN("%s: unable to match sampler by char '%c'\n", __func__, c);
571576
}
572577
}
573578

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,6 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wro
8787

8888
#if defined(__GNUC__)
8989
#pragma GCC diagnostic ignored "-Woverlength-strings"
90-
#elif defined(_MSC_VER)
91-
#pragma warning(disable: 4244 4267) // possible loss of data
9290
#endif
9391

9492
#define UNUSED GGML_UNUSED

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,6 @@
2020
#define GROUP_MAX_EPS_IQ1_M 1e-7f
2121
#define GROUP_MAX_EPS_IQ1_S 1e-12f
2222

23-
#if defined(_MSC_VER)
24-
// disable "possible loss of data" to avoid warnings for hundreds of casts
25-
// we should just be careful :)
26-
#pragma warning(disable: 4244 4267)
27-
#endif
28-
2923
#define UNUSED GGML_UNUSED
3024

3125
#ifndef MM256_SET_M128I

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,6 @@
5151
#include "llamafile/sgemm.h"
5252
#endif
5353

54-
#if defined(_MSC_VER)
55-
// disable "possible loss of data" to avoid hundreds of casts
56-
// we should just be careful :)
57-
#pragma warning(disable: 4244 4267)
58-
59-
// disable POSIX deprecation warnings
60-
// these functions are never going away, anyway
61-
#pragma warning(disable: 4996)
62-
63-
// unreachable code because of multiple instances of code after GGML_ABORT
64-
#pragma warning(disable: 4702)
65-
#endif
66-
6754
// Note: once we move threading into a separate C++ file
6855
// will use std::hardware_destructive_interference_size instead of hardcoding it here
6956
// and we'll use C++ attribute syntax.

ggml/src/ggml-cpu/ops.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,6 @@
88

99
#include <float.h>
1010

11-
#if defined(_MSC_VER)
12-
// disable "possible loss of data" to avoid hundreds of casts
13-
// we should just be careful :)
14-
#pragma warning(disable: 4244 4267)
15-
16-
// disable POSIX deprecation warnings
17-
// these functions are never going away, anyway
18-
#pragma warning(disable: 4996)
19-
20-
// unreachable code because of multiple instances of code after GGML_ABORT
21-
#pragma warning(disable: 4702)
22-
#endif
23-
2411
// ggml_compute_forward_dup
2512

2613
static void ggml_compute_forward_dup_same_cont(

ggml/src/ggml-cpu/vec.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,6 @@
22

33
#include <cassert>
44

5-
#if defined(_MSC_VER)
6-
// disable "possible loss of data" to avoid hundreds of casts
7-
// we should just be careful :)
8-
#pragma warning(disable: 4244 4267)
9-
#endif
10-
115
// precomputed gelu table for f16 (128 KB)
126
ggml_fp16_t ggml_table_gelu_f16[1 << 16];
137

ggml/src/ggml-cuda/common.cuh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,6 @@ static int ggml_cuda_highest_compiled_arch(const int arch) {
130130

131131
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
132132

133-
#if defined(_MSC_VER)
134-
#pragma warning(disable: 4244 4267) // possible loss of data
135-
#endif
136-
137133
#define GGML_CUDA_MAX_STREAMS 8
138134

139135
[[noreturn]]

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2523,7 +2523,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup>
25232523
static __device__ __forceinline__ void mul_mat_q_process_tile(
25242524
const char * __restrict__ x, const int offset_x, const int * __restrict__ y,
25252525
const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup,
2526-
const int nrows_x, const int stride_row_x, const int ncols_y, const int stride_col_dst,
2526+
const int stride_row_x, const int ncols_y, const int stride_col_dst,
25272527
const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) {
25282528

25292529
constexpr int qk = ggml_cuda_type_traits<type>::qk;
@@ -2690,7 +2690,7 @@ static __global__ void mul_mat_q(
26902690

26912691
constexpr bool fixup = false;
26922692
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2693-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2693+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
26942694
tile_x_max_i, tile_y_max_j, 0, ncols_x/qk);
26952695
return;
26962696
}
@@ -2768,7 +2768,7 @@ static __global__ void mul_mat_q(
27682768

27692769
constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer.
27702770
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2771-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2771+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
27722772
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
27732773

27742774
kbc += blocks_per_ne00;
@@ -2835,7 +2835,7 @@ static __global__ void mul_mat_q(
28352835

28362836
constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
28372837
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2838-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2838+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
28392839
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
28402840
}
28412841

0 commit comments

Comments
 (0)