Skip to content

Commit cc01fae

Browse files
committed
Latest commits, more detailed bias settings
* `logit_bias_strings_exact` — for tokens that match a word exactly (with or without a leading space)
* `logit_bias_strings_beginning` — for tokens that match the beginning of a word, or vice versa
* `logit_bias_strings_ending` — for tokens that match the ending of a word
1 parent f51519e commit cc01fae

32 files changed

+1824
-154
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ OBJS_GGUF_LLAMA = \
473473
$(TMP)$(PREFIX)_llama-batch.o \
474474
$(TMP)$(PREFIX)_llama-chat.o \
475475
$(TMP)$(PREFIX)_llama-context.o \
476+
$(TMP)$(PREFIX)_llama-cparams.o \
476477
$(TMP)$(PREFIX)_llama-grammar.o \
477478
$(TMP)$(PREFIX)_llama-graph.o \
478479
$(TMP)$(PREFIX)_llama-hparams.o \

base_sampling2/chat_layer.h

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,45 @@ class chat
596596
// std::getline(std::cin, pause);
597597
}
598598

599+
bool logit_bias_check_exact(std::string_view token_str) {
600+
for (auto word : params.sparams.logit_bias_strings_exact) {
601+
if (token_str == word) return true;
602+
}
603+
604+
return false;
605+
}
606+
607+
bool logit_bias_check_beginning(std::string_view token_str) {
608+
for (auto word : params.sparams.logit_bias_strings_beginning) {
609+
if ((token_str.find(word) == 0 && (token_str.length() - word.length()) < 4) ||
610+
(token_str.length() > 2 && word.find(token_str) == 0)
611+
) return true;
612+
}
613+
614+
return false;
615+
}
616+
617+
bool logit_bias_check_ending(std::string_view token_str) {
618+
for (auto word : params.sparams.logit_bias_strings_ending) {
619+
auto token_str_pos = word.find(token_str);
620+
if (token_str_pos == (token_str.length() - 1)) return true;
621+
}
622+
623+
return false;
624+
}
625+
626+
bool logit_bias_checks(std::string token_str) {
627+
if (token_str.front() == ' ') {
628+
token_str = token_str.substr(1);
629+
}
630+
631+
if (token_str.back() == ' ') {
632+
token_str.pop_back();
633+
}
634+
635+
return logit_bias_check_exact(token_str) || logit_bias_check_beginning(token_str) || logit_bias_check_ending(token_str);
636+
}
637+
599638
void logit_bias_postfill(llama_token & id, std::string token_str) {
600639
// cutting spaces since there are "duplicated" tokens with them
601640
if (token_str.front() == ' ') {
@@ -687,14 +726,21 @@ class chat
687726
}
688727

689728
void processByVocab(std::string safeguard_string) {
729+
bool has_logit_biases_detailed = (params.sparams.logit_bias_strings_exact.size() || params.sparams.logit_bias_strings_beginning.size() || params.sparams.logit_bias_strings_ending.size());
730+
690731
bool has_logit_biases = (params.sparams.logit_bias_strings.size() || params.sparams.logit_bias_strings_ext.size());
691732
bool has_logit_biases_start = params.sparams.logit_bias_strings_start.size();
692733

693734
for (llama_token id = 0; id < llama_vocab_n_tokens(vocab); id++) {
694735
std::string token_str = common_token_to_piece(ctx, id);
695736

696-
if (has_logit_biases) logit_bias_postfill(id, token_str);
697-
if (has_logit_biases_start) start_bias_tokens_postfill(id, token_str);
737+
if (has_logit_biases_detailed == true && logit_bias_checks(token_str) == true) {
738+
params.sparams.logit_bias.push_back({id, -INFINITY});
739+
} else if (has_logit_biases == true) {
740+
logit_bias_postfill(id, token_str);
741+
}
742+
743+
if (has_logit_biases_start == true) start_bias_tokens_postfill(id, token_str);
698744
if (safeguard_token < 0) get_safeguard_token(id, token_str, safeguard_string);
699745
}
700746

@@ -1636,7 +1682,7 @@ class chat
16361682
if (id == l) {
16371683
checks = 0;
16381684
std::string c_restricted_tkn_string = common_token_to_piece(ctx, id);
1639-
writeTextFile("logit_biasing.txt", std::format("Found: '{}';", c_restricted_tkn_string));
1685+
writeTextFile("logit_biasing.txt", std::format("{}: Found '{}';", params.sparams.seed, c_restricted_tkn_string));
16401686

16411687
id = common_sampler_shift(smpl, ctx, -1, id);
16421688

@@ -1659,7 +1705,7 @@ class chat
16591705
// --attempts;
16601706
// }
16611707

1662-
if (biased_logit.token == id) {
1708+
if (biased_logit.bias < -9 && biased_logit.token == id) {
16631709
++c_restricted_tkns;
16641710
// std::string c_restricted_tkn_string = common_token_to_piece(ctx, id);
16651711
// writeTextFile("logit_biasing.txt", std::format("+{}\n", c_restricted_tkn_string));

base_sampling2/common.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,12 @@ struct common_params_sampling {
195195

196196
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
197197

198-
std::vector<std::string> logit_bias_strings; // words for logit biases
198+
std::vector<std::string> logit_bias_strings; // words for logit biases, all matches
199+
std::vector<std::string> logit_bias_strings_exact; // words for logit biases, exact matches
200+
std::vector<std::string> logit_bias_strings_beginning; // words for logit biases, beginning of the word matches
201+
std::vector<std::string> logit_bias_strings_ending; // words for logit biases, ending of the word matches
202+
203+
199204
std::map<std::string, float> logit_bias_strings_ext; // words for logit biases, but with extra configuration
200205
std::vector<std::string> logit_bias_strings_start; // restricted beginnings of messages
201206

base_sampling2/include/jsonParams.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,11 @@ static void getSamplingParamsFromJson(nlohmann::json& config, common_params& par
533533

534534
// logit_bias_strings
535535
if (checkJArr(config, "logit_bias_strings")) params.sparams.logit_bias_strings = config["logit_bias_strings"];
536+
if (checkJArr(config, "logit_bias_strings_exact")) params.sparams.logit_bias_strings_exact = config["logit_bias_strings_exact"];
537+
if (checkJArr(config, "logit_bias_strings_beginning")) params.sparams.logit_bias_strings_beginning = config["logit_bias_strings_beginning"];
538+
if (checkJArr(config, "logit_bias_strings_ending")) params.sparams.logit_bias_strings_ending = config["logit_bias_strings_ending"];
539+
540+
536541
if (checkJObj(config, "logit_bias_strings_ext")) params.sparams.logit_bias_strings_ext = config["logit_bias_strings_ext"];
537542
if (checkJArr(config, "logit_bias_strings_start")) params.sparams.logit_bias_strings_start = config["logit_bias_strings_start"];
538543

base_sampling2/llama-addon.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ static void llama_sampler_min_p_addon_apply(struct llama_sampler * smpl, llama_t
674674
}
675675

676676
// if we have enough values the operation was a success
677-
if (filtered_tokens.size() >= ctx->min_keep) {
677+
if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
678678
memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
679679
cur_p->size = filtered_tokens.size();
680680
min_p_applied = true;

base_sampling2/master/ggml/include/ggml.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,15 @@ extern "C" {
935935
struct ggml_tensor * a,
936936
struct ggml_tensor * b);
937937

938+
// repeat a to the specified shape
939+
GGML_API struct ggml_tensor * ggml_repeat_4d(
940+
struct ggml_context * ctx,
941+
struct ggml_tensor * a,
942+
int64_t ne0,
943+
int64_t ne1,
944+
int64_t ne2,
945+
int64_t ne3);
946+
938947
// sums repetitions in a into shape of b
939948
GGML_API struct ggml_tensor * ggml_repeat_back(
940949
struct ggml_context * ctx,

base_sampling2/master/ggml/src/ggml-backend.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,6 +1598,9 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
15981598
for (int i = 0; i < sched->n_backends; i++) {
15991599
ggml_backend_synchronize(sched->backends[i]);
16001600
}
1601+
// reset the current copy to 0 so that the graphs will be similar during generation
1602+
// necessary for CUDA graphs
1603+
sched->cur_copy = 0;
16011604
}
16021605

16031606
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {

base_sampling2/master/ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,25 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
299299
endif()
300300
endif()
301301
endif()
302+
303+
if (GGML_BACKEND_DL)
304+
if (GGML_NATIVE)
305+
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
306+
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
307+
endif()
308+
309+
# The feature detection code is compiled as a separate target so that
310+
# it can be built without the architecture flags
311+
# Since multiple variants of the CPU backend may be included in the same
312+
# build, using set_source_files_properties() to set the arch flags is not possible
313+
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
314+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
315+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
316+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
317+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
318+
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
319+
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
320+
endif()
302321
elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
303322
message(STATUS "PowerPC detected")
304323
if (GGML_NATIVE)
@@ -338,8 +357,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
338357
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
339358
message(STATUS "RISC-V detected")
340359
if (GGML_RVV)
341-
if (GGML_RV_ZFH)
342-
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -DGGML_RV_ZFH -mabi=lp64d)
360+
if (GGML_XTHEADVECTOR)
361+
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
362+
elseif (GGML_RV_ZFH)
363+
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -mabi=lp64d)
343364
else()
344365
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
345366
endif()
@@ -477,25 +498,6 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
477498
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
478499
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
479500

480-
if (GGML_BACKEND_DL)
481-
if (GGML_NATIVE)
482-
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
483-
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
484-
endif()
485-
486-
# The feature detection code is compiled as a separate target so that
487-
# it can be built without the architecture flags
488-
# Since multiple variants of the CPU backend may be included in the same
489-
# build, using set_source_files_properties() to set the arch flags is not possible
490-
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
491-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
492-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
493-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
494-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
495-
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
496-
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
497-
endif()
498-
499501
if (EMSCRIPTEN)
500502
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
501503
endif()

base_sampling2/master/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ static void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c
11911191
}
11921192
}
11931193
return;
1194-
#elif defined(__riscv_v_intrinsic)
1194+
#elif defined __riscv_v
11951195
if (__riscv_vlenb() >= QK4_0) {
11961196
const size_t vl = QK4_0;
11971197

@@ -3783,7 +3783,7 @@ static void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c
37833783
}
37843784
return;
37853785
}
3786-
#elif defined(__riscv_v_intrinsic)
3786+
#elif defined __riscv_v
37873787
if (__riscv_vlenb() >= QK4_0) {
37883788
const size_t vl = QK4_0;
37893789

base_sampling2/master/ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -320,21 +320,17 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
320320

321321
#ifdef __wasm_simd128__
322322
#include <wasm_simd128.h>
323-
#else
323+
#endif
324+
324325
#ifdef __POWER9_VECTOR__
325326
#include <altivec.h>
326-
#else
327+
#endif
328+
327329
#if defined(_MSC_VER) || defined(__MINGW32__)
328330
#include <intrin.h>
329-
#else
330-
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
331-
#if !defined(__riscv)
331+
#elif defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
332332
#include <immintrin.h>
333333
#endif
334-
#endif
335-
#endif
336-
#endif
337-
#endif
338334

339335
#ifdef __riscv_v_intrinsic
340336
#include <riscv_vector.h>

0 commit comments

Comments
 (0)