Skip to content

Commit 53cd962

Browse files
committed
Merge branch 'concedo_experimental' into esocrok
2 parents 2e74492 + 7447a36 commit 53cd962

File tree

115 files changed

+473855
-4171
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+473855
-4171
lines changed

.github/workflows/kcpp-build-release-arm64.yaml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,7 @@ jobs:
8080
--add-data './json_to_gbnf.py:.' \
8181
--add-data './LICENSE.md:.' \
8282
--add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' \
83-
--add-data './klite.embd:.' \
84-
--add-data './kcpp_docs.embd:.' \
85-
--add-data './kcpp_sdui.embd:.' \
86-
--add-data './taesd.embd:.' \
87-
--add-data './taesd_xl.embd:.' \
88-
--add-data './taesd_f.embd:.' \
89-
--add-data './taesd_3.embd:.' \
90-
--add-data './kokoro_ipa.embd:.' \
91-
--add-data './rwkv_vocab.embd:.' \
92-
--add-data './rwkv_world_vocab.embd:.' \
83+
--add-data './embd_res:./embd_res' \
9384
--version-file './version.txt' \
9485
--clean --console koboldcpp.py -n 'koboldcpp-linux-arm64'
9586
"

.github/workflows/kcpp-build-release-macos.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
make LLAMA_METAL=1 LLAMA_PORTABLE=1
3737
chmod +x './create_ver_file.sh'
3838
. create_ver_file.sh
39-
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber --collect-all PyMuPdf --collect-all tqdm --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './kokoro_ipa.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
39+
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber --collect-all PyMuPdf --collect-all tqdm --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './LICENSE.md:.' --add-data './MIT_LICENSE_GGML_SDCPP_LLAMACPP_ONLY.md:.' --add-data './embd_res:./embd_res' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
4040
4141
- name: Test
4242
id: test

Makefile

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,13 @@ endif
471471

472472
ifdef NO_VULKAN_EXTENSIONS
473473
VKGEN_NOEXT_ADD = -DNO_VULKAN_EXTENSIONS
474+
VKGEN_SUFFIX = -noext
475+
else
476+
VKGEN_SUFFIX =
474477
endif
475478
VKGEN_NOEXT_FORCE = -DNO_VULKAN_EXTENSIONS
479+
VKGEN_HPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).hpp
480+
VKGEN_CPP = ggml/src/ggml-vulkan-shaders$(VKGEN_SUFFIX).cpp
476481

477482
#
478483
# Print build information
@@ -685,9 +690,9 @@ ggml_v3-opencl.o: otherarch/ggml_v3-opencl.cpp otherarch/ggml_v3-opencl.h
685690
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
686691

687692
#vulkan
688-
ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders.cpp
693+
ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(VKGEN_CPP)
689694
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
690-
ggml-vulkan-shaders.o: ggml/src/ggml-vulkan-shaders.cpp ggml/include/ggml-vulkan.h
695+
ggml-vulkan-shaders.o: $(VKGEN_CPP) ggml/include/ggml-vulkan.h
691696
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(VULKAN_FLAGS) -c $< -o $@
692697
ggml-vulkan-noext.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h ggml/src/ggml-vulkan-shaders-noext.cpp
693698
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_FORCE) $(VULKAN_FLAGS) -c $< -o $@
@@ -707,11 +712,11 @@ expose.o: expose.cpp expose.h
707712
$(CXX) $(CXXFLAGS) -c $< -o $@
708713

709714
# sd.cpp objects
710-
sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
715+
sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
711716
$(CXX) $(CXXFLAGS) -c $< -o $@
712-
sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
717+
sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
713718
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
714-
sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
719+
sdcpp_vulkan.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c
715720
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
716721

717722

@@ -754,8 +759,8 @@ clean:
754759
# useful tools
755760
main: tools/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
756761
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
757-
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
758-
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
762+
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/tokenize_util.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
763+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -DKCPP_BAKE_SD_VOCAB -o $@ $(LDFLAGS)
759764
whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
760765
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
761766
ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
@@ -785,7 +790,7 @@ vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
785790
$(CXX) $(CXXFLAGS) $(VKGEN_NOEXT_ADD) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
786791
ifeq ($(OS),Windows_NT)
787792
@echo 'Now rebuilding vulkan shaders for Windows...'
788-
$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp
793+
$(shell) vulkan-shaders-gen --glslc glslc --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp
789794
@echo 'Vulkan Shaders Rebuilt for Windows...'
790795
else
791796
@echo 'Now rebuilding vulkan shaders for Linux...'
@@ -819,7 +824,7 @@ else
819824
echo "Error: No usable glslc found. Vulkan shaders cannot be compiled!"; \
820825
else \
821826
echo "Using GLSLC: $$GLSLC_BIN"; \
822-
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp --output-dir vulkan-spv-tmp; \
827+
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp $(VKGEN_HPP) --target-cpp $(VKGEN_CPP) --output-dir vulkan-spv-tmp; \
823828
fi
824829
@echo 'Vulkan Shaders Rebuilt for Linux...'
825830
endif

common/arg.cpp

Lines changed: 22 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,11 @@
3434
#include <thread>
3535
#include <vector>
3636

37-
//#define LLAMA_USE_CURL
38-
3937
#if defined(LLAMA_USE_CURL)
4038
#include <curl/curl.h>
4139
#include <curl/easy.h>
4240
#elif defined(LLAMA_USE_HTTPLIB)
43-
#include <cpp-httplib/httplib.h>
41+
#include "http.h"
4442
#endif
4543

4644
#ifdef __linux__
@@ -56,6 +54,13 @@
5654
#endif
5755
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
5856

57+
// isatty
58+
#if defined(_WIN32)
59+
#include <io.h>
60+
#else
61+
#include <unistd.h>
62+
#endif
63+
5964
using json = nlohmann::ordered_json;
6065

6166
std::initializer_list<enum llama_example> mmproj_examples = {
@@ -102,6 +107,14 @@ static void write_file(const std::string & fname, const std::string & content) {
102107
}
103108
}
104109

110+
// Reports whether standard output is attached to a terminal.
// Windows builds ask the CRT (_fileno + _isatty); every other platform
// queries file descriptor 1 through isatty.
static bool is_output_a_tty() {
#if defined(_WIN32)
    const int stdout_fd = _fileno(stdout);
    return _isatty(stdout_fd) != 0;
#else
    const int stdout_fd = 1;
    return isatty(stdout_fd) != 0;
#endif
}
117+
105118
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
106119
this->examples = std::move(examples);
107120
return *this;
@@ -268,10 +281,6 @@ static std::string read_etag(const std::string & path) {
268281

269282
#ifdef LLAMA_USE_CURL
270283

271-
bool common_has_curl() {
272-
return true;
273-
}
274-
275284
//
276285
// CURL utils
277286
//
@@ -588,83 +597,11 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
588597
#else
589598

590599
#ifdef LLAMA_USE_HTTPLIB
591-
592-
bool common_has_curl() {
593-
return false;
594-
}
595-
596-
struct common_url {
597-
std::string scheme;
598-
std::string user;
599-
std::string password;
600-
std::string host;
601-
std::string path;
602-
};
603-
604-
static common_url parse_url(const std::string & url) {
605-
common_url parts;
606-
auto scheme_end = url.find("://");
607-
608-
if (scheme_end == std::string::npos) {
609-
throw std::runtime_error("invalid URL: no scheme");
610-
}
611-
parts.scheme = url.substr(0, scheme_end);
612-
613-
if (parts.scheme != "http" && parts.scheme != "https") {
614-
throw std::runtime_error("unsupported URL scheme: " + parts.scheme);
615-
}
616-
617-
auto rest = url.substr(scheme_end + 3);
618-
auto at_pos = rest.find('@');
619-
620-
if (at_pos != std::string::npos) {
621-
auto auth = rest.substr(0, at_pos);
622-
auto colon_pos = auth.find(':');
623-
if (colon_pos != std::string::npos) {
624-
parts.user = auth.substr(0, colon_pos);
625-
parts.password = auth.substr(colon_pos + 1);
626-
} else {
627-
parts.user = auth;
628-
}
629-
rest = rest.substr(at_pos + 1);
630-
}
631-
632-
auto slash_pos = rest.find('/');
633-
634-
if (slash_pos != std::string::npos) {
635-
parts.host = rest.substr(0, slash_pos);
636-
parts.path = rest.substr(slash_pos);
637-
} else {
638-
parts.host = rest;
639-
parts.path = "/";
640-
}
641-
return parts;
642-
}
643-
644-
static std::pair<httplib::Client, common_url> http_client(const std::string & url) {
645-
common_url parts = parse_url(url);
646-
647-
if (parts.host.empty()) {
648-
throw std::runtime_error("error: invalid URL format");
649-
}
650-
651-
if (!parts.user.empty()) {
652-
throw std::runtime_error("error: user:password@ not supported yet"); // TODO
600+
static void print_progress(size_t current, size_t total) {
601+
if (!is_output_a_tty()) {
602+
return;
653603
}
654604

655-
httplib::Client cli(parts.scheme + "://" + parts.host);
656-
cli.set_follow_location(true);
657-
658-
// TODO cert
659-
660-
return { std::move(cli), std::move(parts) };
661-
}
662-
663-
static std::string show_masked_url(const common_url & parts) {
664-
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
665-
}
666-
667-
static void print_progress(size_t current, size_t total) { // TODO isatty
668605
if (!total) {
669606
return;
670607
}
@@ -752,7 +689,7 @@ static bool common_download_file_single_online(const std::string & url,
752689
static const int max_attempts = 3;
753690
static const int retry_delay_seconds = 2;
754691

755-
auto [cli, parts] = http_client(url);
692+
auto [cli, parts] = common_http_client(url);
756693

757694
httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
758695
if (!bearer_token.empty()) {
@@ -832,7 +769,7 @@ static bool common_download_file_single_online(const std::string & url,
832769

833770
// start the download
834771
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
835-
__func__, show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
772+
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
836773
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
837774
if (!was_pull_successful) {
838775
if (i + 1 < max_attempts) {
@@ -860,7 +797,7 @@ static bool common_download_file_single_online(const std::string & url,
860797

861798
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
862799
const common_remote_params & params) {
863-
auto [cli, parts] = http_client(url);
800+
auto [cli, parts] = common_http_client(url);
864801

865802
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
866803
for (const auto & header : params.headers) {

common/arg.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
7878

7979
// function to be used by test-arg-parser
8080
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81-
bool common_has_curl();
8281

8382
struct common_remote_params {
8483
std::vector<std::string> headers;

common/chat-parser.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,35 @@ bool common_chat_msg_parser::add_tool_calls(const json & arr) {
7575
}
7676
return true;
7777
}
78+
79+
bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
80+
if (!tool_call.is_object() || tool_call.size() != 1) {
81+
return false;
82+
}
83+
84+
// Get the tool name (the single key in the object)
85+
auto it = tool_call.begin();
86+
std::string name = it.key();
87+
88+
if (name.empty()) {
89+
return false;
90+
}
91+
92+
// Get the arguments (the nested object)
93+
const json & args_json = it.value();
94+
std::string arguments = "";
95+
96+
if (args_json.is_object()) {
97+
arguments = args_json.dump();
98+
} else if (args_json.is_string()) {
99+
arguments = args_json;
100+
} else if (!args_json.is_null()) {
101+
// For other types, convert to string representation
102+
arguments = args_json.dump();
103+
}
104+
105+
return add_tool_call(name, "", arguments);
106+
}
78107
void common_chat_msg_parser::finish() {
79108
if (!is_partial_ && pos_ != input_.size()) {
80109
throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));

common/chat-parser.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class common_chat_msg_parser {
6464
// Adds an array of tool calls using their "name", "id" and "arguments" fields.
6565
bool add_tool_calls(const nlohmann::ordered_json & arr);
6666

67+
// Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
68+
bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
69+
6770
void finish();
6871

6972
bool consume_spaces();

0 commit comments

Comments
 (0)