
Commit a78cc4c

Revert "Add vision support in llama-server (ikawrakow#901)"

This reverts commit 15159a8.

1 parent ac5200c · commit a78cc4c

26 files changed: +730 −2457 lines

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -57,6 +57,8 @@ add_library(${TARGET} STATIC
     chat-parser.cpp
     chat-parser.h
     common.cpp
+    chat.h
+    chat.cpp
     sampling.h
     sampling.cpp
     console.h

common/common.cpp

Lines changed: 2 additions & 39 deletions
@@ -270,14 +270,6 @@ static std::string parse_device_list(const std::string& value) {
     return value;
 }

-
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string& url, const common_remote_params&) {
-    if (!url.empty()) {
-        throw std::runtime_error("error: built without CURL, cannot download file from the internet");
-    }
-    return {};
-}
-
 //
 // CLI argument parsing
 //

@@ -1760,11 +1752,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.n_junk = std::stoi(argv[i]);
         return true;
     }
-    if (arg == "--no-context-shift") {
-        CHECK_ARG
-        params.ctx_shift = false;
-        return true;
-    }
     if (arg == "--pos") {
         CHECK_ARG
         params.i_pos = std::stoi(argv[i]);

@@ -2100,7 +2087,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "multi-modality" });
     options.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA. see examples/llava/README.md" });
     options.push_back({ "*", " --image FILE", "path to an image file. use with multimodal models. Specify multiple times for batching" });
-    options.push_back({ "*", " --no-context-shift", "disable context-shift." });
+
     options.push_back({ "backend" });
     options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });
     options.push_back({ "*", "-cuda, --cuda-params", "comma separate list of cuda parameters" });

@@ -3354,29 +3341,6 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }

-std::vector<llama_token> llama_tokenize(
-    const struct llama_vocab* vocab,
-    const std::string& text,
-    bool add_special,
-    bool parse_special) {
-    // upper limit for the number of tokens
-    int n_tokens = text.length() + 2 * add_special;
-    std::vector<llama_token> result(n_tokens);
-    n_tokens = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
-    if (n_tokens == std::numeric_limits<int32_t>::min()) {
-        throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
-    }
-    if (n_tokens < 0) {
-        result.resize(-n_tokens);
-        int check = llama_vocab_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
-        GGML_ASSERT(check == -n_tokens);
-    }
-    else {
-        result.resize(n_tokens);
-    }
-    return result;
-}
-
 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::string piece;
     piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'

@@ -3409,7 +3373,7 @@ std::string llama_token_to_piece(const struct llama_model* model, llama_token to
     return piece;
 }

-std::string llama_detokenize(const llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
     int32_t n_chars = llama_detokenize(llama_get_model(ctx), tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);

@@ -3425,7 +3389,6 @@ std::string llama_detokenize(const llama_context * ctx, const std::vector<llama_
     return text;
 }

-
 bool llama_should_add_bos_token(const llama_model * model) {
     const int add_bos = llama_add_bos_token(model);

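Note (illustrative, not part of this commit): a minimal sketch of how the tokenization helpers that remain after this revert are typically used. It assumes the context-taking llama_tokenize() overload still declared in common.h; the llama_vocab-based overload deleted above is no longer available.

#include "common.h"

// Round-trips a prompt through the retained helpers.
static std::string tokenize_round_trip(llama_context * ctx, const std::string & prompt) {
    // add_special lets the model metadata decide on BOS; parse_special keeps control tokens intact
    std::vector<llama_token> tokens = llama_tokenize(ctx, prompt, /*add_special=*/true, /*parse_special=*/true);
    // after the revert, llama_detokenize() takes a non-const llama_context *
    return llama_detokenize(ctx, tokens, /*special=*/true);
}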
common/common.h

Lines changed: 58 additions & 24 deletions
@@ -53,8 +53,6 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };

-using llama_tokens = std::vector<llama_token>;
-
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const * LLAMA_COMMIT;

@@ -240,7 +238,7 @@ struct gpt_params {
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
-    bool ctx_shift = true;
+
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles

@@ -375,9 +373,6 @@ struct gpt_params {
     bool sweep_bench_output_jsonl = false;
 };

-
-
-void gpt_params_handle_hf_token(gpt_params & params);
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);

@@ -388,15 +383,6 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);

 std::string gpt_params_get_system_info(const gpt_params & params);

-
-struct common_remote_params {
-    std::vector<std::string> headers;
-    long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
-    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
-};
-// get remote file content, returns <http_code, raw_response_body>
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string& url, const common_remote_params& params);
-
 //
 // String utils
 //

@@ -513,12 +499,6 @@ std::vector<llama_token> llama_tokenize(
     bool add_special,
     bool parse_special = false);

-std::vector<llama_token> llama_tokenize(
-    const struct llama_vocab* vocab,
-    const std::string& text,
-    bool add_special,
-    bool parse_special = false);
-
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(

@@ -535,16 +515,70 @@ std::string llama_token_to_piece(
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
 std::string llama_detokenize(
-    const llama_context * ctx,
+    llama_context * ctx,
     const std::vector<llama_token> & tokens,
     bool special = true);

-
 // Uses the value from the model metadata if possible, otherwise
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);

-
+//
+// Chat template utils
+//
+//struct common_tool_call {
+//    std::string name;
+//    std::string arguments;
+//    std::string id;
+//};
+//
+//// same with llama_chat_message, but uses std::string
+//struct common_chat_msg {
+//    std::string role;
+//    std::string content;
+//    std::vector<common_tool_call> tool_calls;
+//    std::string reasoning_content = "";
+//};
+
+//// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+//bool llama_chat_verify_template(const struct llama_model* , const std::string& tmpl, bool use_jinja);
+//
+//namespace minja {
+//    class chat_template;
+//}
+//
+//typedef minja::chat_template common_chat_template;
+//
+//struct common_chat_templates {
+//    bool has_explicit_template; // Model had builtin template or template overridde was specified.
+//    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+//    std::unique_ptr<common_chat_template> template_tool_use;
+//};
+//
+//
+//// CPP wrapper for llama_chat_apply_template
+//// If the built-in template is not supported, we default to chatml
+//// If the custom "tmpl" is not supported, we throw an error
+//std::string llama_chat_apply_template(
+//    const struct llama_model* model,
+//    const common_chat_template& tmpl,
+//    const std::vector< common_chat_msg>& chat,
+//    bool add_ass,
+//    bool use_jinja);
+//
+//// Format single message, while taking into account the position of that message in chat history
+//std::string llama_chat_format_single(const struct llama_model* model,
+//    const common_chat_template& tmpl,
+//    const std::vector< common_chat_msg>& past_msg,
+//    const common_chat_msg& new_msg,
+//    bool add_ass,
+//    bool use_jinja);
+//
+//// Returns an example of formatted chat
+//std::string llama_chat_format_example(const struct llama_model* model,
+//    const common_chat_template& tmpl, bool use_jinja);
+//
+//common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override);


 //
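Note (illustrative, not part of this commit): a rough sketch of how the common_remote_params / common_remote_get_content API deleted above was meant to be called, based only on the declarations shown in the diff. The URL, header value, and helper name are made up for illustration.

#include "common.h"

// Hypothetical caller of the removed API.
static std::vector<char> fetch_remote(const std::string & url) {
    common_remote_params remote_params;
    remote_params.headers  = { "User-Agent: llama-server" };  // illustrative header
    remote_params.timeout  = 30;                              // CURLOPT_TIMEOUT, in seconds
    remote_params.max_size = 8 * 1024 * 1024;                 // cap the response at 8 MiB

    // returns <http_code, raw_response_body>; the CURL-less stub removed from
    // common.cpp simply threw for any non-empty URL
    auto [http_code, body] = common_remote_get_content(url, remote_params);
    return http_code == 200 ? body : std::vector<char>{};
}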

examples/mtmd/clip.cpp

Lines changed: 1 addition & 1 deletion
@@ -3331,7 +3331,7 @@ struct image_manipulation {
        dst.buf.resize(3 * target_width * target_height);

        float Cc;
-        float C[5] = {};
+        float C[5];
        float d0, d2, d3, a0, a1, a2, a3;
        int i, j, k, jj;
        int x, y;
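Note (illustrative, not part of this commit): the one-line change above reverts the bicubic coefficient buffer to its pre-#901 declaration. A short C++ illustration of the difference between the two forms:

static void init_example() {
    float C_zeroed[5] = {}; // value-initialization: every element starts at 0.0f
    float C_raw[5];         // default-initialization: elements are indeterminate until the
                            // interpolation loop writes them, so they must not be read first
    (void) C_zeroed; (void) C_raw;
}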

examples/server/CMakeLists.txt

Lines changed: 0 additions & 3 deletions
@@ -70,9 +70,6 @@ endif()
 target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})

-target_include_directories(${TARGET} PRIVATE ../mtmd)
-target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
-
 if (LLAMA_SERVER_SSL)
     find_package(OpenSSL REQUIRED)
     target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
Binary file changed (−572 KB); contents not shown.
