Draft
Changes from 1 commit
6 changes: 6 additions & 0 deletions Makefile
@@ -47,6 +47,7 @@ TEST_TARGETS = \
tests/test-autorelease \
tests/test-backend-ops \
tests/test-chat-template \
+tests/test-cli \
tests/test-double-float \
tests/test-grad0 \
tests/test-grammar-integration \
@@ -1639,6 +1640,11 @@ tests/test-chat-template: tests/test-chat-template.cpp \
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+tests/test-cli: tests/test-cli.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

#
# PoCs
#
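With this rule in place, the new test binary can be built on its own with "make tests/test-cli" (the target name comes directly from the diff above; the usual llama.cpp Makefile workflow is assumed).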
2 changes: 1 addition & 1 deletion examples/eval-callback/CMakeLists.txt
@@ -5,5 +5,5 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)

set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
+add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0)
set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
11 changes: 6 additions & 5 deletions examples/main/main.cpp
@@ -113,12 +113,12 @@ static void sigint_handler(int signo) {
need_insert_eot = true;
} else {
console::cleanup();
-LOG("\n");
+LOG_INF("\n");
common_perf_print(*g_ctx, *g_smpl);
write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);

// make sure all logs are flushed
-LOG("Interrupted by user\n");
+LOG_INF("Interrupted by user\n");
common_log_pause(common_log_main());

_exit(130);
@@ -717,7 +717,8 @@ int main(int argc, char ** argv) {
const std::string token_str = common_token_to_piece(ctx, id, params.special);

// Console/Stream Output
-LOG("%s", token_str.c_str());
+fprintf(stdout, "%s", token_str.c_str());
+fflush(stdout);
Comment on lines 719 to +721 (Member):

Mixing printf and common/log is not recommended because they run on two different threads and the output can get out of order. Maybe try using LOGV(-1, ...) and then, instead of --log-disable, using --log-verbosity -1?


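A minimal sketch of that suggestion (LOGV is the common/log.h macro the comment itself names; this code is illustrative and not part of the diff):

// route the token text through the logger at verbosity -1 so it stays
// ordered with the other LOG_* calls instead of racing them on stdout
LOGV(-1, "%s", token_str.c_str());

Running with --log-verbosity -1 instead of --log-disable would then keep the token stream visible while muting the ordinary logs, assuming messages are printed only when their verbosity is at or below the configured threshold.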
// Record Displayed Tokens To Log
// Note: Generated tokens are created one by one hence this check
@@ -920,11 +921,11 @@ int main(int argc, char ** argv) {
}

if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
-LOG("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
+LOG_INF("\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
llama_state_save_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
}

-LOG("\n\n");
+LOG_INF("\n\n");
common_perf_print(ctx, smpl);
write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);

6 changes: 4 additions & 2 deletions ggml/src/ggml.c
@@ -353,8 +353,10 @@ void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
(void) level;
(void) user_data;
-fputs(text, stderr);
-fflush(stderr);
+if (level != GGML_LOG_LEVEL_DEBUG) {
+    fputs(text, stderr);
+    fflush(stderr);
+}
}

#if (GGML_DEBUG >= 1)
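With this change the default callback drops GGML_LOG_LEVEL_DEBUG messages. A caller that still wants them can install its own callback through ggml_log_set(); a minimal sketch, with an illustrative callback name:

static void my_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    // forward everything to stderr, including GGML_LOG_LEVEL_DEBUG
    fputs(text, stderr);
    fflush(stderr);
}

// at startup:
// ggml_log_set(my_log_callback, NULL);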
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -113,6 +113,7 @@ llama_target_and_test(test-arg-parser.cpp)
llama_target_and_test(test-quantize-fns.cpp)
llama_target_and_test(test-quantize-perf.cpp)
llama_target_and_test(test-sampling.cpp)
+llama_target_and_test(test-cli.cpp WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
llama_target_and_test(test-chat-template.cpp)

llama_target_and_test(test-grammar-parser.cpp)
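Note: the WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin argument matters here because test-cli.cpp (below) falls back to ./llama-cli when no path is passed, so the test must run from the directory that holds the built binaries. Under CTest the test can be selected with ctest -R test-cli (standard CTest usage, not part of this diff).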
76 changes: 76 additions & 0 deletions tests/test-cli.cpp
@@ -0,0 +1,76 @@
#ifdef NDEBUG
#undef NDEBUG
#endif


#include <algorithm>
#include <cstdlib>   // std::system
#include <iostream>
#include <fstream>
#include <sstream>
#include <stdexcept> // std::runtime_error
#include <string>
#include <vector>
#include <unistd.h>

static std::string read(const std::string & file) {
std::ostringstream actuals;
actuals << std::ifstream(file.c_str()).rdbuf();
return actuals.str();
}

static void assert_equals(const std::string & expected, const std::string & actual) {
if (expected != actual) {
std::cerr << "Expected: " << expected << std::endl;
std::cerr << "Actual: " << actual << std::endl;
std::cerr << std::flush;
throw std::runtime_error("Test failed");
}
}

static void assert_contains(const std::string & expected, const std::string & actual) {
if (actual.find(expected) == std::string::npos) {
std::cerr << "Expected to find: " << expected << std::endl;
std::cerr << "Actual: " << actual << std::endl;
std::cerr << std::flush;
throw std::runtime_error("Test failed");
}
}

struct Out {
std::string out;
std::string err;
};

static Out run(const std::string & cmd) {
auto full_cmd = cmd + " > out/out.txt 2> out/err.txt";
std::cerr << "Running: " << full_cmd << std::endl;
if (std::system(full_cmd.c_str()) != 0)
throw std::runtime_error("llama-cli binary failed to run.");
return {
/* .out = */ read("out/out.txt"),
/* .err = */ read("out/err.txt"),
};
}

int main(int argc, char ** argv) {
std::string cli_bin = argc == 2 ? argv[1] : "./llama-cli";

std::system("mkdir out/");

{
auto p = run(cli_bin + " --help");
if (!p.err.empty())
throw std::runtime_error("llama-cli --help should not have any stderr.");
assert_contains("example usage", p.out);
}

{
auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10");
assert_equals(" hello Joe and Joe we", p.out);
assert_contains("system_info:", p.err);
Comment (Member):

Default sampling settings can change in the future, so using greedy sampling here may be more reliable.
}

{
auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --seed 42 -ngl 0 -n 10 --log-disable");
assert_equals(" hello Joe and Joe we", p.out);
assert_equals("", p.err);
}
}
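A hedged sketch of the greedy-sampling suggestion above, assuming llama-cli's --top-k 1 restricts decoding to the single most likely token (the expected output string would then need to be captured once under those settings):

{
    auto p = run(cli_bin + " -hfr ggml-org/models -hff tinyllamas/stories260K.gguf --prompt hello --top-k 1 -ngl 0 -n 10");
    // with greedy decoding the exact continuation is stable, so it could be
    // pinned with assert_equals(...) without depending on sampler defaults
    assert_contains("system_info:", p.err);
}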