diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp index 899ea37d5be..b655a619b26 100644 --- a/examples/models/llama/main.cpp +++ b/examples/models/llama/main.cpp @@ -7,11 +7,14 @@ * @lint-ignore-every CLANGTIDY facebook-hte-Deprecated */ +#include #include #include #include -#include +#ifdef ET_EVENT_TRACER_ENABLED +#include +#endif #if defined(ET_USE_THREADPOOL) #include @@ -64,6 +67,11 @@ DEFINE_int32( DEFINE_bool(warmup, false, "Whether to run a warmup run."); +DEFINE_string( + etdump_path, + "etdump.in", + "If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes."); + // Helper function to parse comma-separated string lists std::vector parseStringList(const std::string& input) { std::vector result; @@ -117,9 +125,26 @@ int32_t main(int32_t argc, char** argv) { ->_unsafe_reset_threadpool(num_performant_cores); } #endif + +#ifdef ET_EVENT_TRACER_ENABLED + // Create ETDumpGen and get raw pointer reference for later access + auto etdump_gen_ptr = std::make_unique(); + executorch::etdump::ETDumpGen* etdump_gen = etdump_gen_ptr.get(); +#endif + // create llama runner std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner = - example::create_llama_runner(model_path, tokenizer_path, data_paths); + example::create_llama_runner( + model_path, + tokenizer_path, + data_paths, + temperature, +#ifdef ET_EVENT_TRACER_ENABLED + std::move(etdump_gen_ptr) +#else + nullptr +#endif + ); if (runner == nullptr) { ET_LOG(Error, "Failed to create llama runner"); @@ -157,5 +182,25 @@ int32_t main(int32_t argc, char** argv) { return 1; } +#ifdef ET_EVENT_TRACER_ENABLED + if (etdump_gen != nullptr) { + executorch::etdump::ETDumpResult result = etdump_gen->get_etdump_data(); + if (result.buf != nullptr && result.size > 0) { + FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+"); + if (f == nullptr) { + ET_LOG( + Error, + "Failed to open etdump file at path: %s", + FLAGS_etdump_path.c_str()); + } else { + fwrite((uint8_t*)result.buf, 1, result.size, f); + fclose(f); + ET_LOG(Info, "ETDump file written to: %s", FLAGS_etdump_path.c_str()); + } + free(result.buf); + } + } +#endif + return 0; } diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 19ed9f88339..d2db805405e 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -36,22 +36,32 @@ std::unique_ptr create_llama_runner( const std::string& model_path, const std::string& tokenizer_path, std::optional data_path, - float temperature) { + float temperature, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) { if (data_path.has_value()) { std::vector data_files; data_files.push_back(data_path.value()); return create_llama_runner( - model_path, tokenizer_path, std::move(data_files), temperature); + model_path, + tokenizer_path, + std::move(data_files), + temperature, + std::move(event_tracer)); } return create_llama_runner( - model_path, tokenizer_path, std::vector(), temperature); + model_path, + tokenizer_path, + std::vector(), + temperature, + std::move(event_tracer)); } std::unique_ptr create_llama_runner( const std::string& model_path, const std::string& tokenizer_path, std::vector data_files, - float temperature) { + float temperature, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) { ET_LOG( Info, "Creating LLaMa runner: model_path=%s, tokenizer_path=%s", @@ -70,7 +80,11 @@ std::unique_ptr create_llama_runner( return nullptr; } return llm::create_text_llm_runner( - model_path, std::move(tokenizer), data_files); + model_path, + std::move(tokenizer), + data_files, + temperature, + std::move(event_tracer)); } } // namespace example diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h index 728ae57efa8..10225fcb81d 100644 --- a/examples/models/llama/runner/runner.h +++ b/examples/models/llama/runner/runner.h @@ -28,13 +28,15 @@ std::unique_ptr create_llama_runner( const std::string& model_path, const std::string& tokenizer_path, std::optional data_path, - float temperature = -1.0f); + float temperature = -1.0f, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr); std::unique_ptr create_llama_runner( const std::string& model_path, const std::string& tokenizer_path, std::vector data_files = {}, - float temperature = -1.0f); + float temperature = -1.0f, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr); std::unique_ptr load_llama_tokenizer( const std::string& tokenizer_path, diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl index fd298ee628e..9c0b7265159 100644 --- a/examples/models/llama/runner/targets.bzl +++ b/examples/models/llama/runner/targets.bzl @@ -28,6 +28,9 @@ def define_common_targets(): exported_headers = [ "runner.h", ], + deps = [ + "//executorch/devtools/etdump:etdump_flatcc", + ], preprocessor_flags = [ "-DUSE_ATEN_LIB", ] if aten else [], diff --git a/examples/models/llama/targets.bzl b/examples/models/llama/targets.bzl index 66c5dacc8e9..42512145eed 100644 --- a/examples/models/llama/targets.bzl +++ b/examples/models/llama/targets.bzl @@ -19,6 +19,7 @@ def define_common_targets(): "//executorch/extension/evalue_util:print_evalue", "//executorch/extension/threadpool:threadpool", "//executorch/extension/threadpool:cpuinfo_utils", + "//executorch/devtools/etdump:etdump_flatcc" + aten_suffix, ], external_deps = [ "gflags", diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp index 674be820072..13f8d7a9db5 100644 --- a/extension/llm/runner/llm_runner_helper.cpp +++ b/extension/llm/runner/llm_runner_helper.cpp @@ -200,7 +200,8 @@ std::unique_ptr create_text_llm_runner( const std::string& model_path, std::unique_ptr<::tokenizers::Tokenizer> tokenizer, std::vector data_files, - float temperature) { + float temperature, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer) { // Sanity check tokenizer if (!tokenizer || !tokenizer->is_loaded()) { ET_LOG(Error, "Tokenizer is null or not loaded"); @@ -211,9 +212,13 @@ std::unique_ptr create_text_llm_runner( std::unique_ptr module; if (data_files.size() > 0) { module = std::make_unique( - model_path, data_files, Module::LoadMode::File); + model_path, + data_files, + Module::LoadMode::File, + std::move(event_tracer)); } else { - module = std::make_unique(model_path, Module::LoadMode::File); + module = std::make_unique( + model_path, Module::LoadMode::File, std::move(event_tracer)); } // Get metadata from Module diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h index 08f0efd0353..424567b7c2b 100644 --- a/extension/llm/runner/llm_runner_helper.h +++ b/extension/llm/runner/llm_runner_helper.h @@ -123,7 +123,8 @@ ET_EXPERIMENTAL std::unique_ptr create_text_llm_runner( const std::string& model_path, std::unique_ptr<::tokenizers::Tokenizer> tokenizer, std::vector data_files = {}, - float temperature = -1.0f); + float temperature = -1.0f, + std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr); /** * @brief Creates a MultimodalRunner instance with dependency injection