Mobile model tester apps #515
Draft: edgchen1 wants to merge 14 commits into main from edgchen1/model_tester_ios
+2,403 −1
Commits (14 total; the changes shown below are from 6 commits):
- f265cf3 add model_tester files (edgchen1)
- c511ad6 add readmes, update to use model.onnx (edgchen1)
- 5cfa3e0 update .clang-format to align with ORT (edgchen1)
- 78deea4 Add readme (edgchen1)
- fe66c12 add download_model.py (edgchen1)
- e36152e allow EP to be selected (edgchen1)
- a0dc624 Add provider options to UI. (edgchen1)
- 8cdd9bb add focusing, move option parsing logic to helper function. (edgchen1)
- 94016b3 remove intermediate ModelTester directory (edgchen1)
- ef951d1 fix up paths to model_runner files (edgchen1)
- 779733a support loading from model bytes in common code (edgchen1)
- 2e2af8e add android app files (edgchen1)
- d22af68 update android app, very basic functionality (edgchen1)
- 0c70b9f add support for ep type and options args in jni function (edgchen1)
mobile/examples/model_tester/common/include/model_runner.h (51 additions, 0 deletions)
```cpp
#pragma once

#include <cstdint>

#include <chrono>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

namespace model_runner {

using Clock = std::chrono::steady_clock;
using Duration = Clock::duration;

struct RunConfig {
  // Path to the model to run.
  std::string model_path{};

  // Whether to run a warmup iteration before running the measured (timed) iterations.
  bool run_warmup_iteration{true};

  // Number of iterations to run.
  size_t num_iterations{10};

  // Configuration for an Execution Provider (EP).
  struct EpConfig {
    std::string provider_name{};
    std::unordered_map<std::string, std::string> provider_options{};
  };

  // Specifies the EP to use in the session.
  std::optional<EpConfig> ep{};

  // Specifies the onnxruntime log level.
  std::optional<int> log_level{};
};

struct RunResult {
  // Time taken to load the model.
  Duration load_duration;

  // Times taken to run the model.
  std::vector<Duration> run_durations;
};

RunResult Run(const RunConfig& run_config);

std::string GetRunSummary(const RunConfig& run_config, const RunResult& run_result);

}  // namespace model_runner
```
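This header is the entire public surface of the common model runner: fill in a `RunConfig`, call `Run`, and format the result with `GetRunSummary`. For reference, a minimal caller might look like the sketch below. It is not part of this PR; the model path and iteration count are placeholder values, and it assumes the common code is compiled and linked against ONNX Runtime.

```cpp
// Hypothetical caller (not part of the PR): time a local model with default settings.
#include <iostream>

#include "model_runner.h"

int main() {
  model_runner::RunConfig config{};
  config.model_path = "./model.onnx";  // placeholder path
  config.num_iterations = 20;          // default is 10 timed iterations
  // config.ep is left empty, so the session uses ONNX Runtime's default CPU execution.

  const auto result = model_runner::Run(config);
  std::cout << model_runner::GetRunSummary(config, result);
  return 0;
}
```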
model_runner implementation source (new file, 250 additions)
```cpp
#include "model_runner.h"

#include <cstddef>

#include <algorithm>
#include <chrono>
#include <filesystem>
#include <format>
#include <iterator>
#include <numeric>
#include <span>

#include "onnxruntime_cxx_api.h"

namespace model_runner {

namespace {

size_t GetDataTypeSizeInBytes(ONNXTensorElementDataType data_type) {
  switch (data_type) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
      return 1;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
      return 2;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
      return 4;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
      return 8;
    default:
      throw std::invalid_argument(std::format("unsupported tensor data type: {}", static_cast<int>(data_type)));
  }
}

void FillTensorWithZeroes(Ort::Value& value) {
  const auto tensor_info = value.GetTensorTypeAndShapeInfo();
  const auto data_type = tensor_info.GetElementType();
  const auto num_elements = tensor_info.GetElementCount();
  const auto data_type_size_in_bytes = GetDataTypeSizeInBytes(data_type);
  const auto data_size_in_bytes = num_elements * data_type_size_in_bytes;

  std::byte* data = static_cast<std::byte*>(value.GetTensorMutableRawData());
  std::fill(data, data + data_size_in_bytes, std::byte{0});
}

std::vector<Ort::Value> GetModelInputValues(const Ort::Session& session) {
  const auto num_inputs = session.GetInputCount();

  std::vector<Ort::Value> input_values{};
  input_values.reserve(num_inputs);

  Ort::AllocatorWithDefaultOptions allocator{};

  for (size_t i = 0; i < num_inputs; ++i) {
    auto type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

    auto tensor_shape = tensor_info.GetShape();
    // make this a static shape
    for (auto& dim : tensor_shape) {
      if (dim == -1) {
        dim = 1;
      }
    }

    const auto tensor_data_type = tensor_info.GetElementType();

    auto value = Ort::Value::CreateTensor(allocator, tensor_shape.data(), tensor_shape.size(), tensor_data_type);

    FillTensorWithZeroes(value);

    input_values.emplace_back(std::move(value));
  }

  return input_values;
}

std::vector<std::string> GetModelInputOrOutputNames(const Ort::Session& session, bool is_input) {
  const auto num_inputs_or_outputs = is_input ? session.GetInputCount() : session.GetOutputCount();

  std::vector<std::string> names{};
  names.reserve(num_inputs_or_outputs);

  auto allocator = Ort::AllocatorWithDefaultOptions{};
  for (size_t i = 0; i < num_inputs_or_outputs; ++i) {
    auto name = is_input ? session.GetInputNameAllocated(i, allocator)
                         : session.GetOutputNameAllocated(i, allocator);
    names.emplace_back(name.get());
  }

  return names;
}

std::vector<std::string> GetModelInputNames(const Ort::Session& session) {
  return GetModelInputOrOutputNames(session, /* is_input */ true);
}

std::vector<std::string> GetModelOutputNames(const Ort::Session& session) {
  return GetModelInputOrOutputNames(session, /* is_input */ false);
}

std::vector<const char*> GetCstrs(std::span<const std::string> strs) {
  std::vector<const char*> cstrs{};
  cstrs.reserve(strs.size());
  std::transform(strs.begin(), strs.end(), std::back_inserter(cstrs),
                 [](const std::string& str) { return str.c_str(); });
  return cstrs;
}

class Timer {
 public:
  Timer() { Reset(); }

  void Reset() { start_ = Clock::now(); }

  Duration Elapsed() const { return Clock::now() - start_; }

 private:
  Clock::time_point start_;
};

struct RunResultStats {
  using DurationFp = std::chrono::duration<float, Duration::period>;

  size_t n;
  DurationFp average;
  Duration min, max;
  Duration p50, p90, p99;
};

RunResultStats ComputeRunResultStats(const RunResult& run_result) {
  using DurationFp = RunResultStats::DurationFp;

  const auto& run_durations = run_result.run_durations;

  RunResultStats stats{};
  const auto n = run_durations.size();
  stats.n = n;
  if (n > 0) {
    const auto total_run_duration = std::accumulate(run_durations.begin(), run_durations.end(),
                                                    DurationFp{0.0f});
    stats.average = DurationFp{total_run_duration.count() / n};

    auto sorted_run_durations = run_durations;
    std::sort(sorted_run_durations.begin(), sorted_run_durations.end());
    stats.min = sorted_run_durations.front();
    stats.max = sorted_run_durations.back();
    stats.p50 = sorted_run_durations[static_cast<size_t>(0.5f * n)];
    stats.p90 = sorted_run_durations[static_cast<size_t>(0.9f * n)];
    stats.p99 = sorted_run_durations[static_cast<size_t>(0.99f * n)];
  }

  return stats;
}

}  // namespace

RunResult Run(const RunConfig& run_config) {
  RunResult run_result{};

  auto env = Ort::Env{};

  if (run_config.log_level.has_value()) {
    env.UpdateEnvWithCustomLogLevel(static_cast<OrtLoggingLevel>(*run_config.log_level));
  }

  auto session_options = Ort::SessionOptions{};

  if (const auto& ep_config = run_config.ep; ep_config.has_value()) {
    session_options.AppendExecutionProvider(ep_config->provider_name, ep_config->provider_options);
  }

  Timer timer{};
  auto session = Ort::Session{env, run_config.model_path.c_str(), session_options};
  run_result.load_duration = timer.Elapsed();

  auto input_names = GetModelInputNames(session);
  auto input_name_cstrs = GetCstrs(input_names);

  auto input_values = GetModelInputValues(session);

  auto output_names = GetModelOutputNames(session);
  auto output_name_cstrs = GetCstrs(output_names);

  auto run_options = Ort::RunOptions{};

  run_result.run_durations.reserve(run_config.num_iterations);

  // warmup
  if (run_config.run_warmup_iteration) {
    auto outputs = session.Run(run_options,
                               input_name_cstrs.data(), input_values.data(), input_values.size(),
                               output_name_cstrs.data(), output_name_cstrs.size());
  }

  // measure runs
  for (size_t i = 0; i < run_config.num_iterations; ++i) {
    timer.Reset();
    auto outputs = session.Run(run_options,
                               input_name_cstrs.data(), input_values.data(), input_values.size(),
                               output_name_cstrs.data(), output_name_cstrs.size());
    run_result.run_durations.push_back(timer.Elapsed());
  }

  return run_result;
}

std::string GetRunSummary(const RunConfig& /*run_config*/, const RunResult& run_result) {
  auto to_display_duration = []<typename Rep, typename Period>(std::chrono::duration<Rep, Period> d) {
    using DisplayPeriod = std::chrono::microseconds::period;
    using DisplayDuration = std::chrono::duration<Rep, DisplayPeriod>;
    return std::chrono::duration_cast<DisplayDuration>(d);
  };

  const auto stats = ComputeRunResultStats(run_result);

  const auto summary = std::format(
      "Load time: {}\n"
      "N (number of runs): {}\n"
      "Latency\n"
      " avg: {}\n"
      " p50: {}\n"
      " p90: {}\n"
      " p99: {}\n"
      " min: {}\n"
      " max: {}\n",
      to_display_duration(run_result.load_duration),
      stats.n,
      to_display_duration(stats.average),
      to_display_duration(stats.p50),
      to_display_duration(stats.p90),
      to_display_duration(stats.p99),
      to_display_duration(stats.min),
      to_display_duration(stats.max));

  return summary;
}

}  // namespace model_runner
```
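`Run` forwards `RunConfig::EpConfig` straight to `Ort::SessionOptions::AppendExecutionProvider`, so selecting an EP is just a matter of populating that optional field. As an illustration only (not from the PR), the sketch below builds a config that requests the XNNPACK EP; the provider name and the `intra_op_num_threads` option are assumptions about what the target ONNX Runtime build accepts, so substitute whichever EP and options your build actually provides.

```cpp
// Hypothetical helper (not part of the PR): build a RunConfig that selects an EP by name.
#include <string>

#include "model_runner.h"

model_runner::RunConfig MakeEpRunConfig(const std::string& model_path) {
  model_runner::RunConfig config{};
  config.model_path = model_path;
  config.run_warmup_iteration = true;
  config.num_iterations = 100;

  // Assumed EP name and option; Run() passes both to AppendExecutionProvider() unchanged.
  config.ep = model_runner::RunConfig::EpConfig{
      "XNNPACK",
      {{"intra_op_num_threads", "2"}}};

  // 2 is assumed to correspond to ORT_LOGGING_LEVEL_WARNING.
  config.log_level = 2;
  return config;
}
```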
mobile/examples/model_tester/ios/ModelTester/ModelRunner/model_runner_objc_wrapper.h (29 additions, 0 deletions)
```objc
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/**
 * This class is an Objective-C wrapper around the C++ `model_runner::RunConfig` structure.
 */
@interface ModelRunnerRunConfig : NSObject

- (void)setModelPath:(NSString*)modelPath;

- (void)setNumIterations:(NSUInteger)numIterations;

- (void)setExecutionProvider:(NSString*)providerName
                     options:(nullable NSDictionary<NSString*, NSString*>*)providerOptions;

@end

/**
 * This class is an Objective-C wrapper around the C++ model runner functions.
 */
@interface ModelRunner : NSObject

+ (nullable NSString*)runWithConfig:(ModelRunnerRunConfig*)config
                              error:(NSError**)error NS_SWIFT_NAME(run(config:));

@end

NS_ASSUME_NONNULL_END
```