@@ -39,19 +39,25 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
3939Runner::Runner (
4040 const std::string& model_path,
4141 const std::string& tokenizer_path,
42- const float temperature)
42+ const float temperature,
43+ std::optional<const std::string> data_path)
4344 // NOTE: we observed ~2x loading performance increase on iPhone 15
4445 // and a ~5% improvement on Galaxy S22 by switching to
4546 // FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
4647 : temperature_(temperature),
47- module_ (std::make_unique<Module>(model_path, Module::LoadMode::File)),
4848 tokenizer_path_ (tokenizer_path),
4949 metadata_({
5050 {kEnableDynamicShape , false },
5151 {kMaxSeqLen , 128 },
5252 {kUseKVCache , true },
5353 {kUseSDPAWithKVCache , false },
5454 }) {
55+ if (data_path.has_value ()) {
56+ module_ = std::make_unique<Module>(
57+ model_path, data_path.value (), Module::LoadMode::File);
58+ } else {
59+ module_ = std::make_unique<Module>(model_path, Module::LoadMode::File);
60+ }
5561 ET_LOG (
5662 Info,
5763 " Creating LLaMa runner: model_path=%s, tokenizer_path=%s" ,
0 commit comments