|
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>

#include "tensorrt_execution_provider_utils.h"
#include "onnx_ctx_model_helper.h"
7 | 10 |
|
8 | | -namespace onnxruntime { |
9 | | - |
10 | | -bool GraphHasCtxNode(const OrtGraphViewer* graph_viewer) { |
11 | | - const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION); |
12 | | - const OrtGraphApi* graph_api = api->GetGraphApi(ORT_API_VERSION); |
13 | | - int maxNodeIndex = 0; |
14 | | - graph_api->OrtGraph_MaxNodeIndex(graph_viewer, &maxNodeIndex); |
15 | | - for (int i = 0; i < maxNodeIndex; ++i) { |
16 | | - const OrtNode* node = nullptr; |
17 | | - graph_api->OrtGraph_GetOrtNode(graph_viewer, i, &node); |
18 | | - if (node == nullptr) { |
19 | | - continue; |
20 | | - } |
21 | | - const char* opType = nullptr; |
22 | | - graph_api->OrtNode_GetOpType(node, &opType); |
23 | | - if (strcmp(opType, EPCONTEXT_OP.c_str()) == 0) { |
24 | | - return true; |
25 | | - } |
26 | | - } |
27 | | - return false; |
28 | | -} |
| 11 | +extern TensorrtLogger& GetTensorrtLogger(bool verbose_log); |
29 | 12 |
|
30 | 13 | /* |
31 | | - * Return the directory where the ep context model locates |
| 14 | + * Check whether the graph has the EP context node. |
| 15 | + * The node can contain the precompiled engine info for TRT EP to directly load the engine. |
| 16 | + * |
| 17 | + * Note: Please see more details about "EPContext" contrib op in contrib_defs.cc |
32 | 18 | */ |
33 | | -std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path) { |
34 | | - if (ep_context_file_path.empty()) { |
35 | | - return std::filesystem::path(); |
36 | | - } |
37 | | - std::filesystem::path ctx_path(ep_context_file_path); |
38 | | - if (std::filesystem::is_directory(ep_context_file_path)) { |
39 | | - return ctx_path; |
40 | | - } else { |
41 | | - return ctx_path.parent_path(); |
42 | | - } |
43 | | -} |
| 19 | +bool EPContextNodeHelper::GraphHasCtxNode(const OrtGraph* graph, const OrtApi& ort_api) { |
| 20 | + size_t num_nodes = 0; |
| 21 | + RETURN_IF_ERROR(ort_api.Graph_GetNumNodes(graph, &num_nodes)); |
44 | 22 |
|
45 | | -std::string GetCtxModelPath(const std::string& ep_context_file_path, |
46 | | - const std::string& original_model_path) { |
47 | | - std::string ctx_model_path; |
| 23 | + std::vector<const OrtNode*> nodes(num_nodes); |
48 | 24 |
|
49 | | - if (!ep_context_file_path.empty() && !std::filesystem::is_directory(ep_context_file_path)) { |
50 | | - ctx_model_path = ep_context_file_path; |
51 | | - } else { |
52 | | - std::filesystem::path model_path = original_model_path; |
53 | | - std::filesystem::path model_name_stem = model_path.stem(); // model_name.onnx -> model_name |
54 | | - std::string ctx_model_name = model_name_stem.string() + "_ctx.onnx"; |
| 25 | + for (size_t i = 0; i < num_nodes; ++i) { |
| 26 | + auto node = nodes[i]; |
55 | 27 |
|
56 | | - if (std::filesystem::is_directory(ep_context_file_path)) { |
57 | | - std::filesystem::path model_directory = ep_context_file_path; |
58 | | - ctx_model_path = model_directory.append(ctx_model_name).string(); |
59 | | - } else { |
60 | | - ctx_model_path = ctx_model_name; |
| 28 | + const char* op_type = nullptr; |
| 29 | + RETURN_IF_ERROR(ort_api.Node_GetOperatorType(node, &op_type)); |
| 30 | + if (node != nullptr && op_type == "EPContext") { |
| 31 | + return true; |
61 | 32 | } |
62 | 33 | } |
63 | | - return ctx_model_path; |
64 | | -} |
65 | | - |
66 | | -bool IsAbsolutePath(const std::string& path_string) { |
67 | | -#ifdef _WIN32 |
68 | | - onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); |
69 | | - auto path = std::filesystem::path(ort_path_string.c_str()); |
70 | | - return path.is_absolute(); |
71 | | -#else |
72 | | - if (!path_string.empty() && path_string[0] == '/') { |
73 | | - return true; |
74 | | - } |
75 | 34 | return false; |
76 | | -#endif |
77 | | -} |
78 | | - |
79 | | -// Like "../file_path" |
80 | | -bool IsRelativePathToParentPath(const std::string& path_string) { |
81 | | -#ifdef _WIN32 |
82 | | - onnxruntime::PathString ort_path_string = onnxruntime::ToPathString(path_string); |
83 | | - auto path = std::filesystem::path(ort_path_string.c_str()); |
84 | | - auto relative_path = path.lexically_normal().make_preferred().wstring(); |
85 | | - if (relative_path.find(L"..", 0) != std::string::npos) { |
86 | | - return true; |
87 | | - } |
88 | | - return false; |
89 | | -#else |
90 | | - if (!path_string.empty() && path_string.find("..", 0) != std::string::npos) { |
91 | | - return true; |
92 | | - } |
93 | | - return false; |
94 | | -#endif |
95 | 35 | } |
96 | 36 |
|
97 | 37 | /* |
98 | | - * Get the weight-refitted engine cache path from a weight-stripped engine cache path |
99 | | - * |
100 | | - * Weight-stipped engine: |
101 | | - * An engine with weights stripped and its size is smaller than a regualr engine. |
102 | | - * The cache name of weight-stripped engine is TensorrtExecutionProvider_TRTKernel_XXXXX.stripped.engine |
103 | | - * |
104 | | - * Weight-refitted engine: |
105 | | - * An engine that its weights have been refitted and it's simply a regular engine. |
106 | | - * The cache name of weight-refitted engine is TensorrtExecutionProvider_TRTKernel_XXXXX.engine |
| 38 | + * Create EPContext OrtNode from a fused_node |
107 | 39 | */ |
108 | | -std::string GetWeightRefittedEnginePath(std::string stripped_engine_cache) { |
109 | | - std::filesystem::path stripped_engine_cache_path(stripped_engine_cache); |
110 | | - std::string refitted_engine_cache_path = stripped_engine_cache_path.stem().stem().string() + ".engine"; |
111 | | - return refitted_engine_cache_path; |
112 | | -} |
113 | | - |
114 | | -bool IsWeightStrippedEngineCache(std::filesystem::path& engine_cache_path) { |
115 | | - // The weight-stripped engine cache has the naming of xxx.stripped.engine |
116 | | - return engine_cache_path.stem().extension().string() == ".stripped"; |
117 | | -} |
118 | | - |
119 | | -OrtStatusPtr TensorRTCacheModelHandler::GetEpContextFromGraph(const OrtGraphViewer* graph_viewer) { |
120 | | - if (!ValidateEPCtxNode(graph_viewer)) { |
121 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, "It's not a valid EP Context node"); |
122 | | - } |
123 | | - const OrtNode* node = nullptr; |
124 | | - graph_api_->OrtGraph_GetOrtNode(graph_viewer, 0, &node); |
125 | | - |
126 | | - int64_t embed_mode = -1; |
127 | | - graph_api_->OrtNode_GetAttributeInt(node, EMBED_MODE.c_str(), &embed_mode); |
128 | | - if (embed_mode) { |
129 | | - // Get engine from byte stream. |
130 | | - const char* context_binary_cstr = nullptr; |
131 | | - size_t size; |
132 | | - graph_api_->OrtNode_GetAttributeStrWithSize(node, EP_CACHE_CONTEXT.c_str(), &context_binary_cstr, &size); |
133 | | - std::string context_binary(context_binary_cstr, size); |
134 | | - *(trt_engine_) = std::unique_ptr<nvinfer1::ICudaEngine>(trt_runtime_->deserializeCudaEngine(const_cast<char*>(context_binary.c_str()), |
135 | | - static_cast<size_t>(context_binary.length()))); |
136 | | -// LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Read engine as binary data from \"ep_cache_context\" attribute of ep context node and deserialized it"; |
137 | | - if (!(*trt_engine_)) { |
138 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, "TensorRT EP could not deserialize engine from binary data"); |
| 40 | +OrtStatus* EPContextNodeHelper::CreateEPContextNode(const std::string& engine_cache_path, |
| 41 | + char* engine_data, |
| 42 | + size_t size, |
| 43 | + const int64_t embed_mode, |
| 44 | + const std::string& compute_capability, |
| 45 | + const std::string& onnx_model_path, |
| 46 | + OrtNode** ep_context_node) { |
| 47 | + |
| 48 | + // Helper to collect input or output names from an array of OrtValueInfo instances. |
| 49 | + auto collect_input_output_names = [&](gsl::span<const OrtValueInfo* const> value_infos, |
| 50 | + std::vector<const char*>& result) -> OrtStatus* { |
| 51 | + size_t num_values = value_infos.size(); |
| 52 | + std::vector<const char*> value_names(num_values); |
| 53 | + |
| 54 | + for (size_t i = 0; i < num_values; ++i) { |
| 55 | + const OrtValueInfo* value_info = value_infos[i]; |
| 56 | + RETURN_IF_ERROR(ort_api.GetValueInfoName(value_info, &value_names[i])); |
139 | 57 | } |
140 | | - } else { |
141 | | - // Get engine from cache file. |
142 | | - const char* cache_path_cstr = nullptr; |
143 | | - graph_api_->OrtNode_GetAttributeStr(node, EP_CACHE_CONTEXT.c_str(), &cache_path_cstr); |
144 | | - std::string cache_path(cache_path_cstr); |
145 | 58 |
|
146 | | - // For security purpose, in the case of running context model, TRT EP won't allow |
147 | | - // engine cache path to be the relative path like "../file_path" or the absolute path. |
148 | | - // It only allows the engine cache to be in the same directory or sub directory of the context model. |
149 | | - if (IsAbsolutePath(cache_path)) { |
150 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, std::string("For security purpose, the ep_cache_context attribute should be set with a relative path, but it is an absolute path: " + cache_path).c_str()); |
151 | | - } |
152 | | - if (IsRelativePathToParentPath(cache_path)) { |
153 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, "The file path in ep_cache_context attribute has '..'. For security purpose, it's not allowed to point outside the directory."); |
154 | | - } |
| 59 | + result = std::move(value_names); |
| 60 | + return nullptr; |
| 61 | + }; |
155 | 62 |
|
156 | | - // The engine cache and context model (current model) should be in the same directory |
157 | | - std::filesystem::path ctx_model_dir(GetPathOrParentPathOfCtxModel(ep_context_model_path_)); |
158 | | - auto engine_cache_path = ctx_model_dir.append(cache_path); |
159 | | -// LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] GetEpContextFromGraph engine_cache_path: " + engine_cache_path.string(); |
| 63 | + const char* fused_node_name = nullptr; |
160 | 64 |
|
161 | | - // If it's a weight-stripped engine cache, it needs to be refitted even though the refit flag is not enabled |
162 | | - if (!weight_stripped_engine_refit_) { |
163 | | - weight_stripped_engine_refit_ = IsWeightStrippedEngineCache(engine_cache_path); |
164 | | - } |
| 65 | + RETURN_IF_ERROR(ort_api.Node_GetName(fused_node_, &fused_node_name)); |
165 | 66 |
|
166 | | - // If the serialized refitted engine is present, use it directly without refitting the engine again |
167 | | - if (weight_stripped_engine_refit_) { |
168 | | - const std::filesystem::path refitted_engine_cache_path = GetWeightRefittedEnginePath(engine_cache_path.string()); |
169 | | - if (std::filesystem::exists(refitted_engine_cache_path)) { |
170 | | -// LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " + refitted_engine_cache_path.string() + " exists."; |
171 | | - engine_cache_path = refitted_engine_cache_path.string(); |
172 | | - weight_stripped_engine_refit_ = false; |
173 | | - } |
174 | | - } |
| 67 | + size_t num_fused_node_inputs = 0; |
| 68 | + size_t num_fused_node_outputs = 0; |
| 69 | + RETURN_IF_ERROR(ort_api.Node_GetNumInputs(fused_node_, &num_fused_node_inputs)); |
| 70 | + RETURN_IF_ERROR(ort_api.Node_GetNumOutputs(fused_node_, &num_fused_node_outputs)); |
175 | 71 |
|
176 | | - if (!std::filesystem::exists(engine_cache_path)) { |
177 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, |
178 | | - std::string("TensorRT EP can't find engine cache: " + engine_cache_path.string() + |
179 | | - ". Please make sure engine cache is in the same directory or sub-directory of context model.").c_str()); |
180 | | - } |
| 72 | + std::vector<const OrtValueInfo*> fused_node_inputs(num_fused_node_inputs); |
| 73 | + std::vector<const OrtValueInfo*> fused_node_outputs(num_fused_node_outputs); |
| 74 | + RETURN_IF_ERROR(ort_api.Node_GetInputs(fused_node_, fused_node_inputs.data(), fused_node_inputs.size())); |
| 75 | + RETURN_IF_ERROR(ort_api.Node_GetOutputs(fused_node_, fused_node_outputs.data(), fused_node_outputs.size())); |
181 | 76 |
|
182 | | - std::ifstream engine_file(engine_cache_path.string(), std::ios::binary | std::ios::in); |
183 | | - engine_file.seekg(0, std::ios::end); |
184 | | - size_t engine_size = engine_file.tellg(); |
185 | | - engine_file.seekg(0, std::ios::beg); |
186 | | - std::unique_ptr<char[]> engine_buf{new char[engine_size]}; |
187 | | - engine_file.read((char*)engine_buf.get(), engine_size); |
188 | | - *(trt_engine_) = std::unique_ptr<nvinfer1::ICudaEngine>(trt_runtime_->deserializeCudaEngine(engine_buf.get(), engine_size)); |
189 | | - if (!(*trt_engine_)) { |
190 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, |
191 | | - std::string("TensorRT EP could not deserialize engine from cache: " + engine_cache_path.string()).c_str()); |
192 | | - } |
193 | | -// LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path.string(); |
| 77 | + std::vector<const char*> input_names; |
| 78 | + std::vector<const char*> output_names; |
194 | 79 |
|
195 | | - if (weight_stripped_engine_refit_) { |
196 | | - const char* onnx_model_filename_cstr = nullptr; |
197 | | - graph_api_->OrtNode_GetAttributeStr(node, ONNX_MODEL_FILENAME.c_str(), &onnx_model_filename_cstr); |
198 | | - const std::string onnx_model_filename(onnx_model_filename_cstr); |
199 | | - std::string weight_stripped_engine_cache = engine_cache_path.string(); |
200 | | - auto status = TensorrtExecutionProvider::RefitEngine(onnx_model_filename, |
201 | | - onnx_model_folder_path_, |
202 | | - weight_stripped_engine_cache, |
203 | | - true /* path check for security */, |
204 | | - (*trt_engine_).get(), |
205 | | - true /* serialize refitted engine to disk */, |
206 | | - detailed_build_log_); |
207 | | - if (status != nullptr) { |
208 | | - return api_->CreateStatus(OrtErrorCode::ORT_EP_FAIL, api_->GetErrorMessage(status)); |
209 | | - } |
210 | | - } |
211 | | - } |
212 | | - return nullptr; |
213 | | -} |
| 80 | + RETURN_IF_ERROR(collect_input_output_names(fused_node_inputs, /*out*/ input_names)); |
| 81 | + RETURN_IF_ERROR(collect_input_output_names(fused_node_outputs, /*out*/ output_names)); |
214 | 82 |
|
215 | | -bool TensorRTCacheModelHandler::ValidateEPCtxNode(const OrtGraphViewer* graph_viewer) { |
216 | | - int node_count = 0; |
217 | | - graph_api_->OrtGraph_NumberOfNodes(graph_viewer, &node_count); |
218 | | - assert(node_count == 1); |
219 | | - const OrtNode* node = nullptr; |
220 | | - graph_api_->OrtGraph_GetOrtNode(graph_viewer, 0, &node); |
221 | | - const char* opType = nullptr; |
222 | | - graph_api_->OrtNode_GetOpType(node, &opType); |
223 | | - assert(strcmp(opType, EPCONTEXT_OP.c_str()) == 0); |
| 83 | + // Create node attributes. The CreateNode() function copies the attributes, so we have to release them. |
| 84 | + std::array<OrtOpAttr*, 4> attributes = {}; |
| 85 | + DeferOrtRelease<OrtOpAttr> defer_release_attrs(attributes.data(), attributes.size(), ort_api.ReleaseOpAttr); |
224 | 86 |
|
225 | | - size_t key_count = 0; |
226 | | - graph_api_->OrtNode_GetAttributeKeyCount(node, COMPUTE_CAPABILITY.c_str(), &key_count); |
227 | | - // Show the warning if compute capability is not matched |
228 | | - if (key_count > 0) { |
229 | | - const char* model_compute_capability = nullptr; |
230 | | - graph_api_->OrtNode_GetAttributeStr(node, COMPUTE_CAPABILITY.c_str(), &model_compute_capability); |
231 | | - // Verify if engine was compiled with ampere+ hardware compatibility enabled |
232 | | - if (strcmp(model_compute_capability, "80+") == 0) { |
233 | | -// if (std::stoi(compute_capability_) < 80) { |
234 | | -// LOGS_DEFAULT(WARNING) << "[TensorRT EP] However, this GPU doesn't match. The compute capability of the GPU: " << compute_capability_; |
235 | | -// } |
236 | | - } else if (strcmp(model_compute_capability, compute_capability_.c_str()) != 0) { |
237 | | -// LOGS_DEFAULT(WARNING) << "[TensorRT EP] Engine was compiled for a different compatibility level and might not work or perform suboptimal"; |
238 | | -// LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the engine: " << model_compute_capability; |
239 | | -// LOGS_DEFAULT(WARNING) << "[TensorRT EP] The compute capability of the GPU: " << compute_capability_; |
| 87 | + RETURN_IF_ERROR(ort_api.CreateOpAttr("embed_mode", &embed_mode, 1, ORT_OP_ATTR_INT, &attributes[0])); |
| 88 | + |
| 89 | + std::string engine_data_str = ""; |
| 90 | + if (embed_mode) { |
| 91 | + if (size > 0) { |
| 92 | + engine_data_str.assign(engine_data, size); |
240 | 93 | } |
| 94 | + RETURN_IF_ERROR( |
| 95 | + ort_api.CreateOpAttr("ep_cache_context", engine_data_str.c_str(), 1, ORT_OP_ATTR_STRING, &attributes[1])); |
| 96 | + } else { |
| 97 | + RETURN_IF_ERROR(ort_api.CreateOpAttr("ep_cache_context", engine_cache_path.c_str(), 1, ORT_OP_ATTR_STRING, &attributes[1])); |
241 | 98 | } |
242 | 99 |
|
243 | | - // "embed_mode" attr and "ep_cache_context" attr should be present |
244 | | - graph_api_->OrtNode_GetAttributeKeyCount(node, EMBED_MODE.c_str(), &key_count); |
245 | | - assert(key_count > 0); |
246 | | - graph_api_->OrtNode_GetAttributeKeyCount(node, EP_CACHE_CONTEXT.c_str(), &key_count); |
247 | | - assert(key_count > 0); |
| 100 | + |
| 101 | + ort_api.CreateOpAttr("hardware_architecture", compute_capability.c_str(), 1, ORT_OP_ATTR_STRING, &attributes[2]); |
| 102 | + ort_api.CreateOpAttr("onnx_model_filename", std::filesystem::path(onnx_model_path).filename().string().c_str(), 1, |
| 103 | + ORT_OP_ATTR_STRING, &attributes[3]); |
248 | 104 |
|
249 | | - int64_t embed_mode = -1; |
250 | | - graph_api_->OrtNode_GetAttributeInt(node, EMBED_MODE.c_str(), &embed_mode); |
251 | | - if (embed_mode == 1) { |
252 | | - // engine binary data |
253 | | -// LOGS_DEFAULT(WARNING) << EPCONTEXT_WARNING; |
254 | | - } |
255 | 105 |
|
256 | | - return true; |
257 | | -} |
| 106 | + RETURN_IF_ERROR(model_editor_api.CreateNode("EPContext", "com.microsoft", fused_node_name, input_names.data(), |
| 107 | + input_names.size(), output_names.data(), output_names.size(), |
| 108 | + attributes.data(), attributes.size(), ep_context_node)); |
| 109 | + |
| 110 | + return nullptr; |
258 | 111 | } |
0 commit comments