[MIGraphX EP] Add migx ep fp8 support and int4 weights (microsoft#23534)
* Add fp8 and int4 types to the supported list for the ONNX Runtime EP
* Add support for int4 inputs
Map int4 to int8 for now, since we don't explicitly set an int4 input
type; pack/unpack int4 operands instead
* Add flag to allow for fp8 quantization through the ONNX Runtime API
* Add fp8 quantization to the compile stage of the MIGraphX EP
Mirror the same calibration code we use for int8 and just change which
quantize call we make through the MIGraphX API
* Clean up logging
* Cleanup and encapsulate quantization / compile functions
- Add additional flags for fp8 that are shared with int8
- Add lockout warning message when int8 and fp8 are requested at the same time
* Run lintrunner pass
* Fix session options inputs and add better logging.
Previous runs using session options failed because we were not pulling in
inputs from the Python interface. This, plus the additional logging, made it
possible to track which options were set via environment variables and which
were added at the start of an inference session
* Fix naming of save/load path variables to be consistent with the enable flags.
* Print only the environment variables that are set, as warnings
Needed so the user knows which environment variables are active in the
background, to ensure consistency between runs.
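The flag handling and the int8/fp8 lockout described above can be sketched from the Python side roughly as follows. This is an illustrative helper, not the EP's actual code: the fp8 option names (e.g. `migraphx_fp8_enable`) are assumptions modeled on the existing `migraphx_int8_*` provider-option naming, and `build_migraphx_provider_options` is a hypothetical helper.

```python
# Sketch: assembling MIGraphX EP provider options with an int8/fp8 lockout.
# Option names follow the migraphx_int8_* pattern; the fp8 names are
# assumptions for illustration, not a confirmed API.

def build_migraphx_provider_options(enable_fp8=False, enable_int8=False,
                                    calibration_table=None):
    """Assemble provider options, warning when both quant modes are requested."""
    if enable_fp8 and enable_int8:
        # Mirrors the PR's lockout: int8 and fp8 quantization are mutually
        # exclusive, so fall back to int8 and warn the user.
        print("WARNING: int8 and fp8 quantization both requested; "
              "only one can be enabled at a time. Using int8.")
        enable_fp8 = False
    opts = {
        "migraphx_fp8_enable": str(int(enable_fp8)),
        "migraphx_int8_enable": str(int(enable_int8)),
    }
    if calibration_table:
        opts["migraphx_int8_calibration_table_name"] = calibration_table
    return opts

# A session would then be created along these lines (requires an
# onnxruntime build with the MIGraphX EP):
# import onnxruntime as ort
# sess = ort.InferenceSession(
#     "model.onnx",
#     providers=[("MIGraphXExecutionProvider",
#                 build_migraphx_provider_options(enable_fp8=True))],
# )
```

The same options can typically also be driven by environment variables, which is why the PR logs any that are set.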
---------
### Description
Cleans up the MIGraphX EP quantization code and adds fp8 quantization
support along with int4 weight support.
The cleanup also addresses several issues seen in the Python interface
when taking in provider options.
### Motivation and Context
Required to fix flags that were ignored when using provider_options with
the MIGraphX EP.
Adds fp8 quantization through the MIGraphX API.
Adds support for packed int4 weights for MIGraphX inference.
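Packed int4 weights store two signed 4-bit values per byte, which is why the EP maps them onto int8 and unpacks the operands. A minimal sketch of the unpacking step, assuming a low-nibble-first layout (the actual packing convention lives in MIGraphX / ONNX Runtime, and `unpack_int4` is a hypothetical helper):

```python
# Sketch: unpacking packed int4 weights from an int8 buffer.
# Assumes two signed 4-bit values per byte, low nibble first; the real
# convention is defined by MIGraphX / ONNX Runtime, not here.
import numpy as np

def unpack_int4(packed: np.ndarray) -> np.ndarray:
    """Expand an int8 array of packed int4 pairs into signed int8 values."""
    packed = packed.astype(np.uint8)          # work on raw bytes
    low = packed & 0x0F                       # first value: low nibble
    high = (packed >> 4) & 0x0F               # second value: high nibble
    nibbles = np.stack([low, high], axis=-1).reshape(-1)
    # Sign-extend 4-bit values: 0..7 stay positive, 8..15 map to -8..-1.
    return (nibbles.astype(np.int16) - (nibbles >= 8) * 16).astype(np.int8)

# 0x21 packs the pair (1, 2); -8 is byte 0xF8, packing (-8, -1).
print(unpack_int4(np.array([0x21, -8], dtype=np.int8)))
```

Inference-side code would apply this per weight tensor before (or fused into) the int8 compute path.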
---------
Co-authored-by: Ted Themistokleous <[email protected]>