Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions include/onnxruntime/core/framework/ortdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,27 @@ struct OrtDevice {
INTEL = 0x8086, // OpenVINO
};

// Translates a vendor id into the upper-case name of the matching OrtDevice::VendorIds entry.
// Ids that match none of the entries checked below yield an empty string.
constexpr static const char* VendorIdToString(OrtDevice::VendorId vendorId) {
  if (vendorId == OrtDevice::VendorIds::AMD) {
    return "AMD";
  }
  if (vendorId == OrtDevice::VendorIds::NVIDIA) {
    return "NVIDIA";
  }
  if (vendorId == OrtDevice::VendorIds::ARM) {
    return "ARM";
  }
  if (vendorId == OrtDevice::VendorIds::MICROSOFT) {
    return "MICROSOFT";
  }
  if (vendorId == OrtDevice::VendorIds::HUAWEI) {
    return "HUAWEI";
  }
  if (vendorId == OrtDevice::VendorIds::QUALCOMM) {
    return "QUALCOMM";
  }
  if (vendorId == OrtDevice::VendorIds::INTEL) {
    return "INTEL";
  }
  // Unrecognized vendor.
  return "";
}

constexpr OrtDevice(DeviceType device_type_, MemoryType memory_type_, VendorId vendor_id_, DeviceId device_id_,
Alignment alignment) /*noexcept*/
: device_type(device_type_),
Expand Down
5 changes: 4 additions & 1 deletion onnxruntime/core/platform/linux/device_discovery.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
// Licensed under the MIT License.

#include "core/platform/device_discovery.h"
#include "core/framework/ortdevice.h"

#include <filesystem>
#include <fstream>
#include <iterator>
#include <optional>
#include <string>
#include <string_view>

#include "core/common/common.h"
Expand Down Expand Up @@ -124,7 +126,7 @@ Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevic
ORT_RETURN_IF_ERROR(ReadValueFromFile(vendor_id_path, vendor_id));
gpu_device.vendor_id = vendor_id;

// TODO vendor name
gpu_device.vendor = OrtDevice::VendorIdToString(gpu_device.vendor_id);

// device id
uint16_t device_id{};
Expand All @@ -139,6 +141,7 @@ Status GetGpuDeviceFromSysfs(const GpuSysfsPathInfo& path_info, OrtHardwareDevic
is_gpu_discrete.has_value()) {
gpu_device.metadata.Add("Discrete", (*is_gpu_discrete ? "1" : "0"));
}
gpu_device.metadata.Add("bus_id", std::filesystem::read_symlink(sysfs_path / "device").filename().string()); // e.g. 0000:65:00.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible to infer whether the GPU is discrete from the bus_id? I'm trying to figure out how to determine that - any pointers would be appreciated.

Copy link
Author

@theHamsta theHamsta Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi! I don't know whether this is reliable, but it seems that an iGPU has a parent bus with an id matching the pattern pciXXXX:00. A dGPU seems to have names like card1 or higher, while an iGPU would be named card0. So it's rather the topology of the bus than the bus id of the GPU itself.

dGPU

realpath /sys/class/drm/card1/device 
/sys/devices/pci0000:64/0000:64:00.0/0000:65:00.0

iGPUs would often have the parent path and be named card0

/sys/devices/pciXXXX:00/XXXX:00:00.0/

I don't have an iGPU so my node

00:00.0 Host bridge: Intel Corporation Sky Lake-E DMI3 Registers (rev 07)

or

/sys/devices/pci0000:00/

wouldn't have any drm nodes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to know, thanks for sharing that observation


gpu_device.type = OrtHardwareDeviceType_GPU;

Expand Down
41 changes: 41 additions & 0 deletions onnxruntime/core/platform/posix/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limitations under the License.
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <filesystem>
#if !defined(_AIX)
#include <sys/syscall.h>
#endif
Expand Down Expand Up @@ -591,6 +592,46 @@ class PosixEnv : public Env {
char* val = getenv(var_name.c_str());
return val == NULL ? std::string() : std::string(val);
}
// Return the directory (with a trailing '/') that contains the shared library or executable of the
// currently running code. This makes it possible to load other shared libraries installed next to
// our core runtime code. Returns an empty PathString if the location cannot be determined.
PathString GetRuntimePath() const override {
  // Must be one of the symbols exported in libonnxruntime.{so,dylib}.
  void* symbol_from_this_library = dlsym(RTLD_DEFAULT, "OrtGetApiBase");

  // We will find OrtGetApiBase if onnxruntime is loaded as a shared library.
  Dl_info dl_info{};
  if (symbol_from_this_library != nullptr &&
      dladdr(symbol_from_this_library, &dl_info) != 0 &&
      dl_info.dli_fname != nullptr) {
    // dli_fname names the library *file*; strip the file name so that we return its directory,
    // consistent with the executable fallback below.
    const std::filesystem::path library_dir = std::filesystem::path(dl_info.dli_fname).parent_path();
    if (!library_dir.empty()) {
      return PathString(library_dir.string()) + "/";
    }
    // No directory component available: fall through to the executable-based lookup.
  }

  // Otherwise use the directory of the current executable to mirror Windows behavior.
#if __linux__
  // Use the non-throwing overload so a missing/unreadable /proc yields an empty path, not an exception.
  std::error_code ec;
  const std::filesystem::path exe_path = std::filesystem::read_symlink("/proc/self/exe", ec);
  if (ec) {
    return PathString();
  }
  return PathString(exe_path.parent_path().string()) + "/";
#else
  // TODO: MacOS could use _NSGetExecutablePath, but this needs to be tested!
  return PathString();
#endif
}


// Return the path of the executable/shared library for the current running code. This is to make it
// possible to load other shared libraries installed next to our core runtime code.
PathString GetRuntimePath() const override {
Dl_info dl_info{};
// Must be one of the symbols exported in libonnxruntime.{so,dynlib}.
void* symbol_from_this_library = dlsym(RTLD_DEFAULT, "OrtGetApiBase");
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be an exported symbol. onnxruntime_core.a does not have exported symbols by itself (we could add one though); they are only exported by onnxruntime.so.

// We will find OrtGetApiBase if onnxruntime is loaded as a shared library
if (dladdr(symbol_from_this_library, &dl_info) && dl_info.dli_fname) {
return PathString(dl_info.dli_fname) + "/";
} else {
// else use path of current executable to mirror Windows behavior
#if __linux__
return PathString(std::filesystem::read_symlink(std::filesystem::path("/proc/self/exe")).parent_path()) + "/";
#else
// TODO: MacOS could use _NSGetExecutablePath, but this needs to be tested!
return PathString();
#endif
}
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@edgchen1 This resolves the failing test under Linux. I could alternatively put std::filesystem::read_symlink(std::filesystem::path("/proc/self/exe")).parent_path() in the unit test instead and move PosixEnv::GetRuntimePath to a separate PR.

I'm not sure whether the Windows behavior to set runtime path to current exe's dir is always desired (but it would be needed to behave the same in tests like on Windows)

private:
Telemetry telemetry_provider_;
Expand Down
21 changes: 21 additions & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "core/providers/shared_library/provider_api.h"
#include "nv_provider_factory.h"
#include <atomic>
#include <string>
#include "nv_execution_provider.h"
#include "nv_provider_factory_creator.h"
#include "nv_data_transfer.h"
Expand Down Expand Up @@ -575,6 +576,7 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory {
* @return True if the device is a supported NVIDIA GPU, false otherwise.
*/
bool IsOrtHardwareDeviceSupported(const OrtHardwareDevice& device) {
#if _WIN32
const auto& metadata_entries = device.metadata.Entries();
const auto it = metadata_entries.find("LUID");
if (it == metadata_entries.end()) {
Expand Down Expand Up @@ -616,6 +618,25 @@ struct NvTensorRtRtxEpFactory : OrtEpFactory {
}

return false;
#else
const auto& metadata_entries = device.metadata.Entries();
const auto it = metadata_entries.find("bus_id");
if (it == metadata_entries.end()) {
return false;
}
auto& target_id = it->second;
int cuda_device_idx = 0;
if (cudaSuccess != cudaDeviceGetByPCIBusId(&cuda_device_idx, target_id.c_str())) {
return false;
}

cudaDeviceProp prop;
if (cudaGetDeviceProperties(&prop, cuda_device_idx) != cudaSuccess) {
return false;
}
// Ampere architecture or newer is required.
return prop.major >= 8;
#endif
}

// Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/nv_tensorrt_rtx/version_script.lds
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
VERS_1.0 {
global:
GetProvider;
CreateEpFactories;
ReleaseEpFactory;

# Hide everything else.
local:
Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/test/providers/nv_tensorrt_rtx/nv_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ INSTANTIATE_TEST_SUITE_P(NvExecutionProviderTest, TypeTests,
),
[](const testing::TestParamInfo<TypeTests::ParamType>& info) { return getTypeAsName(info.param); });

#ifdef _WIN32
static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
// Access the underlying InferenceSession.
const OrtSession* ort_session = session;
Expand All @@ -233,7 +232,6 @@ static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
}

// Tests autoEP feature to automatically select an EP that supports the GPU.
// Currently only works on Windows.
TEST(NvExecutionProviderTest, AutoEp_PreferGpu) {
PathString model_name = ORT_TSTR("nv_execution_provider_auto_ep.onnx");
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this test is currently still failing: NvTensorRtRtxEpFactory::CreateEp is called but not implemented https://github.com/theHamsta/onnxruntime/blob/bc7f4bbc92ed0609e18e1c4c80c3c8d560ce1729/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_factory.cc#L698. The other test passes

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where would the internal factories on Windows come from
https://github.com/theHamsta/onnxruntime/blob/20d2c8912708b37fb914fbb0c719914a4040830c/onnxruntime/core/session/provider_policy_context.cc#L351-L358 ? I don't have them on Linux when no execution providers are appended to the session options. My desired device get selected, but NvEp::CreateEp is called.

NvEp like other EPs does not implement CreateEp. I guess IExecutionProvider.CreateProviders is the modern version of it?

std::string graph_name = "test";
Expand All @@ -243,7 +241,11 @@ TEST(NvExecutionProviderTest, AutoEp_PreferGpu) {
CreateBaseModel(model_name, graph_name, dims);

{
#if _WIN32
ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("onnxruntime_providers_nv_tensorrt_rtx.dll"));
#else
ort_env->RegisterExecutionProviderLibrary(kNvTensorRTRTXExecutionProvider, ORT_TSTR("libonnxruntime_providers_nv_tensorrt_rtx.so"));
#endif

Ort::SessionOptions so;
so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
Expand Down Expand Up @@ -398,7 +400,5 @@ TEST(NvExecutionProviderTest, DataTransfer) {
device_tensor = Ort::Value();
}

#endif

} // namespace test
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ namespace test {

RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options, std::unordered_map<std::string, std::string>& option_map) {
RegisteredEpDeviceUniquePtr nv_tensorrt_rtx_ep;
#ifdef _WIN32
/// Since this test runs after other tests that use the registration interface, this test has to use it as well,
/// as otherwise the kernel registry inside the EP will not be populated. The legacy APIs only call the initialize once.
Utils::RegisterAndGetNvTensorRtRtxEp(*ort_env, nv_tensorrt_rtx_ep);
Expand All @@ -26,9 +25,6 @@ RegisteredEpDeviceUniquePtr AppendTrtEtxEP(Ort::SessionOptions& session_options,
}
}
session_options.AppendExecutionProvider_V2(*ort_env, {selected_device}, option_map);
#else
session_options.AppendExecutionProvider(onnxruntime::kNvTensorRTRTXExecutionProvider, option_map);
#endif
return nv_tensorrt_rtx_ep;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

namespace onnxruntime {
namespace test {
#ifdef _WIN32

Utils::NvTensorRtRtxEpInfo Utils::nv_tensorrt_rtx_ep_info;

Expand Down Expand Up @@ -59,7 +58,6 @@ void Utils::RegisterAndGetNvTensorRtRtxEp(Ort::Env& env, RegisteredEpDeviceUniqu
c_api.UnregisterExecutionProviderLibrary(env, nv_tensorrt_rtx_ep_info.registration_name.c_str());
});
}
#endif // _WIN32

void CreateBaseModel(const PathString& model_name,
std::string graph_name,
Expand Down