Skip to content

Commit 53dc564

Browse files
committed
trying something else
1 parent 1ae9ded commit 53dc564

File tree

5 files changed

+91
-73
lines changed

5 files changed

+91
-73
lines changed

backends/cuda/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,15 @@ set(_aoti_cuda_sources
4646
)
4747
# Build as SHARED library (.dll) on Windows MSVC, otherwise STATIC
4848
if(MSVC)
49-
add_library(aoti_cuda SHARED ${_aoti_cuda_sources} ${CMAKE_CURRENT_SOURCE_DIR}/aoti_cuda.def)
49+
add_library(aoti_cuda SHARED ${_aoti_cuda_sources})
5050
# Define export macros for Windows DLL
5151
target_compile_definitions(aoti_cuda PRIVATE
5252
EXPORT_AOTI_FUNCTIONS
5353
BUILDING_CUDA_BACKEND
5454
)
5555
# Ensure proper DLL import/export library naming on Windows with config-specific paths
5656
set_target_properties(aoti_cuda PROPERTIES
57-
WINDOWS_EXPORT_ALL_SYMBOLS OFF # We use explicit exports via AOTI_CUDA_EXPORT and .def file
57+
WINDOWS_EXPORT_ALL_SYMBOLS OFF # We use explicit exports via AOTI_CUDA_EXPORT
5858
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/$<CONFIG>
5959
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/$<CONFIG>
6060
ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/$<CONFIG>

backends/cuda/aoti_cuda.def

Lines changed: 0 additions & 3 deletions
This file was deleted.
backends/cuda/runtime/CudaBackend.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
2+
3+
#pragma once
4+
5+
#include <executorch/runtime/backend/interface.h>
6+
#include <executorch/runtime/core/error.h>
7+
#include <executorch/runtime/core/evalue.h>
8+
9+
namespace executorch::backends::cuda {
10+
11+
class ET_EXPERIMENTAL CudaBackend final
12+
: public ::executorch::runtime::BackendInterface {
13+
private:
14+
/**
15+
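* Load AOTI function pointers from the shared library into the handle. NOTE(review): `struct AOTIDelegateHandle` in the signature is an elaborated type specifier; with no prior declaration visible in this header it forward-declares the type in this namespace — confirm this matches the namespace of the real AOTIDelegateHandle definition.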
16+
*/
17+
::executorch::runtime::Error load_function_pointers_into_handle(
18+
void* so_handle,
19+
struct AOTIDelegateHandle* handle) const;
20+
21+
public:
22+
/**
23+
* Check if the CUDA backend is available.
24+
*/
25+
bool is_available() const override;
26+
27+
/**
28+
* Initialize the backend with the given context and compile specs.
29+
* Called once per loaded binary blob.
30+
*/
31+
::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init(
32+
::executorch::runtime::BackendInitContext& context,
33+
::executorch::runtime::FreeableBuffer* processed,
34+
::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec>
35+
compile_specs) const override;
36+
37+
/**
38+
* Execute the backend with the given context and arguments.
39+
* Called once per execution.
40+
*/
41+
::executorch::runtime::Error execute(
42+
::executorch::runtime::BackendExecutionContext& context,
43+
::executorch::runtime::DelegateHandle* handle,
44+
::executorch::runtime::Span<::executorch::runtime::EValue*> args)
45+
const override;
46+
47+
/**
48+
* Destroy the backend handle and clean up resources.
49+
*/
50+
void destroy(::executorch::runtime::DelegateHandle* handle) const override;
51+
};
52+
53+
} // namespace executorch::backends::cuda

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 22 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
#include <string>
2020
#include <vector>
2121

22-
// Include our shim layer headers
22+
// Include class header and shim layer headers
23+
#include <executorch/backends/cuda/runtime/CudaBackend.h>
2324
#include <executorch/backends/aoti/aoti_delegate_handle.h>
2425
#include <executorch/backends/aoti/common_shims.h>
25-
#include <executorch/backends/cuda/runtime/cuda_backend_init.h>
2626
#include <executorch/backends/cuda/runtime/platform/platform.h>
2727
#include <executorch/backends/cuda/runtime/shims/memory.h>
2828
#include <executorch/backends/cuda/runtime/utils.h>
@@ -48,12 +48,9 @@ using executorch::runtime::Result;
4848
using executorch::runtime::Span;
4949
using executorch::runtime::etensor::Tensor;
5050

51-
class ET_EXPERIMENTAL CudaBackend final
52-
: public ::executorch::runtime::BackendInterface {
53-
private:
54-
Error load_function_pointers_into_handle(
55-
void* so_handle,
56-
AOTIDelegateHandle* handle) const {
51+
Error CudaBackend::load_function_pointers_into_handle(
52+
void* so_handle,
53+
AOTIDelegateHandle* handle) const {
5754
#define LOAD_SYMBOL(member, name) \
5855
do { \
5956
auto symbol_res = get_function(so_handle, #name); \
@@ -88,17 +85,14 @@ class ET_EXPERIMENTAL CudaBackend final
8885
return Error::Ok;
8986
}
9087

91-
public:
92-
bool is_available() const override {
93-
return 1;
94-
}
88+
bool CudaBackend::is_available() const {
89+
return 1;
90+
}
9591

96-
// Once per loaded binary blob
97-
Result<DelegateHandle*> init(
92+
Result<DelegateHandle*> CudaBackend::init(
9893
BackendInitContext& context,
99-
FreeableBuffer* processed, // This will be a empty buffer
100-
ArrayRef<CompileSpec> compile_specs // This will be my empty list
101-
) const override {
94+
FreeableBuffer* processed,
95+
ArrayRef<CompileSpec> compile_specs) const {
10296
std::string method_name;
10397
for (const CompileSpec& spec : compile_specs) {
10498
if (std::strcmp(spec.key, "method_name") == 0) {
@@ -196,11 +190,10 @@ class ET_EXPERIMENTAL CudaBackend final
196190
return (DelegateHandle*)handle; // Return the handle post-processing
197191
}
198192

199-
// Once per execution
200-
Error execute(
201-
BackendExecutionContext& context,
202-
DelegateHandle* handle_,
203-
Span<EValue*> args) const override {
193+
Error CudaBackend::execute(
194+
BackendExecutionContext& context,
195+
DelegateHandle* handle_,
196+
Span<EValue*> args) const {
204197
AOTIDelegateHandle* handle = (AOTIDelegateHandle*)handle_;
205198

206199
size_t n_inputs;
@@ -322,7 +315,7 @@ class ET_EXPERIMENTAL CudaBackend final
322315
return Error::Ok;
323316
}
324317

325-
void destroy(DelegateHandle* handle_) const override {
318+
void CudaBackend::destroy(DelegateHandle* handle_) const {
326319
if (handle_ == nullptr) {
327320
return;
328321
}
@@ -367,54 +360,21 @@ class ET_EXPERIMENTAL CudaBackend final
367360
delete handle;
368361
clear_all_tensors();
369362
}
370-
};
371363

372364
} // namespace executorch::backends::cuda
373365

374366
namespace executorch::backends {
375-
namespace {
376-
// Static backend instance and registration
367+
368+
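// Backend instance - static storage duration on all platforms (no longer inside the anonymous namespace, so `cls` and `cuda_backend` have external linkage)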
377369
auto cls = cuda::CudaBackend();
378-
executorch::runtime::Backend backend{"CudaBackend", &cls};
370+
executorch::runtime::Backend cuda_backend{"CudaBackend", &cls};
379371

380372
#ifndef _WIN32
381-
// On non-Windows platforms, use static initialization
373+
// On non-Windows platforms, use automatic static initialization
374+
namespace {
382375
static executorch::runtime::Error success_with_compiler =
383-
register_backend(backend);
384-
#endif
385-
376+
register_backend(cuda_backend);
386377
} // namespace
387-
388-
// InitCudaBackend is exported for explicit backend registration on Windows
389-
extern "C" CUDA_BACKEND_INIT_API void InitCudaBackend() {
390-
// Log immediately to confirm function is entered
391-
ET_LOG(Info, "InitCudaBackend: Function entered");
392-
assert(1==2);
393-
394-
#ifdef _WIN32
395-
ET_LOG(Info, "InitCudaBackend: Windows path");
396-
// On Windows, explicitly register the backend since DLL static initializers
397-
// don't run reliably
398-
static bool initialized = false;
399-
if (!initialized) {
400-
ET_LOG(Info, "Registering CUDA backend on Windows");
401-
auto error = register_backend(backend);
402-
if (error == executorch::runtime::Error::Ok) {
403-
ET_LOG(Info, "Successfully registered CudaBackend");
404-
} else {
405-
ET_LOG(Error, "Failed to register CudaBackend: error code %d", (int)error);
406-
}
407-
initialized = true;
408-
} else {
409-
ET_LOG(Info, "CUDA backend already initialized");
410-
}
411-
#else
412-
ET_LOG(Info, "InitCudaBackend: Non-Windows path");
413-
// On other platforms, static initialization already happened
414-
(void)success_with_compiler;
415378
#endif
416-
417-
ET_LOG(Info, "InitCudaBackend: Function exiting");
418-
}
419379

420380
} // namespace executorch::backends

examples/models/voxtral/multimodal.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
#include <executorch/runtime/core/error.h>
2626
#include <executorch/runtime/platform/log.h>
2727

28+
// Manually register the CUDA backend
29+
#include <executorch/backends/cuda/runtime/CudaBackend.h>
30+
#include <executorch/runtime/backend/interface.h>
31+
2832
#if defined(ET_USE_THREADPOOL)
2933
#include <executorch/extension/threadpool/cpuinfo_utils.h>
3034
#include <executorch/extension/threadpool/threadpool.h>
@@ -282,14 +286,18 @@ MultimodalInput processAudioFile(
282286

283287
} // namespace
284288

285-
#include <executorch/backends/cuda/runtime/cuda_backend_init.h>
286289

287290
int32_t main(int32_t argc, char** argv) {
288-
// On Windows, explicitly initialize the CUDA backend to ensure
289-
// static initializers in the DLL run
290-
ET_LOG(Info, "About to call InitCudaBackend");
291-
InitCudaBackend();
292-
ET_LOG(Info, "InitCudaBackend returned");
291+
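// Manually register the CUDA backend (required on Windows). On other platforms cuda_backend.cpp already registers a backend named "CudaBackend" during static initialization, so this second registration presumably fails and is only logged below - TODO confirm.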
292+
ET_LOG(Info, "Registering CUDA backend");
293+
static auto cuda_backend_impl = ::executorch::backends::cuda::CudaBackend();
294+
static auto cuda_backend = ::executorch::runtime::Backend{"CudaBackend", &cuda_backend_impl};
295+
auto error = ::executorch::runtime::register_backend(cuda_backend);
296+
if (error == ::executorch::runtime::Error::Ok) {
297+
ET_LOG(Info, "Successfully registered CudaBackend");
298+
} else {
299+
ET_LOG(Error, "Failed to register CudaBackend: error code %d", (int)error);
300+
}
293301

294302
gflags::ParseCommandLineFlags(&argc, &argv, true);
295303

0 commit comments

Comments
 (0)