File tree Expand file tree Collapse file tree 3 files changed +23
-14
lines changed
Expand file tree Collapse file tree 3 files changed +23
-14
lines changed Original file line number Diff line number Diff line change @@ -146,7 +146,7 @@ class ET_EXPERIMENTAL CudaBackend final
146146 outfile.close ();
147147
148148 // Load the lib
149- Result<void *> lib_handle_res = load_library (so_path);
149+ Result<void *> lib_handle_res = load_library (so_path. c_str () );
150150 if (!lib_handle_res.ok ()) {
151151 return lib_handle_res.error ();
152152 }
Original file line number Diff line number Diff line change 1212#include < string>
1313
1414#ifdef _WIN32
15+ #include < malloc.h>
1516#include < windows.h>
1617#else // Posix
1718#include < dlfcn.h>
1819#include < unistd.h>
20+ #include < cstdlib>
1921#endif
2022
2123namespace executorch {
@@ -26,11 +28,7 @@ executorch::runtime::Result<void*> load_library(const char* path) {
2628#ifdef _WIN32
2729 auto lib_handle = LoadLibrary (path);
2830 if (lib_handle == NULL ) {
29- ET_LOG (
30- Error,
31- " Failed to load %s with error: %lu" ,
32- path,
33- GetLastError ());
31+ ET_LOG (Error, " Failed to load %s with error: %lu" , path, GetLastError ());
3432 return executorch::runtime::Error::AccessFailed;
3533 }
3634
@@ -96,6 +94,22 @@ int32_t get_process_id() {
9694#endif
9795}
9896
97+ void * aligned_alloc (size_t alignment, size_t size) {
98+ #ifdef _WIN32
99+ return _aligned_malloc (size, alignment);
100+ #else
101+ return std::aligned_alloc (alignment, size);
102+ #endif
103+ }
104+
/// Releases memory obtained from aligned_alloc().
///
/// @param ptr Pointer returned by aligned_alloc(); may be nullptr (no-op).
void aligned_free(void* ptr) {
#ifndef _WIN32
  // POSIX: std::aligned_alloc memory is released with ordinary free.
  std::free(ptr);
#else
  // Windows: _aligned_malloc memory must go through _aligned_free.
  _aligned_free(ptr);
#endif
}
112+
99113} // namespace cuda
100114} // namespace backends
101115} // namespace executorch
Original file line number Diff line number Diff line change 1313#include < executorch/backends/cuda/runtime/utils.h>
1414#include < executorch/runtime/platform/log.h>
1515#include < cstdint>
16- #include < cstdlib> // For posix_memalign
1716#include < memory>
1817#include < unordered_map>
1918#include < unordered_set>
@@ -230,15 +229,11 @@ AOTITorchError aoti_torch_empty_strided(
230229 cudaMallocAsync (&ptr, static_cast <size_t >(nbytes), cudaStreamDefault));
231230 } else if (device_type == static_cast <int32_t >(SupportedDevices::CPU)) {
232231 // Ensure 16-byte alignment for CPU memory to match CUDA requirements
233- int result = posix_memalign (&ptr, 16 , nbytes);
234- ET_CHECK_OR_RETURN_ERROR (
235- result == 0 ,
236- MemoryAllocationFailed,
237- " Failed to allocate aligned CPU memory" );
232 ptr = aligned_alloc (16 , nbytes);
238233 ET_CHECK_OR_RETURN_ERROR (
239234 ptr != nullptr ,
240235 MemoryAllocationFailed,
241- " Failed to call posix_memalign " );
236+ " Failed to allocate aligned CPU memory " );
242237 } else {
243238 ET_CHECK_OR_RETURN_ERROR (
244239 false ,
@@ -339,7 +334,7 @@ AOTITorchError aoti_torch_delete_tensor_object(Tensor* tensor) {
339334 Internal,
340335 " Expected host memory but got managed!" )
341336 // This is CPU memory - free immediately
342- free (data_ptr);
337+ aligned_free (data_ptr);
343338 data_ptr = nullptr ;
344339 }
345340
You can’t perform that action at this time.
0 commit comments