2222#include < vector>
2323
2424#include < map>
25+ #include < list>
2526
2627#include < csignal>
2728#include < setjmp.h>
// Port the server binds to when no override is supplied — presumably TCP; confirm against the listener setup.
3435#define DEFAULT_PORT 14833
// Upper bound on simultaneously served client connections.
3536#define MAX_CLIENTS 10
3637
37- std::map<conn_t *, std::map<void *, size_t >> managed_ptrs;
38+ struct ManagedPtr {
39+ void * src;
40+ void * dst;
41+ size_t size;
42+ cudaMemcpyKind kind;
43+
44+ ManagedPtr () : src(nullptr ), dst(nullptr ), size(0 ), kind(cudaMemcpyHostToDevice) {}
45+
46+ ManagedPtr (void * src, void * dst, size_t s, cudaMemcpyKind k)
47+ : src(src), dst(dst), size(s), kind(k) {}
48+ };
49+
50+
// Most recent managed transfer tracked for each connection.
// NOTE(review): a single ManagedPtr per connection means every call to
// append_managed_ptr overwrites the previous entry — confirm only one region
// ever needs tracking at a time (the newly added <list> include suggests
// multiple entries may have been intended).
51+ std::map<conn_t *, ManagedPtr> managed_ptrs;
// Host-function pointer registered for each connection (see append_host_func_ptr).
3852std::map<conn_t *, void *> host_funcs;
3953
// Jump target used to recover from an intercepted SIGSEGV.
4054static jmp_buf catch_segfault;
@@ -55,43 +69,83 @@ static void segfault(int sig, siginfo_t *info, void *unused) {
5569
5670 std::cout << " segfault!!" << faulting_address << std::endl;
5771
58- for (const auto &conn_entry : managed_ptrs) {
59- for (const auto &mem_entry : conn_entry.second ) {
60- size_t allocated_size = mem_entry.second ;
72+ for (const auto & conn_entry : managed_ptrs) {
73+ const ManagedPtr& mem_entry = conn_entry.second ;
74+
75+ void * allocated_ptr;
76+ size_t allocated_size = mem_entry.size ;
77+
78+ if (mem_entry.kind == cudaMemcpyDeviceToHost) {
79+ allocated_ptr = mem_entry.dst ;
80+ } else if (mem_entry.kind == cudaMemcpyHostToDevice) {
81+ allocated_ptr = mem_entry.src ;
82+ }
6183
62- // Check if faulting address is inside this allocated region
63- if ((uintptr_t )mem_entry.first <= (uintptr_t )faulting_address &&
64- (uintptr_t )faulting_address <
65- ((uintptr_t )mem_entry.first + allocated_size)) {
66- found = 1 ;
67- size = allocated_size;
84+ // Check if faulting address is within allocated memory
85+ if ((uintptr_t )allocated_ptr <= (uintptr_t )faulting_address &&
86+ (uintptr_t )faulting_address < (uintptr_t )allocated_ptr + allocated_size) {
87+ found = 1 ;
88+ size = allocated_size;
6889
69- // Align memory allocation to the closest possible address
70- uintptr_t aligned = (uintptr_t )faulting_address & ~(allocated_size - 1 );
90+ // Align to system page size
91+ size_t page_size = sysconf (_SC_PAGE_SIZE);
92+ uintptr_t aligned_addr = (uintptr_t )faulting_address & ~(page_size - 1 );
7193
72- // Allocate memory at the faulting address
73- void *allocated =
74- mmap ((void *)aligned,
75- allocated_size + (uintptr_t )faulting_address - aligned,
76- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1 , 0 );
94+ // Allocate memory at the faulting address
95+ void * allocated = mmap ((void *)aligned_addr, allocated_size,
96+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 );
7797
78- if (allocated == MAP_FAILED) {
98+ if (allocated == MAP_FAILED) {
7999 perror (" Failed to allocate memory at faulting address" );
80100 _exit (1 );
81- }
101+ }
82102
83- printf (" The address of x is: %p\n " , (void *)allocated);
103+ char msg[128 ];
104+ snprintf (msg, sizeof (msg), " Allocated memory at: %p\n " , allocated);
105+ write (STDERR_FILENO, msg, strlen (msg));
84106
85- // if (rpc_write(conn_entry.first, (void*)&allocated, sizeof(void*)) <
86- // 0) {
87- // std::cout << "failed to write memory: " << &faulting_address <<
88- // std::endl;
89- // }
107+ void * scuda_intercept_result;
90108
91- // printf("wrote data...\n");
109+ // Validate connection
110+ if (!conn_entry.first ) {
111+ std::cerr << " Error: Connection is NULL in invoke_host_func" << std::endl;
112+ return ;
113+ }
92114
115+ printf (" sending memory %p\n " , allocated_ptr);
116+
117+ if (rpc_write_start_request (conn_entry.first , 3 ) < 0 || rpc_write (conn_entry.first , &mem_entry.kind , sizeof (enum cudaMemcpyKind)) < 0 )
118+ return ;
119+
120+ // we need to swap device directions in this case
121+ switch (mem_entry.kind ) {
122+ case cudaMemcpyDeviceToHost:
123+ if (rpc_write (conn_entry.first , &mem_entry.src , sizeof (void *)) < 0 ||
124+ rpc_write (conn_entry.first , &size, sizeof (size_t )) < 0 ||
125+ rpc_wait_for_response (conn_entry.first ) < 0 || rpc_read (conn_entry.first , mem_entry.dst , size) < 0 )
126+ return ;
127+ case cudaMemcpyHostToDevice:
128+ if (rpc_write (conn_entry.first , &mem_entry.dst , sizeof (void *)) < 0 ||
129+ rpc_write (conn_entry.first , &size, sizeof (size_t )) < 0 ||
130+ rpc_write (conn_entry.first , allocated, size) < 0 || rpc_wait_for_response (conn_entry.first ) < 0 ) {
131+ return ;
132+ }
93133 break ;
134+ case cudaMemcpyDeviceToDevice:
135+ if (rpc_write (conn_entry.first , &mem_entry.dst , sizeof (void *)) < 0 ||
136+ rpc_write (conn_entry.first , &mem_entry.src , sizeof (void *)) < 0 ||
137+ rpc_write (conn_entry.first , &size, sizeof (size_t )) < 0 ||
138+ rpc_wait_for_response (conn_entry.first ) < 0 )
139+ break ;
94140 }
141+
142+ cudaError_t return_value;
143+
144+ if (rpc_read (conn_entry.first , &return_value, sizeof (cudaError_t)) < 0 ||
145+ rpc_read_end (conn_entry.first ) < 0 )
146+ return ;
147+
148+ return ;
95149 }
96150 }
97151
@@ -169,11 +223,10 @@ void append_host_func_ptr(const void *conn, void *ptr) {
169223 host_funcs[(conn_t *)conn] = ptr;
170224}
171225
172- void append_managed_ptr (const void *conn, cudaPitchedPtr ptr ) {
226+ void append_managed_ptr (const void *conn, void * srcPtr, void * dstPtr, size_t size, cudaMemcpyKind kind ) {
173227 conn_t *connfd = (conn_t *)conn;
174228
175- // Ensure the inner map exists before inserting the cudaPitchedPtr
176- managed_ptrs[connfd][ptr.ptr ] = ptr.pitch ;
229+ managed_ptrs[connfd] = ManagedPtr (srcPtr, dstPtr, size, kind);
177230}
178231
179232static void set_segfault_handlers () {
0 commit comments