Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ add_subdirectory(cuda)
add_subdirectory(nvml)

set(LIBVGPU vgpu)
add_library(${LIBVGPU} SHARED libvgpu.c utils.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod>)
add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod>)
target_compile_options(${LIBVGPU} PUBLIC ${LIBRARY_COMPILE_FLAGS})
target_link_libraries(${LIBVGPU} PUBLIC -lcuda -lnvidia-ml)

Expand Down
2 changes: 0 additions & 2 deletions src/allocator/allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ size_t round_up(size_t size, size_t unit) {
}

int oom_check(const int dev, size_t addon) {
int count1=0;
CUDA_OVERRIDE_CALL(cuda_library_entry,cuDeviceGetCount,&count1);
CUdevice d;
if (dev==-1)
cuCtxGetDevice(&d);
Expand Down
43 changes: 22 additions & 21 deletions src/include/log_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,39 @@

extern FILE *fp1;

#ifdef FILEDEBUG
/*
* Cached log level — initialized once by log_utils_init().
* Default is 2 (WARN/MSG/ERROR) to match original behavior when
* LIBCUDA_LOG_LEVEL is unset.
*
* Levels: 0=off, 1=error-only, 2=warn(default), 3=info, 4=debug
*/
extern int g_log_level;

/* Call once during early initialization to cache LIBCUDA_LOG_LEVEL. */
void log_utils_init(void);

#ifdef FILEDEBUG
#define LOG_DEBUG(msg, ...) { \
if ((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=4)) {\
if (g_log_level >= 4) {\
if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
fprintf(fp1, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_INFO(msg, ...) { \
if ( \
/*(getenv("LIBCUDA_LOG_LEVEL")==NULL) || */\
(getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=3)) {\
if (g_log_level >= 3) {\
if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
fprintf(fp1, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_WARN(msg, ...) { \
if ( \
(getenv("LIBCUDA_LOG_LEVEL")==NULL) || \
((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=2))) {\
if (g_log_level >= 2) {\
if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
fprintf(fp1, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_MSG(msg, ...) { \
if ( \
(getenv("LIBCUDA_LOG_LEVEL")==NULL) || \
((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=2))) {\
if (g_log_level >= 2) {\
if (fp1==NULL) fp1 = fopen ("/tmp/vgpulog", "a"); \
fprintf(fp1, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
Expand All @@ -47,27 +53,22 @@ extern FILE *fp1;
}
#else
#define LOG_DEBUG(msg, ...) { \
if ((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=4)) {\
if (g_log_level >= 4) {\
fprintf(stderr, "[HAMI-core Debug(%d:%ld:%s:%d)]: "msg"\n",getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_INFO(msg, ...) { \
if ( \
(getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=3)) {\
if (g_log_level >= 3) {\
fprintf(stderr, "[HAMI-core Info(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_WARN(msg, ...) { \
if ( \
(getenv("LIBCUDA_LOG_LEVEL")==NULL) || \
((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=2))) {\
if (g_log_level >= 2) {\
fprintf(stderr, "[HAMI-core Warn(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
#define LOG_MSG(msg, ...) { \
if ( \
(getenv("LIBCUDA_LOG_LEVEL")==NULL) || \
((getenv("LIBCUDA_LOG_LEVEL")!=NULL) && (atoi(getenv("LIBCUDA_LOG_LEVEL"))>=2))) {\
if (g_log_level >= 2) {\
fprintf(stderr, "[HAMI-core Msg(%d:%ld:%s:%d)]: "msg"\n", getpid(),pthread_self(),basename(__FILE__),__LINE__,##__VA_ARGS__); \
}\
}
Expand Down Expand Up @@ -107,7 +108,7 @@ extern FILE *fp1;
#define IF_CHECK_OOM(res) { \
if (res < 0) \
return CUDA_ERROR_OUT_OF_MEMORY; \
}
}


#endif
1 change: 1 addition & 0 deletions src/libvgpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,7 @@ void* __dlsym_hook_section_nvml(void* handle, const char* symbol) {
}

void preInit(){
log_utils_init();
LOG_MSG("Initializing.....");
if (real_dlsym == NULL) {
real_dlsym = dlvsym(RTLD_NEXT,"dlsym","GLIBC_2.2.5");
Expand Down
18 changes: 18 additions & 0 deletions src/log_utils.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include <stdio.h>
#include <stdlib.h>

/*
 * Process-wide logging state shared by the LOG_* macros.
 *
 * Levels: 0=off, 1=error-only, 2=warn (default), 3=info, 4=debug.
 */
enum {
    HAMI_LOG_LEVEL_MIN = 0,
    HAMI_LOG_LEVEL_DEFAULT = 2,
    HAMI_LOG_LEVEL_MAX = 4
};

/*
 * Cached log level, read once from LIBCUDA_LOG_LEVEL by log_utils_init().
 * Starts at the warn level so logging behaves the same as an unset
 * environment variable until (and unless) log_utils_init() overrides it.
 */
int g_log_level = HAMI_LOG_LEVEL_DEFAULT;

/* Log file handle; starts out NULL (unopened). */
FILE *fp1 = NULL;

/*
 * Cache the LIBCUDA_LOG_LEVEL environment variable into g_log_level.
 *
 * - Variable unset: g_log_level is left untouched (default: warn).
 * - Value does not start with a decimal integer (e.g. "" or "debug"):
 *   g_log_level is left untouched, so a typo cannot silently disable
 *   logging the way atoi()'s 0 result would.
 * - Otherwise the leading integer is used, clamped to
 *   [HAMI_LOG_LEVEL_MIN, HAMI_LOG_LEVEL_MAX]. Trailing characters after the
 *   number are ignored, matching the previous atoi() behaviour.
 */
void log_utils_init(void) {
    const char *env = getenv("LIBCUDA_LOG_LEVEL");
    if (env == NULL) {
        return;
    }

    char *end = NULL;
    long parsed = strtol(env, &end, 10);
    if (end == env) {
        /* No digits consumed: not a number, keep the current level. */
        return;
    }

    /*
     * strtol() saturates to LONG_MIN/LONG_MAX on overflow, so clamping here
     * also covers out-of-range input (which is undefined behaviour for atoi).
     */
    if (parsed < HAMI_LOG_LEVEL_MIN) {
        parsed = HAMI_LOG_LEVEL_MIN;
    } else if (parsed > HAMI_LOG_LEVEL_MAX) {
        parsed = HAMI_LOG_LEVEL_MAX;
    }
    g_log_level = (int)parsed;
}
2 changes: 1 addition & 1 deletion src/multiprocess/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ add_library(multiprocess_mod OBJECT multiprocess_memory_limit.c multiprocess_uti
target_compile_options(multiprocess_mod PUBLIC ${LIBRARY_COMPILE_FLAGS})
target_link_libraries(multiprocess_mod PUBLIC nvidia-ml)

add_executable(shrreg-tool shrreg_tool.c)
add_executable(shrreg-tool shrreg_tool.c ${CMAKE_CURRENT_SOURCE_DIR}/../log_utils.c)
target_link_libraries(shrreg-tool multiprocess_mod -lpthread -lcuda)

28 changes: 24 additions & 4 deletions src/multiprocess/multiprocess_memory_limit.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,24 @@ int active_oom_killer() {
}

void pre_launch_kernel() {
uint64_t now = time(NULL);
struct timespec ts;
clock_gettime(CLOCK_REALTIME_COARSE, &ts);
uint64_t now = (uint64_t)ts.tv_sec;

// Fast path: skip mutex if within recording interval (double-checked)
if (now - region_info.last_kernel_time < _record_kernel_interval) {
return;
}

pthread_mutex_lock(&_kernel_mutex);
// Re-check under lock — another thread may have updated
if (now - region_info.last_kernel_time < _record_kernel_interval) {
pthread_mutex_unlock(&_kernel_mutex);
return;
}
region_info.last_kernel_time = now;
pthread_mutex_unlock(&_kernel_mutex);

LOG_INFO("write last kernel time: %ld", now)
// Lock-free update using atomic compare-exchange
uint64_t expected = atomic_load_explicit(&region_info.shared_region->last_kernel_time, memory_order_acquire);
Expand Down Expand Up @@ -1231,10 +1241,20 @@ void resume_all(){
}

int wait_status_self(int status){
// Fast path: use cached slot pointer (set during init_proc_slot_withlock)
if (region_info.my_slot != NULL) {
int32_t cur = atomic_load_explicit(&region_info.my_slot->status, memory_order_acquire);
return (cur == status) ? 1 : 0;
}

// Slow path: linear scan (only if my_slot not yet cached)
int i;
for (i=0;i<region_info.shared_region->proc_num;i++){
if (region_info.shared_region->procs[i].pid==getpid()){
if (region_info.shared_region->procs[i].status==status)
int proc_num = atomic_load_explicit(&region_info.shared_region->proc_num, memory_order_acquire);
int32_t my_pid = getpid();
for (i=0; i < proc_num; i++) {
int32_t slot_pid = atomic_load_explicit(&region_info.shared_region->procs[i].pid, memory_order_acquire);
if (slot_pid == my_pid) {
if (atomic_load_explicit(&region_info.shared_region->procs[i].status, memory_order_acquire) == status)
return 1;
else
return 0;
Expand Down
27 changes: 17 additions & 10 deletions src/multiprocess/multiprocess_utilization_watcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,29 @@ static volatile long g_total_cuda_cores = 0;
extern int pidfound;
int cuda_to_nvml_map_array[CUDA_DEVICE_MAX_COUNT];

/* Cached at init — these values do not change at runtime */
static int cached_sm_limit = 0;
static int cached_util_switch = 0;

void rate_limiter(int grids, int blocks) {
long before_cuda_cores = 0;
long after_cuda_cores = 0;
long kernel_size = grids;

/* Fast exit using cached values — no shared memory access needed */
if (cached_sm_limit >= 100 || cached_sm_limit == 0)
return;
if (cached_util_switch == 0)
return;

while (get_recent_kernel()<0) {
sleep(1);
}
set_recent_kernel(2);
if ((get_current_device_sm_limit(0)>=100) || (get_current_device_sm_limit(0)==0))
return;
if (get_utilization_switch()==0)
return;

LOG_DEBUG("grid: %d, blocks: %d", grids, blocks);
LOG_DEBUG("launch kernel %ld, curr core: %ld", kernel_size, g_cur_cuda_cores);
//if (g_vcuda_config.enable) {
do {
do {
CHECK:
before_cuda_cores = g_cur_cuda_cores;
LOG_DEBUG("current core: %ld", g_cur_cuda_cores);
Expand All @@ -56,8 +62,7 @@ void rate_limiter(int grids, int blocks) {
goto CHECK;
}
after_cuda_cores = before_cuda_cores - kernel_size;
} while (!CAS(&g_cur_cuda_cores, before_cuda_cores, after_cuda_cores));
//}
} while (!CAS(&g_cur_cuda_cores, before_cuda_cores, after_cuda_cores));
}

static void change_token(long delta) {
Expand Down Expand Up @@ -221,10 +226,12 @@ void* utilization_watcher() {
}

void init_utilization_watcher() {
LOG_INFO("set core utilization limit to %d",get_current_device_sm_limit(0));
cached_sm_limit = get_current_device_sm_limit(0);
cached_util_switch = get_utilization_switch();
LOG_INFO("set core utilization limit to %d", cached_sm_limit);
setspec();
pthread_t tid;
if ((get_current_device_sm_limit(0)<=100) && (get_current_device_sm_limit(0)>0)){
if ((cached_sm_limit <= 100) && (cached_sm_limit > 0)) {
pthread_create(&tid, NULL, utilization_watcher, NULL);
}
return;
Expand Down
Loading