Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions darwin/DarwinProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ const ProcessFieldData Process_fields[LAST_PROCESSFIELD] = {
[PROC_EXE] = { .name = "EXE", .title = "EXE ", .description = "Basename of exe of the process from /proc/[pid]/exe", .flags = 0, },
[CWD] = { .name = "CWD", .title = "CWD ", .description = "The current working directory of the process", .flags = PROCESS_FLAG_CWD, },
[TRANSLATED] = { .name = "TRANSLATED", .title = "T ", .description = "Translation info (T translated, N native)", .flags = 0, },
[TIME_GPU] = { .name = "TIME_GPU", .title = "GPU TIME+", .description = "Total GPU time", .flags = PROCESS_FLAG_GPU, .defaultSortDesc = true, },
[PERCENT_GPU] = { .name = "PERCENT_GPU", .title = " GPU% ", .description = "Percentage of the GPU time the process used in the last sampling", .flags = PROCESS_FLAG_GPU, .defaultSortDesc = true, },
};

Process* DarwinProcess_new(const Machine* host) {
Expand All @@ -77,14 +79,18 @@ void Process_delete(Object* cast) {

static void DarwinProcess_rowWriteField(const Row* super, RichString* str, ProcessField field) {
const DarwinProcess* dp = (const DarwinProcess*) super;
const Machine* host = (const Machine*) super->host;

bool coloring = host->settings->highlightMegabytes;
char buffer[256]; buffer[255] = '\0';
int attr = CRT_colors[DEFAULT_COLOR];
size_t n = sizeof(buffer) - 1;

switch (field) {
// add Platform-specific fields here
case TRANSLATED: xSnprintf(buffer, n, "%c ", dp->translated ? 'T' : 'N'); break;
case PERCENT_GPU: Row_printPercentage(dp->gpu_percent, buffer, n, 5, &attr); break;
case TIME_GPU: Row_printNanoseconds(str, dp->gpu_time, coloring); return;
default:
Process_writeField(&dp->super, str, field);
return;
Expand All @@ -101,6 +107,15 @@ static int DarwinProcess_compareByKey(const Process* v1, const Process* v2, Proc
// add Platform-specific fields here
case TRANSLATED:
return SPACESHIP_NUMBER(p1->translated, p2->translated);
case PERCENT_GPU: {
int r = compareRealNumbers(p1->gpu_percent, p2->gpu_percent);
if (r)
return r;

return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time);
}
case TIME_GPU:
return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time);
default:
return Process_compareByKey_Base(v1, v2, key);
}
Expand Down Expand Up @@ -521,6 +536,25 @@ void DarwinProcess_scanThreads(DarwinProcess* dp, DarwinProcessTable* dpt) {
mach_port_deallocate(mach_task_self(), task);
}

void DarwinProcess_setFromGPUProcesses(DarwinProcess* dp, Hashtable* gps) {
if (gps) {
unsigned long long* data = Hashtable_get(gps, (ht_key_t) dp->super.super.id);
unsigned long long new_gpu_time = data ? *data : 0;
if (new_gpu_time > 0) {
unsigned long long gputimeDelta = saturatingSub(new_gpu_time, dp->gpu_time);
const Machine* host = dp->super.super.host;
uint64_t monotonicTimeDelta = host->monotonicMs - host->prevMonotonicMs;
dp->gpu_percent = 100.0F * gputimeDelta / (1000 * 1000) / monotonicTimeDelta;
} else {
dp->gpu_percent = 0.0F;
}
dp->gpu_time = new_gpu_time;
} else {
dp->gpu_time = 0;
dp->gpu_percent = 0.0F;
}
}


const ProcessClass DarwinProcess_class = {
.super = {
Expand Down
8 changes: 8 additions & 0 deletions darwin/DarwinProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ in the source distribution for its full text.


#define PROCESS_FLAG_TTY 0x00000100
#define PROCESS_FLAG_GPU 0x00000200

typedef struct DarwinProcess_ {
Process super;
Expand All @@ -22,6 +23,11 @@ typedef struct DarwinProcess_ {
uint64_t stime;
bool taskAccess;
bool translated;

/* Total GPU time used in nano seconds */
unsigned long long int gpu_time;
/* GPU utilization in percent */
float gpu_percent;
} DarwinProcess;

extern const ProcessClass DarwinProcess_class;
Expand All @@ -43,4 +49,6 @@ void DarwinProcess_setFromLibprocPidinfo(DarwinProcess* proc, DarwinProcessTable
*/
void DarwinProcess_scanThreads(DarwinProcess* dp, DarwinProcessTable* dpt);

void DarwinProcess_setFromGPUProcesses(DarwinProcess* dp, Hashtable* gps);

#endif
13 changes: 13 additions & 0 deletions darwin/DarwinProcessTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,14 @@ void ProcessTable_delete(Object* cast) {
void ProcessTable_goThroughEntries(ProcessTable* super) {
const Machine* host = super->super.host;
const DarwinMachine* dhost = (const DarwinMachine*) host;
const Settings* settings = host->settings;
const ScreenSettings* ss = settings->ss;
DarwinProcessTable* dpt = (DarwinProcessTable*) super;
bool preExisting = true;
struct kinfo_proc* ps;
size_t count;
DarwinProcess* proc;
Hashtable* gps = NULL;

/* Get the time difference */
dpt->global_diff = 0;
Expand All @@ -97,6 +100,10 @@ void ProcessTable_goThroughEntries(ProcessTable* super) {
*/
ps = ProcessTable_getKInfoProcs(&count);

if (ss->flags & PROCESS_FLAG_GPU) {
gps = Platform_getGPUProcesses(host);
}

for (size_t i = 0; i < count; ++i) {
proc = (DarwinProcess*)ProcessTable_getProcess(super, ps[i].kp_proc.p_pid, &preExisting, DarwinProcess_new);

Expand All @@ -122,12 +129,18 @@ void ProcessTable_goThroughEntries(ProcessTable* super) {
DarwinProcess_scanThreads(proc, dpt);
}

DarwinProcess_setFromGPUProcesses(proc, gps);

super->totalTasks += 1;

if (!preExisting) {
ProcessTable_add(super, &proc->super);
}
}

if (gps) {
Hashtable_delete(gps);
}

free(ps);
}
94 changes: 86 additions & 8 deletions darwin/Platform.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,14 +365,9 @@ void Platform_setGPUValues(Meter* mtr, double* totalUsage, unsigned long long* t
return;
}

CFMutableDictionaryRef properties = NULL;
kern_return_t ret = IORegistryEntryCreateCFProperties(dhost->GPUService, &properties, kCFAllocatorDefault, kNilOptions);
if (ret != KERN_SUCCESS || !properties)
return;

CFDictionaryRef perfStats = CFDictionaryGetValue(properties, CFSTR("PerformanceStatistics"));
CFDictionaryRef perfStats = IORegistryEntryCreateCFProperty(dhost->GPUService, CFSTR("PerformanceStatistics"), kCFAllocatorDefault, kNilOptions);
if (!perfStats)
goto cleanup;
return;

assert(CFGetTypeID(perfStats) == CFDictionaryGetTypeID());

Expand All @@ -387,7 +382,7 @@ void Platform_setGPUValues(Meter* mtr, double* totalUsage, unsigned long long* t
prevMonotonicMs = host->monotonicMs;

cleanup:
CFRelease(properties);
CFRelease(perfStats);

mtr->values[0] = *totalUsage;
}
Expand Down Expand Up @@ -798,3 +793,86 @@ static void Platform_getOSRelease(char* buffer, size_t bufferLen) {
const char* Platform_getRelease(void) {
return Generic_unameRelease(Platform_getOSRelease);
}

Hashtable* Platform_getGPUProcesses(const Machine* host) {
const DarwinMachine* dhost = (const DarwinMachine*)host;
if (!dhost->GPUService) {
return NULL;
}

Hashtable* gps = Hashtable_new(64, true);

io_iterator_t iterator;
if (IORegistryEntryGetChildIterator(dhost->GPUService, kIOServicePlane, &iterator) == KERN_SUCCESS) {
io_registry_entry_t entry;
while ((entry = IOIteratorNext(iterator))) {
io_struct_inband_t buffer;
uint32_t size = sizeof(buffer);
if (IORegistryEntryGetProperty(entry, "IOUserClientCreator", buffer, &size) != KERN_SUCCESS)
goto cleanup;

pid_t pid;
if (sscanf(buffer, "pid %d", &pid) != 1)
goto cleanup;

unsigned long long gpuTime = 0;
unsigned long long* data = Hashtable_get(gps, (ht_key_t) pid);
if (data)
gpuTime = *data;

if (IOObjectConformsTo(entry, "IOGPUDeviceUserClient")) {
CFArrayRef appUsage = IORegistryEntryCreateCFProperty(entry, CFSTR("AppUsage"), kCFAllocatorDefault, kNilOptions);
if (!appUsage)
goto cleanup;

assert(CFGetTypeID(appUsage) == CFArrayGetTypeID());

// Sum up the GPU time for all command queues for this client
size_t count = CFArrayGetCount(appUsage);
for (size_t i = 0; i < count; ++i) {
CFDictionaryRef appUsageEntry = CFArrayGetValueAtIndex(appUsage, i);
assert(CFGetTypeID(appUsageEntry) == CFDictionaryGetTypeID());

CFNumberRef accumulatedGPUTimeRef = CFDictionaryGetValue(appUsageEntry, CFSTR("accumulatedGPUTime"));
if (!accumulatedGPUTimeRef) {
CFRelease(appUsage);
goto cleanup;
}

unsigned long long accumulatedGPUTime = 0;
CFNumberGetValue(accumulatedGPUTimeRef, kCFNumberLongLongType, &accumulatedGPUTime);

gpuTime += accumulatedGPUTime;
}

CFRelease(appUsage);
} else if (IOObjectConformsTo(entry, "IOAccelSubmitter2")) {
CFNumberRef accumulatedGPUTimeRef = IORegistryEntryCreateCFProperty(entry, CFSTR("accumulatedGPUTime"), kCFAllocatorDefault, kNilOptions);
if (!accumulatedGPUTimeRef)
goto cleanup;

unsigned long long accumulatedGPUTime = 0;
CFNumberGetValue(accumulatedGPUTimeRef, kCFNumberLongLongType, &accumulatedGPUTime);
CFRelease(accumulatedGPUTimeRef);

gpuTime += accumulatedGPUTime;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
CFRelease(accumulatedGPUTimeRef);
gpuTime += accumulatedGPUTime;
gpuTime += accumulatedGPUTime;
CFRelease(accumulatedGPUTimeRef);

Re-order things to have resource management as last action in branch.

} else {
goto cleanup;
}

if (gpuTime > 0) {
if (!data) {
data = xCalloc(1, sizeof(gpuTime));
Hashtable_put(gps, (ht_key_t) pid, data);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not handle this early when fetching the item from the hashtable. This will create some entries with zeros, but it makes the overall handling much simpler, as you can skip later NULL checks (knowing you have memory).

Another thing I kinda dislike here is the amount of malloc calls (memory fragmentation) this causes. But given we are talking ull here (why not write uint64_t instead?), why not drop this hole thing entirely and store the actual data into the pointers directly using uintptr_t? Given the unit of gpuTime this might be limiting on 32 bit systems, of which we do not have too many (~0) left that would also support this interface, I guess …

Given this only affects Darwin 10.13 and 10.14 in compat mode, let's put an assert(sizeof(uintptr_t) > sizeof(uint32_t)); in and be done on that front …

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BenBE No, I can think of a better idea. It is to remove this new Hashtable, and instead expand DarwinProcess for this GPU time information.

Consider that this new Hashtable also uses PID as the indexing key, the two tables can be merged to one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Explorer09 That make sense.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Be careful that you may encounter items where no process object has been stored yet. There are some items in the process struct that are only set upon first encountering the entry. Apart from this, the idea makes sense.

*data = gpuTime;
}

cleanup:
IOObjectRelease(entry);
}
IOObjectRelease(iterator);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cleanup:
IOObjectRelease(entry);
}
IOObjectRelease(iterator);
cleanup:
IOObjectRelease(entry);
}
IOObjectRelease(iterator);

Labels should go first column of the line. Also let's give the code some space.

}

return gps;
}
2 changes: 2 additions & 0 deletions darwin/Platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ static inline void Platform_getHostname(char* buffer, size_t size) {

const char* Platform_getRelease(void);

Hashtable* Platform_getGPUProcesses(const Machine* host);

static inline const char* Platform_getFailedState(void) {
return NULL;
}
Expand Down
2 changes: 2 additions & 0 deletions darwin/ProcessField.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ in the source distribution for its full text.

#define PLATFORM_PROCESS_FIELDS \
TRANSLATED = 100, \
TIME_GPU = 101, \
PERCENT_GPU = 102, \
\
DUMMY_BUMP_FIELD = CWD, \
// End of list
Expand Down
12 changes: 6 additions & 6 deletions linux/LinuxProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ const ProcessFieldData Process_fields[LAST_PROCESSFIELD] = {
#ifdef SCHEDULER_SUPPORT
[SCHEDULERPOLICY] = { .name = "SCHEDULERPOLICY", .title = "SCHED ", .description = "Current scheduling policy of the process", .flags = PROCESS_FLAG_SCHEDPOL, },
#endif
[GPU_TIME] = { .name = "GPU_TIME", .title = "GPU_TIME ", .description = "Total GPU time", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
[GPU_PERCENT] = { .name = "GPU_PERCENT", .title = " GPU% ", .description = "Percentage of the GPU time the process used in the last sampling", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
[TIME_GPU] = { .name = "TIME_GPU", .title = "GPU TIME+", .description = "Total GPU time", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
[PERCENT_GPU] = { .name = "PERCENT_GPU", .title = " GPU% ", .description = "Percentage of the GPU time the process used in the last sampling", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
};

Process* LinuxProcess_new(const Machine* host) {
Expand Down Expand Up @@ -245,8 +245,8 @@ static void LinuxProcess_rowWriteField(const Row* super, RichString* str, Proces
switch (field) {
case CMINFLT: Row_printCount(str, lp->cminflt, coloring); return;
case CMAJFLT: Row_printCount(str, lp->cmajflt, coloring); return;
case GPU_PERCENT: Row_printPercentage(lp->gpu_percent, buffer, n, 5, &attr); break;
case GPU_TIME: Row_printNanoseconds(str, lp->gpu_time, coloring); return;
case PERCENT_GPU: Row_printPercentage(lp->gpu_percent, buffer, n, 5, &attr); break;
case TIME_GPU: Row_printNanoseconds(str, lp->gpu_time, coloring); return;
case M_DRS: Row_printBytes(str, lp->m_drs * lhost->pageSize, coloring); return;
case M_LRS:
if (lp->m_lrs) {
Expand Down Expand Up @@ -455,14 +455,14 @@ static int LinuxProcess_compareByKey(const Process* v1, const Process* v2, Proce
return SPACESHIP_NUMBER(p1->autogroup_id, p2->autogroup_id);
case AUTOGROUP_NICE:
return SPACESHIP_NUMBER(p1->autogroup_nice, p2->autogroup_nice);
case GPU_PERCENT: {
case PERCENT_GPU: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a question. What motivated you to rename the field from GPU_PERCENT to PERCENT_GPU? (Similar question to GPU_TIME to TIME_GPU.)

For me, the renaming of the fields doesn't make them easier to read.

This is beside the compatibility problem I pointed out to you before, which you have fixed. I just wonder why this idea in the first place.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I noticed that the previous percentages were all named like PERCENT_CPU and PERCENT_MEM. Of course, due to compatibility issues, these modifications are now meaningless. Should I revert it back to GPU_PERCENT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I noticed that the previous percentages were all named like PERCENT_CPU and PERCENT_MEM.

Oh I see. Thus it was an error when the GPU percentage fields were introduced in htop Linux monitoring.

Because htop had no requirement on whether the word PERCENT should be placed at the start or at the end.

If you grep -r TIME in the htop codebase, you would get what I mean: We had a lot of fields with "TIME" word at the end. STARTTIME, CPU_TOTAL_TIME, GUEST_TIME, etc.

Of course, due to compatibility issues, these modifications are now meaningless. Should I revert it back to GPU_PERCENT?

It's not my call. Maybe BenBE can decide this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given there are other fields like PERCENT_CPU etc. I actually prefer this change. Otherwise I'd already mention this change. Thus can stay as is rn. ;-)

int r = compareRealNumbers(p1->gpu_percent, p2->gpu_percent);
if (r)
return r;

return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time);
}
case GPU_TIME:
case TIME_GPU:
return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time);
case ISCONTAINER:
return SPACESHIP_NUMBER(v1->isRunningInContainer, v2->isRunningInContainer);
Expand Down