Skip to content

Commit bd0ca72

Browse files
authored
Merge pull request #650 from fjtrujy/gprof
Implementing profglue
2 parents 2ef91b7 + b580972 commit bd0ca72

File tree

8 files changed

+309
-5
lines changed

8 files changed

+309
-5
lines changed

.github/workflows/compilation.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
# Create symbolic links using relative paths
3030
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
3131
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
32+
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
3233
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
3334
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)
3435

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN cd /src && \
1414
# Create symbolic links using relative paths
1515
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
1616
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
17+
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
1718
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
1819
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)
1920

ee/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
# Licenced under Academic Free License version 2.0
77
# Review ps2sdk README & LICENSE files for further details.
88

9-
SUBDIRS = startup erl kernel libcglue libpthreadglue rpc debug \
9+
SUBDIRS = startup erl kernel libcglue libpthreadglue libprofglue rpc debug \
1010
eedebug sbv dma graph math3d \
1111
packet packet2 draw libgs \
1212
libvux font input inputx network iopreboot \
1313
mpeg \
14-
elf-loader elf-loader-nocolour
14+
elf-loader elf-loader-nocolour \
1515

1616
include $(PS2SDKSRC)/Defs.make
1717
include $(PS2SDKSRC)/Rules.make

ee/libcglue/Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,14 @@ FDMAN_OBJS = \
2323
__fdman_get_dup2_descriptor.o \
2424
__fdman_release_descriptor.o
2525

26-
INIT_OBJS = __libpthreadglue_init.o __libpthreadglue_deinit.o _libcglue_init.o _libcglue_deinit.o _libcglue_args_parse.o
26+
INIT_OBJS = \
27+
__gprof_init.o \
28+
__gprof_cleanup.o \
29+
__libpthreadglue_init.o \
30+
__libpthreadglue_deinit.o \
31+
_libcglue_init.o \
32+
_libcglue_deinit.o \
33+
_libcglue_args_parse.o
2734

2835
SLEEP_OBJS = nanosleep.o
2936

ee/libcglue/src/init.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ void __locks_deinit();
2525

2626
int chdir(const char *path);
2727

28+
#ifdef F___gprof_init
29+
/* Note: This function is being called from _init and it is overridden when compiling with -pg */
30+
__attribute__((weak))
31+
void __gprof_init() {}
32+
#else
33+
void __gprof_init();
34+
#endif
35+
36+
#ifdef F___gprof_cleanup
37+
/* Note: This function is being called from _exit and it is overridden when compiling with -pg */
38+
__attribute__((weak))
39+
void __gprof_cleanup() {}
40+
#else
41+
void __gprof_cleanup();
42+
#endif
43+
2844
#ifdef F___libpthreadglue_init
2945
/* Note: This function is being called from __libcglue_init.
3046
* It is a weak function because can be override by user program
@@ -46,8 +62,6 @@ __attribute__((weak))
4662
void __libpthreadglue_deinit()
4763
{
4864
pthread_terminate();
49-
__fdman_deinit();
50-
__locks_deinit();
5165
}
5266
#else
5367
void __libpthreadglue_deinit();
@@ -68,14 +82,19 @@ void _libcglue_init()
6882

6983
_libcglue_timezone_update();
7084
_libcglue_rtc_update();
85+
86+
__gprof_init();
7187
}
7288
#endif
7389

7490
#ifdef F__libcglue_deinit
7591
__attribute__((weak))
7692
void _libcglue_deinit()
7793
{
94+
__gprof_cleanup();
7895
__libpthreadglue_deinit();
96+
__fdman_deinit();
97+
__locks_deinit();
7998
}
8099
#endif
81100

ee/libprofglue/Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# _____ ___ ____ ___ ____
2+
# ____| | ____| | | |____|
3+
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
4+
#-----------------------------------------------------------------------
5+
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
6+
# Licenced under Academic Free License version 2.0
7+
# Review ps2sdk README & LICENSE files for further details.
8+
9+
EE_LIB = libprofglue.a
10+
11+
EE_OBJS = prof.o mcount.o
12+
13+
include $(PS2SDKSRC)/Defs.make
14+
include $(PS2SDKSRC)/ee/Rules.lib.make
15+
include $(PS2SDKSRC)/ee/Rules.make
16+
include $(PS2SDKSRC)/ee/Rules.release

ee/libprofglue/src/mcount.S

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Profiling entry stub.
# Saves $ra, $at and $a0-$a3 on the stack, calls the C handler
# __mcount($at, $ra), reloads the saved registers and returns through $ra,
# copying $at into $ra in the delay slot of the final jump.
# noreorder: delay slots below are filled by hand.
# noat: $at is used explicitly as a data register.
.set noreorder
.set noat

.global _mcount
.ent _mcount

_mcount:

# Generated code already subtracts 8 bytes
# NOTE(review): the 8-byte adjustment mentioned above is not visible in this
# file and nothing here adds it back - confirm against the call sequence the
# compiler emits for -pg on this ABI.
# We store our ra, at and a0-a3
# NOTE(review): 56 bytes are reserved but only the six slots at offsets 0..40
# are written - confirm the intended frame size and stack alignment.
# NOTE(review): argument registers beyond $a3 are not preserved here - confirm
# that the ABI in use never passes arguments in them.
daddiu $sp, $sp, -56 # Adjust stack pointer for 64-bit registers, 7 registers * 8 bytes each
sd $ra, 0($sp) # store ra
sd $at, 8($sp) # at = ra of caller
sd $a0, 16($sp)
sd $a1, 24($sp)
sd $a2, 32($sp)
sd $a3, 40($sp)

# Call internal C handler
# First argument = $at, second argument = $ra (frompc and selfpc of __mcount)
move $a0, $at
move $a1, $ra
jal __mcount
nop

# Restore registers
ld $ra, 0($sp)
ld $at, 8($sp)
ld $a0, 16($sp)
ld $a1, 24($sp)
ld $a2, 32($sp)
ld $a3, 40($sp)
daddiu $sp, $sp, 56 # Adjust stack pointer back
jr $ra
move $ra, $at # restore caller's ra

.end _mcount

.set reorder
.set at

ee/libprofglue/src/prof.c

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
# _____ ___ ____ ___ ____
3+
# ____| | ____| | | |____|
4+
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
5+
#-----------------------------------------------------------------------
6+
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
7+
# Licenced under Academic Free License version 2.0
8+
# Review ps2sdk README & LICENSE files for further details.
9+
*/
10+
11+
#include <stdlib.h>
12+
#include <malloc.h>
13+
#include <stdint.h>
14+
#include <stdio.h>
15+
#include <string.h>
16+
17+
#define GMON_PROF_ON 0
18+
#define GMON_PROF_BUSY 1
19+
#define GMON_PROF_ERROR 2
20+
#define GMON_PROF_OFF 3
21+
22+
#define GMONVERSION 0x00051879
23+
24+
#include <kernel.h>
25+
#include <timer_alarm.h>
26+
27+
/** Header record written at the very start of gmon.out */
struct gmonhdr
{
    int lpc;      /**< lowest pc address */
    int hpc;      /**< highest pc address */
    int ncnt;     /**< size of samples + size of header */
    int version;  /**< version number */
    int profrate; /**< profiling clock rate */
    int resv[3];  /**< reserved */
};
37+
38+
/** One call-graph arc (frompc -> selfpc) together with its hit counter */
struct rawarc
{
    unsigned frompc; /**< pc address of the caller */
    unsigned selfpc; /**< pc address of the called function */
    unsigned count;  /**< number of times this arc was recorded */
};
45+
46+
/** Profiler context: state, text range and the two collection tables */
struct gmonparam
{
    int state;             /**< one of the GMON_PROF_* values */
    unsigned lowpc;        /**< start of the profiled text range */
    unsigned highpc;       /**< end of the profiled text range */
    unsigned textsize;     /**< highpc - lowpc */
    unsigned hashfraction; /**< text bytes covered by one table slot */

    int narcs;             /**< number of entries in arcs */
    struct rawarc *arcs;   /**< call-graph table */

    int nsamples;          /**< number of entries in samples */
    unsigned *samples;     /**< pc histogram */

    int timerId;           /**< id of the pending sampling alarm */

    unsigned pc;           /**< pc last recorded by __mcount() */
};
65+
66+
/// holds context statistics
/// (single profiler context shared by init, cleanup, __mcount and the timer handler)
static struct gmonparam gp;

/// one histogram per four bytes of text space
/// (stored in gp.hashfraction and used as the divisor for both table indices)
#define HISTFRACTION 4

/// define sample frequency - 1000 hz = 1ms
/// (passed to USec2TimerBusClock() as the alarm interval and written to the
/// gmon.out header as profrate)
#define SAMPLE_FREQ 1000

/// defined by linker
/// (only their addresses are used: &_ftext becomes lowpc, &_etext becomes highpc)
extern int _ftext;
extern int _etext;
78+
79+
/** Internal timer handler
80+
*/
81+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
82+
static uint64_t timer_handler(int id, uint64_t scheduled_time, uint64_t actual_time, void *arg, void *pc_value)
83+
{
84+
struct gmonparam *current_gp = (struct gmonparam *)arg;
85+
86+
unsigned int frompc = current_gp->pc;
87+
88+
if (current_gp->state == GMON_PROF_ON) {
89+
/* call might come from stack */
90+
if (frompc >= current_gp->lowpc && frompc <= current_gp->highpc) {
91+
int e = (frompc - current_gp->lowpc) / current_gp->hashfraction;
92+
current_gp->samples[e]++;
93+
}
94+
}
95+
96+
97+
current_gp->timerId = iSetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, arg);
98+
return 0;
99+
}
100+
101+
/** Initializes pg library
102+
103+
After calculating the text size, __gprof_initialize() allocates enough
104+
memory to allow fastest access to arc structures, and some more
105+
for sampling statistics. Note that this also installs a timer that
106+
runs at 1000 hert.
107+
*/
108+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
109+
void __gprof_init()
110+
{
111+
memset(&gp, '\0', sizeof(gp));
112+
gp.state = GMON_PROF_ON;
113+
gp.lowpc = (unsigned int)&_ftext;
114+
gp.highpc = (unsigned int)&_etext;
115+
gp.textsize = gp.highpc - gp.lowpc;
116+
gp.hashfraction = HISTFRACTION;
117+
118+
gp.narcs = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
119+
gp.arcs = (struct rawarc *)malloc(sizeof(struct rawarc) * gp.narcs);
120+
if (gp.arcs == NULL) {
121+
gp.state = GMON_PROF_ERROR;
122+
return;
123+
}
124+
125+
gp.nsamples = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
126+
gp.samples = (unsigned int *)malloc(sizeof(unsigned int) * gp.nsamples);
127+
if (gp.samples == NULL) {
128+
free(gp.arcs);
129+
gp.arcs = 0;
130+
gp.state = GMON_PROF_ERROR;
131+
return;
132+
}
133+
134+
memset((void *)gp.arcs, '\0', gp.narcs * (sizeof(struct rawarc)));
135+
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int)));
136+
137+
138+
gp.state = GMON_PROF_ON;
139+
gp.timerId = SetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, &gp);
140+
}
141+
142+
/** Writes gmon.out dump file and stops profiling
143+
144+
Called from atexit() handler; will dump out a host:gmon.out file
145+
with all collected information.
146+
*/
147+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
148+
void __gprof_cleanup()
149+
{
150+
FILE *fp;
151+
int i;
152+
struct gmonhdr hdr;
153+
154+
if (gp.state != GMON_PROF_ON) {
155+
/* profiling was disabled anyway */
156+
return;
157+
}
158+
159+
/* disable profiling before we make plenty of libc calls */
160+
gp.state = GMON_PROF_OFF;
161+
162+
ReleaseTimerAlarm(gp.timerId);
163+
164+
fp = fopen("gmon.out", "wb");
165+
hdr.lpc = gp.lowpc;
166+
hdr.hpc = gp.highpc;
167+
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
168+
hdr.version = GMONVERSION;
169+
hdr.profrate = SAMPLE_FREQ;
170+
hdr.resv[0] = 0;
171+
hdr.resv[1] = 0;
172+
hdr.resv[2] = 0;
173+
fwrite(&hdr, 1, sizeof(hdr), fp);
174+
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
175+
176+
for (i = 0; i < gp.narcs; i++) {
177+
if (gp.arcs[i].count > 0) {
178+
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
179+
}
180+
}
181+
182+
fclose(fp);
183+
184+
// free memory
185+
free(gp.arcs);
186+
free(gp.samples);
187+
}
188+
189+
/** Internal C handler for _mcount()
190+
@param frompc pc address of caller
191+
@param selfpc pc address of current function
192+
193+
Called from mcount.S to make life a bit easier. __mcount is called
194+
right before a function starts. GCC generates a tiny stub at the very
195+
beginning of each compiled routine, which eventually brings the
196+
control to here.
197+
*/
198+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
199+
void __mcount(unsigned int frompc, unsigned int selfpc)
200+
{
201+
int e;
202+
struct rawarc *arc;
203+
204+
if (gp.state != GMON_PROF_ON) {
205+
/* returned off for some reason */
206+
return;
207+
}
208+
209+
frompc = frompc & 0x0FFFFFFF;
210+
selfpc = selfpc & 0x0FFFFFFF;
211+
212+
/* call might come from stack */
213+
if (frompc >= gp.lowpc && frompc <= gp.highpc) {
214+
gp.pc = selfpc;
215+
e = (frompc - gp.lowpc) / gp.hashfraction;
216+
arc = gp.arcs + e;
217+
arc->frompc = frompc;
218+
arc->selfpc = selfpc;
219+
arc->count++;
220+
}
221+
}

0 commit comments

Comments
 (0)