Skip to content

Commit 74c9ddc

Browse files
committed
Implementing profglue
1 parent 2ef91b7 commit 74c9ddc

File tree

8 files changed

+326
-5
lines changed

8 files changed

+326
-5
lines changed

.github/workflows/compilation.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
# Create symbolic links using relative paths
3030
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
3131
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
32+
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
3233
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
3334
(cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)
3435

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ RUN cd /src && \
1414
# Create symbolic links using relative paths
1515
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcglue.a libcglue.a && cd -)
1616
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libpthreadglue.a libpthreadglue.a && cd -)
17+
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libprofglue.a libprofglue.a && cd -)
1718
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libkernel.a libkernel.a && cd -)
1819
RUN (cd $PS2DEV/ee/mips64r5900el-ps2-elf/lib && ln -sf ../../../ps2sdk/ee/lib/libcdvd.a libcdvd.a && cd -)
1920

ee/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
# Licenced under Academic Free License version 2.0
77
# Review ps2sdk README & LICENSE files for further details.
88

9-
SUBDIRS = startup erl kernel libcglue libpthreadglue rpc debug \
9+
SUBDIRS = startup erl kernel libcglue libpthreadglue libprofglue rpc debug \
1010
eedebug sbv dma graph math3d \
1111
packet packet2 draw libgs \
1212
libvux font input inputx network iopreboot \
1313
mpeg \
14-
elf-loader elf-loader-nocolour
14+
elf-loader elf-loader-nocolour \
1515

1616
include $(PS2SDKSRC)/Defs.make
1717
include $(PS2SDKSRC)/Rules.make

ee/libcglue/Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,14 @@ FDMAN_OBJS = \
2323
__fdman_get_dup2_descriptor.o \
2424
__fdman_release_descriptor.o
2525

26-
INIT_OBJS = __libpthreadglue_init.o __libpthreadglue_deinit.o _libcglue_init.o _libcglue_deinit.o _libcglue_args_parse.o
26+
INIT_OBJS = \
27+
__gprof_init.o \
28+
__gprof_cleanup.o \
29+
__libpthreadglue_init.o \
30+
__libpthreadglue_deinit.o \
31+
_libcglue_init.o \
32+
_libcglue_deinit.o \
33+
_libcglue_args_parse.o
2734

2835
SLEEP_OBJS = nanosleep.o
2936

ee/libcglue/src/init.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ void __locks_deinit();
2525

2626
int chdir(const char *path);
2727

28+
#ifdef F___gprof_init
29+
/* Note: This function is called from _init and is overridden when compiling with -pg */
30+
__attribute__((weak))
31+
void __gprof_init() {}
32+
#else
33+
void __gprof_init();
34+
#endif
35+
36+
#ifdef F___gprof_cleanup
37+
/* Note: This function is called from _exit and is overridden when compiling with -pg */
38+
__attribute__((weak))
39+
void __gprof_cleanup() {}
40+
#else
41+
void __gprof_cleanup();
42+
#endif
43+
2844
#ifdef F___libpthreadglue_init
2945
/* Note: This function is being called from __libcglue_init.
3046
* It is a weak function because it can be overridden by the user program
@@ -46,8 +62,6 @@ __attribute__((weak))
4662
void __libpthreadglue_deinit()
4763
{
4864
pthread_terminate();
49-
__fdman_deinit();
50-
__locks_deinit();
5165
}
5266
#else
5367
void __libpthreadglue_deinit();
@@ -68,14 +82,19 @@ void _libcglue_init()
6882

6983
_libcglue_timezone_update();
7084
_libcglue_rtc_update();
85+
86+
__gprof_init();
7187
}
7288
#endif
7389

7490
#ifdef F__libcglue_deinit
7591
__attribute__((weak))
7692
void _libcglue_deinit()
7793
{
94+
__gprof_cleanup();
7895
__libpthreadglue_deinit();
96+
__fdman_deinit();
97+
__locks_deinit();
7998
}
8099
#endif
81100

ee/libprofglue/Makefile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# _____ ___ ____ ___ ____
2+
# ____| | ____| | | |____|
3+
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
4+
#-----------------------------------------------------------------------
5+
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
6+
# Licenced under Academic Free License version 2.0
7+
# Review ps2sdk README & LICENSE files for further details.
8+
9+
EE_LIB = libprofglue.a
10+
11+
EE_OBJS = prof.o mcount.o
12+
13+
include $(PS2SDKSRC)/Defs.make
14+
include $(PS2SDKSRC)/ee/Rules.lib.make
15+
include $(PS2SDKSRC)/ee/Rules.make
16+
include $(PS2SDKSRC)/ee/Rules.release

ee/libprofglue/src/mcount.S

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
.set noreorder
2+
.set noat
3+
4+
.global _mcount
5+
.ent _mcount
6+
7+
_mcount:
8+
9+
# Generated code already subtracts 8 bytes
10+
# We store our ra, at and a0-a3
11+
daddiu $sp, $sp, -56 # Adjust stack pointer for 64-bit registers, 7 registers * 8 bytes each
12+
sd $ra, 0($sp) # store ra
13+
sd $at, 8($sp) # at = ra of caller
14+
sd $a0, 16($sp)
15+
sd $a1, 24($sp)
16+
sd $a2, 32($sp)
17+
sd $a3, 40($sp)
18+
19+
# Make sure we're not recursively called when compiling __mcount()
20+
# With -pg
21+
la $a0, _busy
22+
lw $a1, 0($a0)
23+
bnez $a1, done
24+
nop
25+
26+
# Mark busy
27+
li $a1, 1
28+
sw $a1, 0($a0)
29+
30+
# Call internal C handler
31+
move $a0, $at
32+
move $a1, $ra
33+
jal __mcount
34+
nop
35+
36+
# Unmark busy
37+
la $a0, _busy
38+
li $a1, 0
39+
sw $a1, 0($a0)
40+
41+
done:
42+
43+
# Restore registers
44+
ld $ra, 0($sp)
45+
ld $at, 8($sp)
46+
ld $a0, 16($sp)
47+
ld $a1, 24($sp)
48+
ld $a2, 32($sp)
49+
ld $a3, 40($sp)
50+
daddiu $sp, $sp, 56 # Adjust stack pointer back
51+
jr $ra
52+
move $ra, $at # restore caller's ra
53+
54+
_busy:
55+
.space 4
56+
57+
.end _mcount
58+
59+
.set reorder
60+
.set at

ee/libprofglue/src/prof.c

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
# _____ ___ ____ ___ ____
3+
# ____| | ____| | | |____|
4+
# | ___| |____ ___| ____| | \ PS2DEV Open Source Project.
5+
#-----------------------------------------------------------------------
6+
# Copyright 2001-2004, ps2dev - http://www.ps2dev.org
7+
# Licenced under Academic Free License version 2.0
8+
# Review ps2sdk README & LICENSE files for further details.
9+
*/
10+
11+
#include <stdlib.h>
12+
#include <malloc.h>
13+
#include <stdint.h>
14+
#include <stdio.h>
15+
#include <string.h>
16+
17+
#define GMON_PROF_ON 0
18+
#define GMON_PROF_BUSY 1
19+
#define GMON_PROF_ERROR 2
20+
#define GMON_PROF_OFF 3
21+
22+
#define GMONVERSION 0x00051879
23+
24+
#include <kernel.h>
25+
#include <timer_alarm.h>
26+
27+
/** gmon.out file header */
28+
struct gmonhdr
29+
{
30+
int lpc; /* lowest pc address */
31+
int hpc; /* highest pc address */
32+
int ncnt; /* size of samples + size of header */
33+
int version; /* version number */
34+
int profrate; /* profiling clock rate */
35+
int resv[3]; /* reserved */
36+
};
37+
38+
/** frompc -> selfpc graph */
39+
struct rawarc
40+
{
41+
unsigned int frompc;
42+
unsigned int selfpc;
43+
unsigned int count;
44+
};
45+
46+
/** context */
47+
struct gmonparam
48+
{
49+
int state;
50+
unsigned int lowpc;
51+
unsigned int highpc;
52+
unsigned int textsize;
53+
unsigned int hashfraction;
54+
55+
int narcs;
56+
struct rawarc *arcs;
57+
58+
int nsamples;
59+
unsigned int *samples;
60+
61+
int timerId;
62+
63+
unsigned int pc;
64+
};
65+
66+
/// holds context statistics
67+
static struct gmonparam gp;
68+
69+
/// one histogram per four bytes of text space
70+
#define HISTFRACTION 4
71+
72+
/// define sample frequency - 1000 hz = 1ms
73+
#define SAMPLE_FREQ 1000
74+
75+
/// defined by linker
76+
extern int _ftext;
77+
extern int _etext;
78+
79+
/** Internal timer handler
80+
*/
81+
static uint64_t timer_handler(int id, uint64_t scheduled_time, uint64_t actual_time, void *arg, void *pc_value)
82+
{
83+
struct gmonparam *current_gp = (struct gmonparam *)arg;
84+
85+
unsigned int frompc = current_gp->pc;
86+
87+
if (current_gp->state == GMON_PROF_ON) {
88+
/* call might come from stack */
89+
if (frompc >= current_gp->lowpc && frompc <= current_gp->highpc) {
90+
int e = (frompc - current_gp->lowpc) / current_gp->hashfraction;
91+
current_gp->samples[e]++;
92+
}
93+
}
94+
95+
96+
current_gp->timerId = iSetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, arg);
97+
return 0;
98+
}
99+
100+
/** Initializes pg library
101+
102+
After calculating the text size, __gprof_initialize() allocates enough
103+
memory to allow fastest access to arc structures, and some more
104+
for sampling statistics. Note that this also installs a timer that
105+
runs at 1000 hert.
106+
*/
107+
void __gprof_init()
108+
{
109+
memset(&gp, '\0', sizeof(gp));
110+
gp.state = GMON_PROF_ON;
111+
gp.lowpc = (unsigned int)&_ftext;
112+
gp.highpc = (unsigned int)&_etext;
113+
gp.textsize = gp.highpc - gp.lowpc;
114+
gp.hashfraction = HISTFRACTION;
115+
116+
gp.narcs = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
117+
gp.arcs = (struct rawarc *)malloc(sizeof(struct rawarc) * gp.narcs);
118+
if (gp.arcs == NULL) {
119+
gp.state = GMON_PROF_ERROR;
120+
return;
121+
}
122+
123+
gp.nsamples = (gp.textsize + gp.hashfraction - 1) / gp.hashfraction;
124+
gp.samples = (unsigned int *)malloc(sizeof(unsigned int) * gp.nsamples);
125+
if (gp.samples == NULL) {
126+
free(gp.arcs);
127+
gp.arcs = 0;
128+
gp.state = GMON_PROF_ERROR;
129+
return;
130+
}
131+
132+
memset((void *)gp.arcs, '\0', gp.narcs * (sizeof(struct rawarc)));
133+
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int)));
134+
135+
136+
gp.state = GMON_PROF_ON;
137+
gp.timerId = SetTimerAlarm(USec2TimerBusClock(SAMPLE_FREQ), &timer_handler, &gp);
138+
}
139+
140+
/** Writes gmon.out dump file and stops profiling
141+
142+
Called from atexit() handler; will dump out a host:gmon.out file
143+
with all collected information.
144+
*/
145+
void __gprof_cleanup()
146+
{
147+
FILE *fp;
148+
int i;
149+
struct gmonhdr hdr;
150+
151+
if (gp.state != GMON_PROF_ON) {
152+
/* profiling was disabled anyway */
153+
return;
154+
}
155+
156+
/* disable profiling before we make plenty of libc calls */
157+
gp.state = GMON_PROF_OFF;
158+
159+
ReleaseTimerAlarm(gp.timerId);
160+
161+
fp = fopen("gmon.out", "wb");
162+
hdr.lpc = gp.lowpc;
163+
hdr.hpc = gp.highpc;
164+
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
165+
hdr.version = GMONVERSION;
166+
hdr.profrate = SAMPLE_FREQ;
167+
hdr.resv[0] = 0;
168+
hdr.resv[1] = 0;
169+
hdr.resv[2] = 0;
170+
fwrite(&hdr, 1, sizeof(hdr), fp);
171+
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
172+
173+
for (i = 0; i < gp.narcs; i++) {
174+
if (gp.arcs[i].count > 0) {
175+
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
176+
}
177+
}
178+
179+
fclose(fp);
180+
181+
// free memory
182+
free(gp.arcs);
183+
free(gp.samples);
184+
}
185+
186+
/** Internal C handler for _mcount()
187+
@param frompc pc address of caller
188+
@param selfpc pc address of current function
189+
190+
Called from mcount.S to make life a bit easier. __mcount is called
191+
right before a function starts. GCC generates a tiny stub at the very
192+
beginning of each compiled routine, which eventually brings the
193+
control to here.
194+
*/
195+
void __mcount(unsigned int frompc, unsigned int selfpc)
196+
{
197+
int e;
198+
struct rawarc *arc;
199+
200+
if (gp.state != GMON_PROF_ON) {
201+
/* returned off for some reason */
202+
return;
203+
}
204+
205+
frompc = frompc & 0x0FFFFFFF;
206+
selfpc = selfpc & 0x0FFFFFFF;
207+
208+
/* call might come from stack */
209+
if (frompc >= gp.lowpc && frompc <= gp.highpc) {
210+
gp.pc = selfpc;
211+
e = (frompc - gp.lowpc) / gp.hashfraction;
212+
arc = gp.arcs + e;
213+
arc->frompc = frompc;
214+
arc->selfpc = selfpc;
215+
arc->count++;
216+
}
217+
}

0 commit comments

Comments
 (0)