Skip to content

Commit 7ec7212

Browse files
committed
Improve particle_engine performance
Introduced a profiling lib (from computerenhance). Profiled the particle engine code in the update loop and implemented a more cpu-cache friendly solution that doesn't rely on linked lists.
1 parent d530239 commit 7ec7212

21 files changed

+1280
-81
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@
1717
/breakhack*.run
1818
/compile_commands.json
1919
/.cache
20+
/profile.txt

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(breakhack_GAME_TITLE "BreakHack")
1010
set(DISABLE_STEAM_BUILD OFF CACHE BOOL "Disable Steam build")
1111
set(RELEASE_TYPE "" CACHE STRING "Release type")
1212
# ON/OFF switch, so it is typed BOOL like DISABLE_STEAM_BUILD.
set(STEAM_BUILD OFF CACHE BOOL "Steam build")
13+
# Opt-in switch for building and linking lib/profiler. Typed BOOL (not STRING)
# because it is only ever tested with if(PROFILER).
set(PROFILER OFF CACHE BOOL "Enable profiler")
1314

1415
# Checksums, Disabled until further notice
1516
# set(breakhack_STEAMAPI_DLL_CHECKSUM 0x1a4691a)
@@ -50,6 +51,9 @@ if (STEAM_BUILD)
5051
endif()
5152
add_subdirectory(lib/bh_random)
5253
add_subdirectory(lib/checksum)
54+
if (PROFILER)
55+
add_subdirectory(lib/profiler)
56+
endif()
5357

5458
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
5559
set(CLANG 1)
@@ -216,6 +220,12 @@ add_subdirectory(lib/sqlite3)
216220
set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 11)
217221
target_include_directories(${PROJECT_NAME} PRIVATE ${INCLUDE_DIRS})
218222

223+
if (PROFILER)
224+
# Define PROFILER for ${PROJECT_NAME} only, instead of for every target
# declared in this directory.
target_compile_definitions(${PROJECT_NAME} PRIVATE PROFILER)
225+
target_link_libraries(${PROJECT_NAME} profiler)
226+
target_include_directories(${PROJECT_NAME} PRIVATE lib/profiler/include)
227+
endif ()
228+
219229
if (NOT MSVC)
220230
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -Wextra -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes -Wconversion -Wno-sign-conversion")
221231
if (NOT APPLE)

lib/profiler/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
cmake_minimum_required(VERSION 3.7)
2+
3+
project(profiler LANGUAGES C VERSION 0.1.0)
4+
5+
add_library(profiler
6+
src/rdtsc.c
7+
src/profiler.c
8+
src/repetition_tester.c
9+
src/perf.c
10+
)
11+
12+
target_include_directories(profiler
13+
PUBLIC include
14+
PRIVATE include/internal
15+
)

lib/profiler/README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Benchmark
2+
3+
A simple library to benchmark your code.
4+
5+
## Compiling
6+
7+
- `mkdir build`
8+
- `cd build`
9+
- `cmake ..`
10+
- `make`
11+
12+
## Usage example
13+
14+
```c
15+
#include <stdio.h>
16+
#include "benchmark.h"
17+
18+
int main(int argc, char **argv)
19+
{
20+
(void) argc;
21+
(void) argv;
22+
23+
/* Create and initialize the stopclock */
24+
StopClock sclock;
25+
sclock_init(&sclock);
26+
27+
sclock_start(&sclock);
28+
29+
/* Execute some code */
30+
31+
sclock_checkpoint(&sclock, "First part");
32+
33+
/* Execute some more code */
34+
35+
sclock_checkpoint(&sclock, "Second part");
36+
37+
/* Execute even more code */
38+
39+
sclock_checkpoint(&sclock, "Third part");
40+
41+
/* Stop the clock */
42+
sclock_stop(&sclock);
43+
44+
/* Output the results */
45+
sclock_print(&sclock, stdout);
46+
47+
/* Clean up */
48+
sclock_destroy(&sclock);
49+
}
50+
```
51+
52+
### Example output
53+
54+
```
55+
===== Benchmarks : =====
56+
First part : 5916119358 95.54%
57+
Second part : 6940092 0.17%
58+
Third part : 265775256 4.29%
59+
Remaining time : 3478810 0.00%
60+
```
61+
62+
## How it works
63+
64+
Under the hood `benchmark` is using the `RDTSC` assembly instruction to time
65+
things. During `sclock_init`, the number of `RDTSC` ticks that occur in one
66+
second is estimated (over 300ms). This is a rough estimate but usually aligns
67+
well with the GHz number specified on the running computer's CPU.
68+
69+
## API
70+
71+
> TODO: Generate API and link to it here
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifndef COMMON_H_
2+
#define COMMON_H_
3+
4+
#define C_RED "\x1b[31m"
5+
#define C_GREEN "\x1b[32m"
6+
#define C_YELLOW "\x1b[33m"
7+
#define C_BLUE "\x1b[34m"
8+
#define C_MAGENTA "\x1b[35m"
9+
#define C_CYAN "\x1b[36m"
10+
#define C_WHITE "\x1b[37m"
11+
#define C_RESET "\x1b[0m"
12+
13+
#endif // COMMON_H_
14+
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#ifndef _STOP_CLOCK_H_
2+
#define _STOP_CLOCK_H_
3+
4+
#include <stdint.h>
5+
#include <stddef.h>
6+
#include <stdio.h>
7+
8+
#define ANCHOR_CAPACITY 4096
9+
10+
typedef struct ProfileAnchor {
11+
const char *label;
12+
size_t hits;
13+
uint64_t elapsed_exclusive;
14+
uint64_t elapsed_inclusive;
15+
size_t processed_byte_count;
16+
} ProfileAnchor;
17+
18+
typedef struct Profiler {
19+
uint64_t start, stop;
20+
size_t len;
21+
ProfileAnchor anchors[ANCHOR_CAPACITY];
22+
} Profiler;
23+
24+
typedef struct AnchorBlock {
25+
const char *label;
26+
uint64_t start;
27+
uint64_t old_elapsed_inclusive;
28+
size_t processed_byte_count;
29+
char *parent_anchor;
30+
} AnchorBlock;
31+
32+
void prof_init(void);
33+
34+
void prof_start(void);
35+
36+
void prof_stop(void);
37+
38+
void prof_add_anchor(const AnchorBlock *anchor, uint64_t elapsed);
39+
40+
void prof_print(FILE *fp);
41+
42+
AnchorBlock make_anchor_block(const char *label, size_t used_bytes);
43+
void read_anchor_block(const AnchorBlock *anchor);
44+
45+
#endif // _STOP_CLOCK_H_
46+

lib/profiler/include/macros.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#ifndef __MACROS_H_
2+
#define __MACROS_H_
3+
4+
#include "internal/profiler_c.h"
5+
#include "rdtsc.h"
6+
7+
#ifdef PROFILER
8+
9+
#define PROFILER_SETUP() \
10+
prof_start();
11+
12+
#define PROFILER_STOP(fp) \
13+
prof_stop(); \
14+
prof_print(fp);
15+
16+
#define _NameConcat(A, B) A##B
17+
#define NameConcat(A, B) _NameConcat(A, B)
18+
19+
#define TIME_BANDWIDTH_BEGIN(label, bytes) \
20+
AnchorBlock NameConcat(Block, label) = make_anchor_block(#label, bytes);
21+
22+
#define TIME_BANDWIDTH_END(label) \
23+
read_anchor_block( &Block##label );
24+
25+
#define TIME_BLOCK_BEGIN(label) TIME_BANDWIDTH_BEGIN(label, 0)
26+
#define TIME_BLOCK_END(label) TIME_BANDWIDTH_END(label)
27+
28+
#define TIME_FUNC_BEGIN() \
29+
AnchorBlock NameConcat(Block, __func__) = make_anchor_block(__func__, 0);
30+
31+
#define TIME_FUNC_END() \
32+
read_anchor_block( &Block##__func__ );
33+
34+
#else
35+
#define PROFILER_SETUP()
36+
#define PROFILER_STOP(fp)
37+
#define TIME_BLOCK_BEGIN(label)
38+
#define TIME_BLOCK_END(label)
39+
/* No-op stub. Takes (label, bytes), the same arity as the PROFILER-enabled
 * definition, so call sites compile in both configurations. */
#define TIME_BANDWIDTH_BEGIN(label, bytes)
40+
#define TIME_BANDWIDTH_END(label)
41+
#define TIME_FUNC_BEGIN()
42+
#define TIME_FUNC_END()
43+
#endif
44+
45+
46+
#endif // __MACROS_H_
47+

lib/profiler/include/perf.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#ifndef _PERF_H_
2+
#define _PERF_H_
3+
4+
#include <stdint.h>
5+
6+
void perf_setup(void);
7+
8+
void perf_reset_page_fault_count(void);
9+
10+
uint64_t perf_read_page_fault_count(void);
11+
12+
void perf_close(void);
13+
14+
#endif // _PERF_H_
15+

lib/profiler/include/profiler.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef _PROFILER_H_
2+
#define _PROFILER_H_
3+
4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
8+
#ifdef PROFILER
9+
10+
#include "rdtsc.h"
11+
#include "internal/profiler_c.h"
12+
#include "macros.h"
13+
#include "perf.h"
14+
15+
#endif
16+
17+
#ifdef __cplusplus
18+
}
19+
#endif
20+
21+
#endif // _PROFILER_H_
22+

lib/profiler/include/rdtsc.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef _RDTSC_H_
2+
#define _RDTSC_H_
3+
4+
#ifdef PROFILER
5+
6+
#include <stdint.h>
7+
#include <stddef.h>
8+
9+
uint64_t get_os_time_freq(void);
10+
11+
uint64_t read_os_timer(void);
12+
13+
uint64_t read_cpu_timer(void);
14+
15+
uint64_t estimate_cpu_freq(uint64_t test_time);
16+
17+
#endif
18+
19+
#endif // _RDTSC_H_
20+

0 commit comments

Comments
 (0)