Skip to content

Commit e8da3d9

Browse files
Calculate confidence interval for benchmark measurements (#6950)
1 parent d9ce9cf commit e8da3d9

File tree

6 files changed

+556
-412
lines changed

6 files changed

+556
-412
lines changed

.github/workflows/osrm-backend.yml

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -700,15 +700,6 @@ jobs:
700700
mkdir -p $HOME/.ccache
701701
ccache --zero-stats
702702
ccache --max-size=256M
703-
- name: Build PR Branch
704-
run: |
705-
mkdir -p pr/build
706-
cd pr/build
707-
cmake -DENABLE_CONAN=ON -DCMAKE_BUILD_TYPE=Release ..
708-
make -j$(nproc)
709-
make -j$(nproc) benchmarks
710-
cd ..
711-
make -C test/data
712703
- name: Checkout Base Branch
713704
uses: actions/checkout@v4
714705
with:
@@ -723,9 +714,23 @@ jobs:
723714
make -j$(nproc) benchmarks
724715
cd ..
725716
make -C test/data
726-
- name: Run Benchmarks
717+
- name: Build PR Branch
718+
run: |
719+
mkdir -p pr/build
720+
cd pr/build
721+
cmake -DENABLE_CONAN=ON -DCMAKE_BUILD_TYPE=Release ..
722+
make -j$(nproc)
723+
make -j$(nproc) benchmarks
724+
cd ..
725+
make -C test/data
726+
- name: Run PR Benchmarks
727727
run: |
728-
./pr/scripts/ci/run_benchmarks.sh base pr
728+
./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/pr -r $(pwd)/pr_results -s $(pwd)/pr -b $(pwd)/pr/build -o ~/data.osm.pbf -g ~/gps_traces.csv
729+
- name: Run Base Benchmarks
730+
run: |
731+
# We intentionally use the scripts from the PR branch so that changes to them take effect, and show their results, in the same PR.
732+
./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/base -r $(pwd)/base_results -s $(pwd)/pr -b $(pwd)/base/build -o ~/data.osm.pbf -g ~/gps_traces.csv
733+
729734
- name: Post Benchmark Results
730735
run: |
731736
python3 pr/scripts/ci/post_benchmark_results.py base_results pr_results

scripts/ci/e2e_benchmark.py

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import requests
2-
import sys
32
import random
43
from collections import defaultdict
54
import os
@@ -8,12 +7,13 @@
87
import time
98
import argparse
109

10+
1111
class BenchmarkRunner:
12-
def __init__(self):
12+
def __init__(self, gps_traces_file_path):
1313
self.coordinates = []
1414
self.tracks = defaultdict(list)
1515

16-
gps_traces_file_path = os.path.expanduser('~/gps_traces.csv')
16+
gps_traces_file_path = os.path.expanduser(gps_traces_file_path)
1717
with open(gps_traces_file_path, 'r') as file:
1818
reader = csv.DictReader(file)
1919
for row in reader:
@@ -36,10 +36,9 @@ def run(self, benchmark_name, host, num_requests, warmup_requests=50):
3636
response = requests.get(url)
3737
end_time = time.time()
3838
if response.status_code != 200:
39-
if benchmark_name == 'match':
40-
code = response.json()['code']
41-
if code == 'NoSegment' or code == 'NoMatch':
42-
continue
39+
code = response.json()['code']
40+
if code in ['NoSegment', 'NoMatch', 'NoRoute', 'NoTrips']:
41+
continue
4342
raise Exception(f"Error: {response.status_code} {response.text}")
4443
times.append((end_time - start_time) * 1000) # convert to ms
4544

@@ -54,7 +53,7 @@ def make_url(self, host, benchmark_name):
5453
end_coord = f"{end[1]:.6f},{end[0]:.6f}"
5554
return f"{host}/route/v1/driving/{start_coord};{end_coord}?overview=full&steps=true"
5655
elif benchmark_name == 'table':
57-
num_coords = random.randint(3, 100)
56+
num_coords = random.randint(3, 12)
5857
selected_coords = random.sample(self.coordinates, num_coords)
5958
coords_str = ";".join([f"{coord[1]:.6f},{coord[0]:.6f}" for coord in selected_coords])
6059
return f"{host}/table/v1/driving/{coords_str}"
@@ -77,26 +76,63 @@ def make_url(self, host, benchmark_name):
7776
else:
7877
raise Exception(f"Unknown benchmark: {benchmark_name}")
7978

79+
def bootstrap_confidence_interval(data, num_samples=1000, confidence_level=0.95):
    """Estimate the mean of ``data`` and its confidence interval by bootstrapping.

    Draws ``num_samples`` resamples of ``data`` (with replacement, each the
    same length as ``data``) from NumPy's global random state, takes the mean
    of every resample, and reports percentiles of those means as the interval.

    Args:
        data: Non-empty one-dimensional sequence of measurements.
        num_samples: Number of bootstrap resamples to draw; must be positive.
        confidence_level: Coverage of the interval, strictly between 0 and 1.

    Returns:
        Tuple ``(mean, lower_bound, upper_bound)``: the mean of the resampled
        means and the lower/upper percentile bounds of the interval.

    Raises:
        ValueError: If ``data`` is empty, ``num_samples`` is not positive, or
            ``confidence_level`` is not strictly between 0 and 1.
    """
    # Without this check an empty input yields NaN results instead of an error.
    if len(data) == 0:
        raise ValueError("data must contain at least one measurement")
    if num_samples <= 0:
        raise ValueError("num_samples must be positive")
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    # Resamples are drawn one after another so a seeded run is reproducible.
    means = [
        np.mean(np.random.choice(data, size=len(data), replace=True))
        for _ in range(num_samples)
    ]

    # Symmetric percentile interval, e.g. 2.5 and 97.5 for a 95% level.
    lower_percentile = (1 - confidence_level) / 2 * 100
    upper_percentile = (1 + confidence_level) / 2 * 100
    lower_bound, upper_bound = np.percentile(
        means, [lower_percentile, upper_percentile]
    )
    return np.mean(means), lower_bound, upper_bound
88+
89+
def calculate_confidence_interval(data):
    """Summarise ``data`` as a bootstrapped mean, a +/- margin and its minimum.

    Returns:
        Tuple ``(mean, margin, min_value)``: the mean reported by
        ``bootstrap_confidence_interval``, half the width of the interval it
        returns, and the smallest element of ``data``.
    """
    center, ci_low, ci_high = bootstrap_confidence_interval(data)
    # Half the interval width is the value printed after "±" in the report.
    half_width = (ci_high - ci_low) / 2
    smallest = np.min(data)
    return center, half_width, smallest
93+
94+
8095
def main():
8196
parser = argparse.ArgumentParser(description='Run GPS benchmark tests.')
8297
parser.add_argument('--host', type=str, required=True, help='Host URL')
8398
parser.add_argument('--method', type=str, required=True, choices=['route', 'table', 'match', 'nearest', 'trip'], help='Benchmark method')
8499
parser.add_argument('--num_requests', type=int, required=True, help='Number of requests to perform')
100+
parser.add_argument('--iterations', type=int, required=True, help='Number of iterations to run the benchmark')
101+
parser.add_argument('--gps_traces_file_path', type=str, required=True, help='Path to the GPS traces file')
85102

86103
args = parser.parse_args()
87104

88-
random.seed(42)
105+
np.random.seed(42)
106+
107+
runner = BenchmarkRunner(args.gps_traces_file_path)
108+
109+
all_times = []
110+
for _ in range(args.iterations):
111+
random.seed(42)
112+
times = runner.run(args.method, args.host, args.num_requests)
113+
all_times.append(times)
114+
all_times = np.asarray(all_times)
115+
116+
assert all_times.shape == (args.iterations, all_times.shape[1])
117+
89118

90-
runner = BenchmarkRunner()
91-
times = runner.run(args.method, args.host, args.num_requests)
119+
total_time, total_ci, total_best = calculate_confidence_interval(np.sum(all_times, axis=1))
120+
ops_per_sec, ops_per_sec_ci, ops_per_sec_best = calculate_confidence_interval(float(all_times.shape[1]) / np.sum(all_times / 1000, axis=1))
121+
min_time, min_ci, _ = calculate_confidence_interval(np.min(all_times, axis=1))
122+
mean_time, mean_ci, _ = calculate_confidence_interval(np.mean(all_times, axis=1))
123+
median_time, median_ci, _ = calculate_confidence_interval(np.median(all_times, axis=1))
124+
perc_95_time, perc_95_ci, _ = calculate_confidence_interval(np.percentile(all_times, 95, axis=1))
125+
perc_99_time, perc_99_ci, _ = calculate_confidence_interval(np.percentile(all_times, 99, axis=1))
126+
max_time, max_ci, _ = calculate_confidence_interval(np.max(all_times, axis=1))
92127

93-
print(f'Total: {np.sum(times)}ms')
94-
print(f"Min time: {np.min(times)}ms")
95-
print(f"Mean time: {np.mean(times)}ms")
96-
print(f"Median time: {np.median(times)}ms")
97-
print(f"95th percentile: {np.percentile(times, 95)}ms")
98-
print(f"99th percentile: {np.percentile(times, 99)}ms")
99-
print(f"Max time: {np.max(times)}ms")
128+
print(f'Ops: {ops_per_sec:.2f} ± {ops_per_sec_ci:.2f} ops/s. Best: {ops_per_sec_best:.2f} ops/s')
129+
print(f'Total: {total_time:.2f}ms ± {total_ci:.2f}ms. Best: {total_best:.2f}ms')
130+
print(f"Min time: {min_time:.2f}ms ± {min_ci:.2f}ms")
131+
print(f"Mean time: {mean_time:.2f}ms ± {mean_ci:.2f}ms")
132+
print(f"Median time: {median_time:.2f}ms ± {median_ci:.2f}ms")
133+
print(f"95th percentile: {perc_95_time:.2f}ms ± {perc_95_ci:.2f}ms")
134+
print(f"99th percentile: {perc_99_time:.2f}ms ± {perc_99_ci:.2f}ms")
135+
print(f"Max time: {max_time:.2f}ms ± {max_ci:.2f}ms")
100136

101137
if __name__ == '__main__':
102138
main()

scripts/ci/run_benchmarks.sh

Lines changed: 85 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,121 @@
11
#!/bin/bash
22
set -eou pipefail
33

4+
# Print the command-line synopsis to stderr and abort with exit status 1.
# Invoked for an invalid option, an option missing its argument, or a
# missing required option, so the text belongs on stderr with the other
# error messages rather than on stdout.
function usage {
    echo "Usage: $0 -f <folder> -r <results_folder> -s <scripts_folder> -b <binaries_folder> -o <osm_pbf> -g <gps_traces>" >&2
    exit 1
}
8+
9+
while getopts ":f:r:s:b:o:g:" opt; do
10+
case $opt in
11+
f) FOLDER="$OPTARG"
12+
;;
13+
r) RESULTS_FOLDER="$OPTARG"
14+
;;
15+
s) SCRIPTS_FOLDER="$OPTARG"
16+
;;
17+
b) BINARIES_FOLDER="$OPTARG"
18+
;;
19+
o) OSM_PBF="$OPTARG"
20+
;;
21+
g) GPS_TRACES="$OPTARG"
22+
;;
23+
\?) echo "Invalid option -$OPTARG" >&2
24+
usage
25+
;;
26+
:) echo "Option -$OPTARG requires an argument." >&2
27+
usage
28+
;;
29+
esac
30+
done
31+
32+
if [ -z "${FOLDER:-}" ] || [ -z "${RESULTS_FOLDER:-}" ] || [ -z "${SCRIPTS_FOLDER:-}" ] || [ -z "${BINARIES_FOLDER:-}" ] || [ -z "${OSM_PBF:-}" ] || [ -z "${GPS_TRACES:-}" ]; then
33+
usage
34+
fi
35+
436
function measure_peak_ram_and_time {
537
COMMAND=$1
638
OUTPUT_FILE=$2
7-
8-
OUTPUT=$(/usr/bin/time -f "%e %M" $COMMAND 2>&1 | tail -n 1)
9-
10-
TIME=$(echo $OUTPUT | awk '{print $1}')
11-
PEAK_RAM_KB=$(echo $OUTPUT | awk '{print $2}')
12-
PEAK_RAM_MB=$(echo "scale=2; $PEAK_RAM_KB / 1024" | bc)
13-
echo "Time: ${TIME}s Peak RAM: ${PEAK_RAM_MB}MB" > $OUTPUT_FILE
39+
if [ "$(uname)" == "Darwin" ]; then
40+
# macOS `time` takes different options than the GNU `/usr/bin/time -f` used below, so on macOS just run the command without recording time or peak RAM
41+
$COMMAND > /dev/null 2>&1
42+
else
43+
OUTPUT=$(/usr/bin/time -f "%e %M" $COMMAND 2>&1 | tail -n 1)
44+
45+
TIME=$(echo $OUTPUT | awk '{print $1}')
46+
PEAK_RAM_KB=$(echo $OUTPUT | awk '{print $2}')
47+
PEAK_RAM_MB=$(echo "scale=2; $PEAK_RAM_KB / 1024" | bc)
48+
echo "Time: ${TIME}s Peak RAM: ${PEAK_RAM_MB}MB" > $OUTPUT_FILE
49+
fi
1450
}
1551

1652
function run_benchmarks_for_folder {
17-
echo "Running benchmarks for $1"
18-
19-
FOLDER=$1
20-
RESULTS_FOLDER=$2
21-
SCRIPTS_FOLDER=$3
22-
2353
mkdir -p $RESULTS_FOLDER
2454

25-
BENCHMARKS_FOLDER="$FOLDER/build/src/benchmarks"
26-
27-
./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
28-
./$BENCHMARKS_FOLDER/match-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
29-
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
30-
./$BENCHMARKS_FOLDER/route-bench "./$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
31-
./$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
32-
./$BENCHMARKS_FOLDER/json-render-bench "./$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
33-
./$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
34-
./$BENCHMARKS_FOLDER/rtree-bench "./$FOLDER/test/data/monaco.osrm.ramIndex" "./$FOLDER/test/data/monaco.osrm.fileIndex" "./$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"
35-
36-
BINARIES_FOLDER="$FOLDER/build"
37-
38-
cp ~/data.osm.pbf $FOLDER
39-
55+
BENCHMARKS_FOLDER="$BINARIES_FOLDER/src/benchmarks"
56+
echo "Running match-bench MLD"
57+
$BENCHMARKS_FOLDER/match-bench "$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/match_mld.bench"
58+
echo "Running match-bench CH"
59+
$BENCHMARKS_FOLDER/match-bench "$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/match_ch.bench"
60+
echo "Running route-bench MLD"
61+
$BENCHMARKS_FOLDER/route-bench "$FOLDER/test/data/mld/monaco.osrm" mld > "$RESULTS_FOLDER/route_mld.bench"
62+
echo "Running route-bench CH"
63+
$BENCHMARKS_FOLDER/route-bench "$FOLDER/test/data/ch/monaco.osrm" ch > "$RESULTS_FOLDER/route_ch.bench"
64+
echo "Running alias"
65+
$BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench"
66+
echo "Running json-render-bench"
67+
$BENCHMARKS_FOLDER/json-render-bench "$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench"
68+
echo "Running packedvector-bench"
69+
$BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench"
70+
echo "Running rtree-bench"
71+
$BENCHMARKS_FOLDER/rtree-bench "$FOLDER/test/data/monaco.osrm.ramIndex" "$FOLDER/test/data/monaco.osrm.fileIndex" "$FOLDER/test/data/monaco.osrm.nbg_nodes" > "$RESULTS_FOLDER/rtree.bench"
72+
73+
cp -rf $OSM_PBF $FOLDER/data.osm.pbf
74+
75+
echo "Running osrm-extract"
4076
measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-extract -p $FOLDER/profiles/car.lua $FOLDER/data.osm.pbf" "$RESULTS_FOLDER/osrm_extract.bench"
77+
echo "Running osrm-partition"
4178
measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-partition $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_partition.bench"
79+
echo "Running osrm-customize"
4280
measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-customize $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_customize.bench"
81+
echo "Running osrm-contract"
4382
measure_peak_ram_and_time "$BINARIES_FOLDER/osrm-contract $FOLDER/data.osrm" "$RESULTS_FOLDER/osrm_contract.bench"
4483

45-
for BENCH in nearest table trip route match; do
46-
./$BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" mld ~/gps_traces.csv ${BENCH} > "$RESULTS_FOLDER/random_${BENCH}_mld.bench" || true
47-
./$BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" ch ~/gps_traces.csv ${BENCH} > "$RESULTS_FOLDER/random_${BENCH}_ch.bench" || true
84+
for ALGORITHM in ch mld; do
85+
for BENCH in nearest table trip route match; do
86+
echo "Running random $BENCH $ALGORITHM"
87+
START=$(date +%s.%N)
88+
$BENCHMARKS_FOLDER/bench "$FOLDER/data.osrm" $ALGORITHM $GPS_TRACES ${BENCH} > "$RESULTS_FOLDER/random_${BENCH}_${ALGORITHM}.bench" 5 || true
89+
END=$(date +%s.%N)
90+
DIFF=$(echo "$END - $START" | bc)
91+
echo "Took: ${DIFF}s"
92+
done
4893
done
4994

5095

5196
for ALGORITHM in ch mld; do
52-
$BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm &
97+
$BINARIES_FOLDER/osrm-routed --algorithm $ALGORITHM $FOLDER/data.osrm > /dev/null 2>&1 &
5398
OSRM_ROUTED_PID=$!
5499

55100
# wait for osrm-routed to start
56-
if ! curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true"; then
101+
if ! curl --retry-delay 3 --retry 10 --retry-all-errors "http://127.0.0.1:5000/route/v1/driving/13.388860,52.517037;13.385983,52.496891?steps=true" > /dev/null 2>&1; then
57102
echo "osrm-routed failed to start for algorithm $ALGORITHM"
58103
kill -9 $OSRM_ROUTED_PID
59104
continue
60105
fi
61106

62107
for METHOD in route nearest trip table match; do
63-
python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --num_requests 1000 > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
108+
echo "Running e2e benchmark for $METHOD $ALGORITHM"
109+
START=$(date +%s.%N)
110+
python3 $SCRIPTS_FOLDER/scripts/ci/e2e_benchmark.py --host http://localhost:5000 --method $METHOD --iterations 5 --num_requests 1000 --gps_traces_file_path $GPS_TRACES > $RESULTS_FOLDER/e2e_${METHOD}_${ALGORITHM}.bench
111+
END=$(date +%s.%N)
112+
DIFF=$(echo "$END - $START" | bc)
113+
echo "Took: ${DIFF}s"
64114
done
65115

66116
kill -9 $OSRM_ROUTED_PID
67117
done
68118
}
69119

70-
run_benchmarks_for_folder $1 "${1}_results" $2
71-
run_benchmarks_for_folder $2 "${2}_results" $2
120+
run_benchmarks_for_folder
72121

0 commit comments

Comments
 (0)