Skip to content

Commit e87fcfc

Browse files
committed
workshop 7
1 parent 8b074ca commit e87fcfc

File tree

2 files changed

+516
-0
lines changed

2 files changed

+516
-0
lines changed

evaluation/comp_stats.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import matplotlib.pyplot as plt
2+
import numpy as np
3+
from scipy import stats
4+
5+
# key parameters
true_count = 20  # this is our reference, true count or M
N = 20  # the total number of repetitions N
N_short = 5  # the short subset


def basic_stats(sample):
    """Return ``(mean, std, SE)`` for a 1-D sample.

    The standard deviation is the *sample* estimate (``ddof=1``) so that it
    is consistent with ``stats.sem`` (which also defaults to ``ddof=1``),
    i.e. ``SE == std / sqrt(len(sample))``.
    """
    values = np.asarray(sample, dtype=float)
    return np.mean(values), np.std(values, ddof=1), stats.sem(values)


def error_metrics(errors):
    """Return ``(MAE, RMSE)`` for an array of signed errors."""
    errs = np.asarray(errors, dtype=float)
    return np.mean(np.abs(errs)), np.sqrt(np.mean(errs ** 2))


def report_basic_stats(label, sample):
    """Print the sample values followed by their mean, std and SE."""
    mean, std, se = basic_stats(sample)
    print(f'{label}, N = {len(sample):2d}: ', np.asarray(sample).astype(int))
    print(f' mean: {mean:.2f}, std: {std:.2f}, SE: {se:.2f}')


def report_error_metrics(label, errors):
    """Print MAE and RMSE; the N in the label is taken from the data."""
    mae, rmse = error_metrics(errors)
    print(f'{label}, N={len(errors)}, MAE: {mae:.2f}, RMSE: {rmse:.2f}')


def report_ttest(label, n, result):
    """Print the statistic and p-value of an already computed t-test."""
    print(f'{label}, N={n}, t: {result.statistic:.2f}, p: {result.pvalue:.2f}')


data_A = np.round(np.random.normal(20, 5, N))  # a standard set with the mean close to the true value
data_B = np.round(np.random.normal(24, 5, N))  # a second set with the mean away from the true count value

data_A_short = data_A[0:N_short]  # subset of data, first N_short values
data_B_short = data_B[0:N_short]

print('Basic stats and standard error:')
report_basic_stats('A', data_A_short)
report_basic_stats('A', data_A)

print('\nError metrics:')
error_A = data_A - true_count
error_B = data_B - true_count

error_A_short = error_A[0:N_short]
error_B_short = error_B[0:N_short]

for label, errors in (('A', error_A_short), ('A', error_A),
                      ('B', error_B_short), ('B', error_B)):
    report_error_metrics(label, errors)

print('\nOne-sample Student t-test:')
for label, sample in (('A', data_A_short), ('A', data_A),
                      ('B', data_B_short), ('B', data_B)):
    # run each test once and read both fields from the same result
    report_ttest(label, len(sample), stats.ttest_1samp(sample, true_count))

print('\nTwo-sample Student t-test:')
for sample_a, sample_b in ((data_A_short, data_B_short), (data_A, data_B)):
    report_ttest('AB', len(sample_a), stats.ttest_ind(sample_a, sample_b))

# additional visualisations (disabled)
# SE with increasing N; bind `data` to data_A or data_B before enabling
# data = data_A
# sd = []
# se = []
# for i in range(3, len(data)+1):
#     sd.append(np.std(data[0:i]))
#     se.append(stats.sem(data[0:i]))
#
# print(sd, se)
#
# plt.plot(range(3, len(data)+1), sd, label='std')
# plt.plot(range(3, len(data)+1), se, label='SE')
# plt.xlim(3, 20)
# plt.xlabel('N')
# plt.legend()
# plt.show()

0 commit comments

Comments
 (0)