1+ import matplotlib .pyplot as plt
2+ import numpy as np
3+ from scipy import stats
4+
# key parameters
true_count = 20  # reference value: the true count, M
N = 20  # total number of repetitions
N_short = 5  # size of the short subset (the first N_short repetitions)


def summary_stats(sample):
    """Return ``(mean, std, SE)`` of a 1-D sample.

    The standard deviation is the sample estimate (``ddof=1``) so that it is
    consistent with ``scipy.stats.sem``, which also defaults to ``ddof=1``;
    the returned values therefore satisfy ``SE == std / sqrt(len(sample))``.
    """
    return np.mean(sample), np.std(sample, ddof=1), stats.sem(sample)


def error_metrics(errors):
    """Return ``(MAE, RMSE)`` for a sequence of signed errors."""
    errors = np.asarray(errors, dtype=float)
    return np.mean(np.abs(errors)), np.sqrt(np.mean(errors ** 2))


# Simulated counts: normally distributed (SD 5), rounded to whole numbers.
data_A = np.round(np.random.normal(20, 5, N))  # mean close to the true value
data_B = np.round(np.random.normal(24, 5, N))  # mean away from the true count

data_A_short = data_A[0:N_short]  # first N_short repetitions only
data_B_short = data_B[0:N_short]

print('Basic stats and standard error:')
for sample in (data_A_short, data_A):
    mean, std, se = summary_stats(sample)
    print(f'A, N = {len(sample):2d}: ', sample.astype(int))
    print(f' mean: {mean:.2f}, std: {std:.2f}, SE: {se:.2f}')

print('\n Error metrics:')
# Signed deviation of every repetition from the reference count.
error_A = data_A - true_count
error_B = data_B - true_count

error_A_short = error_A[0:N_short]
error_B_short = error_B[0:N_short]

for label, errors in (
    ('A', error_A_short),
    ('A', error_A),
    ('B', error_B_short),
    ('B', error_B),
):
    mae, rmse = error_metrics(errors)
    # The N shown is taken from the data, so it stays correct if N/N_short change.
    print(f'{label}, N={len(errors)}, MAE: {mae:.2f}, RMSE: {rmse:.2f}')

print('\n One-sample Student t-test:')
# H0: the sample mean equals true_count.
for label, sample in (
    ('A', data_A_short),
    ('A', data_A),
    ('B', data_B_short),
    ('B', data_B),
):
    result = stats.ttest_1samp(sample, true_count)  # run the test once, reuse both fields
    print(f'{label}, N={len(sample)}, t: {result.statistic:.2f}, p: {result.pvalue:.2f}')

print('\n Two-sample Student t-test:')
# H0: sets A and B have the same mean (scipy default: equal variances assumed).
for first, second in ((data_A_short, data_B_short), (data_A, data_B)):
    result = stats.ttest_ind(first, second)
    print(f'AB, N={len(first)}, t: {result.statistic:.2f}, p: {result.pvalue:.2f}')
45+
# additional visualisations (disabled sketch)
# SE with increasing N
# NOTE: `data` is not defined in this script; assign one of the data sets
# before enabling this section, e.g.:
# data = data_A
# sd = []
# se = []
# for i in range(3, len(data)+1):
#     sd.append(np.std(data[0:i]))
#     se.append(stats.sem(data[0:i]))

# print(sd, se)

# plt.plot(range(3, len(data)+1), sd, label='std')
# plt.plot(range(3, len(data)+1), se, label='SE')
# plt.xlim(3, 20)
# plt.xlabel('N')
# plt.legend()
# plt.show()
0 commit comments