-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtests.py
More file actions
125 lines (97 loc) · 4.69 KB
/
tests.py
File metadata and controls
125 lines (97 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from scipy.stats import t, chi2
def student_t_test(data, significance_level=0.05):
    """Run a two-sided, pooled-variance Student's t-test on user ages.

    Compares the mean 'Age' of rows whose 'Device' is 'Smart TV' against
    rows whose 'Device' is 'Laptop' and prints the hypotheses, the test
    parameters and the decision. Nothing is returned.

    Args:
        data: Table indexable by column name that supports boolean-mask row
            selection and has 'Device' and 'Age' columns (presumably a
            pandas DataFrame; confirm against the caller).
        significance_level: Total two-sided type I error rate (alpha).

    Raises:
        ZeroDivisionError: If a group is too small for a mean / sample
            variance to be computed with plain Python numbers.
    """
    print('-------------------------------------------')
    print('Student\'s T-test |')
    print('-------------------------------------------')

    null_hypothesis = 'There is NO significant age difference between smart TV and laptop users'
    alt_hypothesis = 'There is SIGNIFICANT age difference between smart TV and laptop users'
    print('Null hypothesis:', null_hypothesis)
    print('Alt hypothesis:', alt_hypothesis)

    laptop_ages = data[data['Device'] == 'Laptop']['Age']
    smart_tv_ages = data[data['Device'] == 'Smart TV']['Age']
    n_smart_tv = len(smart_tv_ages)
    n_laptop = len(laptop_ages)

    # means
    mean_smart_tv = sum(smart_tv_ages) / n_smart_tv
    mean_laptop = sum(laptop_ages) / n_laptop

    # unbiased sample variances (n - 1 in the denominator)
    variance_smart_tv = sum((x - mean_smart_tv) ** 2 for x in smart_tv_ages) / (n_smart_tv - 1)
    variance_laptop = sum((x - mean_laptop) ** 2 for x in laptop_ages) / (n_laptop - 1)

    degrees_of_freedom = n_smart_tv + n_laptop - 2
    pooled_variance = (
        (n_smart_tv - 1) * variance_smart_tv + (n_laptop - 1) * variance_laptop
    ) / degrees_of_freedom

    # The alternative hypothesis is "a difference in either direction" and the
    # decision below compares |t| with the critical value, so alpha has to be
    # split across both tails; using 1 - alpha here would silently test at
    # twice the stated significance level.
    critical_t_value = t.ppf(1 - significance_level / 2, degrees_of_freedom)

    standard_error = (pooled_variance * ((1 / n_smart_tv) + (1 / n_laptop))) ** 0.5
    t_statistic = (mean_smart_tv - mean_laptop) / standard_error

    print()
    print('Significance level:', significance_level)
    print('Degrees of Freedom:', degrees_of_freedom)
    print('Critical T-value:', critical_t_value)
    print('T-statistic:', t_statistic)
    print()

    # results
    if abs(t_statistic) > critical_t_value:
        print('Null Hypothesis is REJECTED:', alt_hypothesis)
    else:
        print('Null hypothesis is NOT REJECTED:', null_hypothesis)
    print('-------------------------------------------')
def pearson_chi_squared_test(data, significance_level=0.05):
    """Run Pearson's chi-squared test of independence on country vs. device.

    Builds the contingency table of 'Country' against 'Device', computes the
    chi-squared statistic over every cell of that table and prints the
    hypotheses, the test parameters and the decision. Nothing is returned.

    Args:
        data: Mapping-like table whose 'Country' and 'Device' entries are
            equally long iterables of hashable labels (presumably a pandas
            DataFrame; confirm against the caller).
        significance_level: Type I error rate (alpha) for the upper-tail
            critical value.
    """
    print('-------------------------------------------')
    print('Pearson\'s Chi-squared Test |')
    print('-------------------------------------------')

    null_hypothesis = 'There is NO relationship between country and device'
    alt_hypothesis = 'There is a RELATIONSHIP between country and device'
    print('Null hypothesis:', null_hypothesis)
    print('Alt hypothesis:', alt_hypothesis)

    # observed frequencies and marginal totals
    # Rows are paired positionally with zip(); indexing the columns with
    # 0..n-1 would be a label lookup on a pandas Series and fails whenever
    # the index is not the default range (e.g. after filtering).
    observed_frequencies = {}
    row_totals = {}
    column_totals = {}
    grand_total = 0
    for country, device in zip(data['Country'], data['Device']):
        cell = (country, device)
        observed_frequencies[cell] = observed_frequencies.get(cell, 0) + 1
        row_totals[country] = row_totals.get(country, 0) + 1
        column_totals[device] = column_totals.get(device, 0) + 1
        grand_total += 1

    # chi-squared calculation
    # Every cell of the rows x columns table is visited, including the
    # combinations that never occur: an empty cell still has a positive
    # expected count and contributes (0 - E)^2 / E = E to the statistic.
    chi_squared = 0
    for country, row_total in row_totals.items():
        for device, column_total in column_totals.items():
            expected_count = (row_total * column_total) / grand_total
            observed_count = observed_frequencies.get((country, device), 0)
            chi_squared += ((observed_count - expected_count) ** 2) / expected_count

    # degrees of freedom and critical value
    degrees_of_freedom = (len(row_totals) - 1) * (len(column_totals) - 1)
    critical_value = chi2.ppf(1 - significance_level, degrees_of_freedom)

    print()
    print('Significance level:', significance_level)
    print('Degrees of Freedom:', degrees_of_freedom)
    print('Critical Value:', critical_value)
    print('Chi-squared:', chi_squared)
    print()

    # results
    if chi_squared < critical_value:
        print('Null hypothesis is NOT REJECTED:', null_hypothesis)
    else:
        print('Null hypothesis is REJECTED:', alt_hypothesis)
    print('-------------------------------------------')