Skip to content

Commit d9ff356

Browse files
authored
Merge pull request #1 from alex98247/exponentiality-tests
Exponentiality tests and some scripts
2 parents 3899df1 + 3c45d5c commit d9ff356

File tree

9 files changed

+267
-0
lines changed

9 files changed

+267
-0
lines changed

stattest/_statistic_test.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from dataclasses import dataclass
2+
from typing import Callable
3+
from experiment._distribution_type_enum import Distribution
4+
from experiment._hypothesis_enum import Hypothesis
5+
6+
7+
@dataclass
class StatisticTest:
    """
    Plain data container describing a single statistic test.

    Every field is optional and defaults to ``None``.

    Attributes
    ----------
    dist_type : Distribution
        Distribution type associated with the test.
    hypothesis : Hypothesis
        Hypothesis associated with the test.
    stat_func : Callable
        Callable stored for the test (presumably the function that
        computes the test statistic -- confirm against callers).
    limit_dist : Distribution
        Second distribution value stored for the test (presumably the
        limiting distribution of the statistic -- confirm against callers).
    """

    dist_type: Distribution = None
    hypothesis: Hypothesis = None
    stat_func: Callable = None
    limit_dist: Distribution = None

stattest/_utils.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
def _scale_sample(sample):
    """
    Scales the sample data by dividing every element by the sample mean.

    The input is never modified; a new container is returned.

    Parameters
    ----------
    sample : array_like
        Array of sample data. Must be non-empty.

    Returns
    -------
    sample_copy : array_like
        Scaled sample. Objects exposing ``dtype`` (e.g. NumPy arrays) are
        scaled with true division and returned as a new array; any other
        sequence (list, tuple, ...) is returned as a list of floats.

    Raises
    ------
    ZeroDivisionError
        If the sample is empty, or if a plain Python sequence has zero mean.
    """
    n = len(sample)
    sample_avg = sum(sample) / n

    if hasattr(sample, "dtype"):
        # True division allocates a new floating-point array. Writing the
        # quotients back into an integer-typed copy would silently truncate
        # them (e.g. [1, 2, 3] -> [0, 1, 1]).
        return sample / sample_avg

    return [value / sample_avg for value in sample]
21+
22+
23+
def _check_sample_length(sample):
    """
    Validates that the sample holds at least three observations.

    Parameters
    ----------
    sample : array_like
        Array of sample data.

    Returns
    -------
    True
        Returned whenever the sample is long enough.

    Raises
    ------
    ValueError
        If the sample contains fewer than 3 elements.
    """
    if len(sample) >= 3:
        return True

    raise ValueError("Data must be at least length 3.")

stattest/experiment/__init__.py

Whitespace-only changes.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from stattest._statistic_test import StatisticTest
2+
3+
4+
def get_test_metrics(stat_test: StatisticTest = None):
    """
    Placeholder for computing metrics of a statistic test.

    Parameters
    ----------
    stat_test : StatisticTest
        Description of the test. Currently unused.

    Returns
    -------
    True
        Always returned; no metrics are computed yet.
    """
    return True
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import enum
2+
3+
4+
@enum.unique
class Distribution(enum.Enum):
    """
    Closed set of distribution types.

    Each member's value is its own name as a string; ``enum.unique``
    guarantees that no two members share a value.
    """

    no_type = "no_type"
    normal = "normal"
    exponential = "exponential"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import enum
2+
3+
4+
@enum.unique
class Hypothesis(enum.Enum):
    """
    Closed set of hypotheses, encoded as integers.

    ``h0`` maps to 0 and ``h1`` maps to 1 (presumably the null and the
    alternative hypothesis respectively -- confirm against callers).
    """

    h0 = 0
    h1 = 1

stattest/samples/__init__.py

Whitespace-only changes.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import json
2+
import os
3+
from os.path import exists, abspath
4+
import numpy as np
5+
from stattest.experiment._distribution_type_enum import Distribution
6+
7+
8+
def generate_samples(dist_type: Distribution = None,
                     number: int = None,
                     start_size: int = None,
                     final_size: int = None,
                     step: int = None,
                     path: str = None):
    """
    Generates random samples and stores them in a JSON file.

    The file is named
    ``{type}_{number}_{start_size}_{final_size}_{step}.json`` where ``type``
    is ``dist_type.value`` or ``all`` when no distribution is given. The
    JSON maps each distribution value to a mapping ``size -> list of
    samples`` (sizes become string keys in JSON).

    Parameters
    ----------
    dist_type : Distribution
        Enum value representing distribution type. ``None`` generates
        samples for every member of ``Distribution``.
    number : int
        Number of samples of each size. Must be provided.
    start_size : int
        Start size of the samples. Must be provided.
    final_size : int
        Final size of the samples (inclusive). Must be provided.
    step : int
        Step of the iteration over sizes. Must be provided.
    path : str
        Directory to save JSON file to. Defaults to the current working
        directory.

    Returns
    -------
    True

    Raises
    ------
    FileExistsError
        If a file for the same parameters already exists in ``path``.
    """
    path = path if path is not None else os.getcwd()

    all_types = dist_type is None

    filename = f"{'all' if all_types else dist_type.value}_{number}_{start_size}_{final_size}_{step}"
    file_path = os.path.join(path, f"{filename}.json")
    if exists(file_path):
        raise FileExistsError("Such samples already exist")

    # One sampler per distribution type; each takes the sample size.
    samplers = {
        Distribution.no_type: lambda size: np.random.random_sample(size=size),
        Distribution.normal: lambda size: np.random.normal(loc=0, scale=1, size=size),
        Distribution.exponential: lambda size: np.random.exponential(scale=1, size=size),
    }

    selected_types = list(Distribution) if all_types else [dist_type]
    sizes = range(start_size, final_size + 1, step)

    # Build an independent size -> samples mapping for every distribution.
    # Sharing a single mapping between the keys would make every
    # distribution alias (and overwrite) the same lists.
    samples = {}
    for type_ in selected_types:
        draw = samplers[type_]
        samples[type_.value] = {
            size: [draw(size).tolist() for _ in range(number)]
            for size in sizes
        }

    # The context manager closes the file even if serialization fails.
    with open(file_path, "w", encoding="utf-8") as save_file:
        json.dump(samples, save_file, indent=4)

    return True

stattest/stats/_stats_exp.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
from scipy.stats import norm
2+
import numpy as np
3+
from stattest._utils import _check_sample_length, _scale_sample
4+
5+
6+
def eptest_exp(x):
    """
    Epps and Pulley test statistic for exponentiality.

    Computes ``sqrt(48 * n) * (mean(exp(-y)) - 1/2)`` where ``y`` is the
    sample divided by its mean and ``n`` is the sample size.

    Parameters
    ----------
    x : array_like
        Array of sample data.

    Returns
    -------
    statistic : float
        The test statistic.

    Raises
    ------
    ValueError
        If the sample has fewer than 3 elements.
    """
    n = len(x)
    _check_sample_length(x)

    # Sequential sum of exp(-y_j) over the mean-scaled observations.
    exp_total = sum(np.exp(-value) for value in _scale_sample(x))
    mean_exp = exp_total / n

    return ((48 * n) ** 0.5) * (mean_exp - 0.5)
31+
32+
33+
def cmtest_exp(x):
    """
    Cramer-von-Mises test statistic for exponentiality.

    Computes ``1 / (12 * n) + sum_j (F(y_(j)) - (2j - 1) / (2n)) ** 2`` for
    ``j = 1, ..., n``, where ``y_(j)`` are the ordered observations of the
    sample divided by its mean and ``F(y) = 1 - exp(-y)`` is the unit
    exponential CDF.

    Parameters
    ----------
    x : array_like
        Array of sample data.

    Returns
    -------
    statistic : float
        The test statistic.

    Raises
    ------
    ValueError
        If the sample has fewer than 3 elements.
    """
    n = len(x)
    _check_sample_length(x)
    x_scaled_sorted = sorted(_scale_sample(x))

    statistic_sum = 0
    # The formula indexes order statistics from 1, so enumerate from 1
    # rather than using the 0-based position.
    for j, value in enumerate(x_scaled_sorted, start=1):
        statistic_sum += ((1 - np.exp(-value)) - (2 * j - 1) / (2 * n)) ** 2

    # Parenthesised on purpose: ``1 / 12 * n`` would evaluate to n / 12.
    statistic = 1 / (12 * n) + statistic_sum

    return statistic
58+
59+
60+
def kstest_exp(x):
    """
    Kolmogorov and Smirnov test statistic for exponentiality.

    Computes ``max(D+, D-)`` with
    ``D+ = max_j (j / n - F(y_(j)))`` and
    ``D- = max_j (F(y_(j)) - (j - 1) / n)`` for ``j = 1, ..., n``, where
    ``y_(j)`` are the ordered observations of the sample divided by its mean
    and ``F(y) = 1 - exp(-y)`` is the unit exponential CDF. No ``sqrt(n)``
    scaling is applied.

    Parameters
    ----------
    x : array_like
        Array of sample data.

    Returns
    -------
    statistic : float
        The test statistic.

    Raises
    ------
    ValueError
        If the sample has fewer than 3 elements.
    """
    n = len(x)
    _check_sample_length(x)
    x_scaled_sorted = sorted(_scale_sample(x))

    ks_plus = float('-inf')
    ks_minus = float('-inf')

    # Ranks are 1-based: the empirical CDF jumps from (j - 1) / n to j / n
    # at the j-th ordered observation.
    for j, value in enumerate(x_scaled_sorted, start=1):
        cdf_value = 1 - np.exp(-value)
        ks_plus = max(j / n - cdf_value, ks_plus)
        ks_minus = max(cdf_value - (j - 1) / n, ks_minus)

    statistic = max(ks_plus, ks_minus)

    return statistic
88+
89+
90+
def zptest_exp(x):
    """
    Zardasht et al. test statistic for exponentiality.

    Computes ``mean(y * exp(-y)) - 1/4`` where ``y`` is the sample divided
    by its mean.

    Parameters
    ----------
    x : array_like
        Array of sample data.

    Returns
    -------
    statistic : float
        The test statistic.

    Raises
    ------
    ValueError
        If the sample has fewer than 3 elements.
    """
    n = len(x)
    _check_sample_length(x)

    # Sequential sum of y_j * exp(-y_j) over the mean-scaled observations.
    weighted_total = sum(value * np.exp(-value) for value in _scale_sample(x))

    return weighted_total / n - (1 / 4)

0 commit comments

Comments
 (0)