Skip to content

Commit bb971f6

Browse files
JuiP and ataymano authored
Merge branch rlos2021_cfe to master (#36)
* Initial file structure changes for Package (#16) * Interface Approach2 (#29) * Split ips and snips. Make two classes: Estimator and Interval * Split pseudo_inverse into 2 classes: Estimator and Interval * Fix test_pi, remove redundant file ips_snips, Remove type argument from get_estimate * Slates interface implementation * Cb interface initial commit * Rename file and change class names * Edit doc strings * Change count datatype to float in cb_base * Added gaussian, clopper_pearson files and removed type from cb interface * Add newline at the end of file * Changes for slates - Renamed file from slates_helper to slates_base - Added gaussian.py - Removed type from get_interval - Removed type from get_estimate - Change doc strings for the slates interface - Changed class names - Changed data type of count - Fixed data type of p_log and p_pred - Removed unused imports * Remove redundant imports and code * Change method name to get() * Rename file to base and change class name of ips, snips * Change doc strings and variable name: slates * Changes for test_pi * Cressieread Interval update * Changes folder name and class names (#31) * Minimal changes tobasic-usage (#32) * Improvements for setup.py and slates (#33) * imports fix (#34) * Adding Tests (#35) * Unit tests added * Test for multiple examples * Added test for checking narrowing intervals * Combine all unit test functions into one * Added comments * Added another example generator * Fixed Imports * Change variable names and fix typo * Added check for correct format of Confidence Interval * Separate bandit and slates tests * Move functions to utils * Added test for correctness(slates) * Comments added for test_bandits * Added tests for slates intervals * Move data generators from helper files to test_* files * Remove num_slots as a parameter in util functions * Combine run_estimator function * Combine SlatesHelper and BanditsHelper * Move assert statements from run_estimator() to test_*.py * Move assert 
statements from Helper() functions to test_*.py file * Improving code consistency * Defined static methods and renamed file to utils.py * Add function assert_is_close to utils * Variable name changed * Restructuring of code * CI improvements (#38) * Added support for Python version 3.9 * CI: Check test coverage * Added interface and module for ccb (#37) * Added ccb estimator (#39) * Added ccb estimator file * Removed type and added Interval() * Added unit test for ccb + code corrections in ccb.py * Test for correctness and narrowing Intervals added * Changed module name * Change variable name * Removed hard coding for specific alpha values in gaussain files (#44) * Add tests (#43) * use random.seed() to make test scenarios reproducible * Change function names * Rename variables * Rename variables listofestimators->estimators and listofintervals->intervals * Renamed variables for test_narrowing_intervals * Added test to check alpha value is not hardcoded for bandits * Renamed to test_different_alpha_CI * Rlos2021 minor cleanup (#45) * minor cleanups * py35 removal * more type hints * snake case * ValueError * snake case Co-authored-by: Alexey Taymanov <[email protected]> Co-authored-by: Alexey Taymanov <[email protected]>
1 parent 993c080 commit bb971f6

31 files changed

+958
-279
lines changed

.github/workflows/pythonpackage.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
python-version: [3.5, 3.6, 3.7, 3.8]
18+
python-version: [3.6, 3.7, 3.8, 3.9]
1919

2020
steps:
2121
- uses: actions/checkout@v2
@@ -34,7 +34,9 @@ jobs:
3434
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
3535
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
3636
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
37-
- name: Test with pytest
37+
- name: Test with pytest and check coverage
3838
run: |
3939
pip install pytest
4040
pytest
41+
pip install pytest-cov
42+
pytest --cov=estimators

.gitignore

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#Jupyter notebook checkpoints
2+
**/.ipynb_checkpoints/*
3+
4+
# Byte-compiled / optimized / DLL files
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
*.egg-info
9+
10+
# Python build artifacts
11+
build/
12+
dist/
13+
14+
#ignored examples files
15+
examples/*.log
16+
17+
# Editors
18+
.vscode/
19+
.idea/
20+
21+
# Type checking
22+
.mypy_cache
23+
24+
.coverage

estimators/__init__.py

Whitespace-only changes.

estimators/bandits/__init__.py

Whitespace-only changes.

estimators/bandits/base.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
""" Interface for implementation of contextual bandit estimators """
2+
3+
from abc import ABC, abstractmethod
4+
from typing import List
5+
6+
class Estimator(ABC):
    """Interface for implementation of contextual bandit estimators.

    An implementation accumulates logged examples through ``add_example``
    and reports the resulting point estimate through ``get``.
    """

    @abstractmethod
    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        """Add one logged example to the estimator.

        Args:
            p_log: probability of the logging policy
            r: reward for choosing an action in the given context
            p_pred: predicted probability of making decision
            count: weight of the example; defaults to 1.0 so the interface
                agrees with the concrete estimators, which all default it
        """
        ...

    @abstractmethod
    def get(self) -> float:
        """Calculate the selected estimator.

        Returns:
            The estimator value
        """
        ...
28+
29+
class Interval(ABC):
    """Interface for implementation of contextual bandit confidence intervals.

    An implementation accumulates logged examples through ``add_example``
    and reports a confidence interval through ``get``.
    """

    @abstractmethod
    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        """Add one logged example to the interval computation.

        Args:
            p_log: probability of the logging policy
            r: reward for choosing an action in the given context
            p_pred: predicted probability of making decision
            count: weight of the example; defaults to 1.0 so the interface
                agrees with the concrete intervals, which all default it
        """
        ...

    @abstractmethod
    def get(self, alpha: float = 0.05) -> List[float]:
        """Calculate the confidence interval.

        Args:
            alpha: alpha value; defaults to 0.05, the default used by the
                concrete interval implementations
        Returns:
            The confidence interval as a list of floats
        """
        ...
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import math
2+
from scipy.stats import beta
3+
from estimators.bandits import base
4+
from typing import List
5+
6+
class Interval(base.Interval):
    """Clopper-Pearson confidence interval built from aggregated IPS terms."""

    def __init__(self):
        # Running aggregates kept in a single dict:
        #   'n': IPS of numerator
        #   'N': total number of samples in bin from log (IPS = n/N)
        #   'c': largest r * p_pred / p_log term seen so far
        #        (needed for Clopper-Pearson confidence intervals)
        self.data = dict(n=0., N=0, c=0.)

    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        """Fold one logged example into the aggregates.

        Args:
            p_log: probability of the logging policy
            r: reward for choosing an action in the given context
            p_pred: predicted probability of making decision
            count: weight
        """
        stats_ = self.data
        stats_['N'] += count
        if not p_pred > 0:
            return

        # The ratio is computed before looking at the reward, so a zero
        # p_log fails here regardless of r.
        ratio = p_pred/p_log
        if r != 0:
            weighted_reward = r*ratio
            stats_['n'] += weighted_reward*count
            stats_['c'] = max(stats_['c'], weighted_reward)

    def get(self, alpha: float = 0.05) -> List[float]:
        """Return the interval as a two-element list.

        Args:
            alpha: alpha value; alpha / 2 is used for each tail
        Returns:
            [lower, upper] taken from beta quantiles, or [0, 0] when no
            positive weighted reward has been recorded.
        """
        scale = self.data['c']
        if not scale > 0.0:
            return [0, 0]

        # Rescale by the largest weighted reward before the beta lookup.
        hits = self.data['n'] / scale
        trials = self.data['N'] / scale
        lower = beta.ppf(alpha / 2, hits, trials - hits + 1)
        upper = beta.ppf(1 - alpha / 2, hits + 1, trials - hits)
        return [lower, upper]
Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# CR(-2) is particularly computationally convenient
22

33
from math import fsum, inf
4+
from estimators.bandits import base
5+
from typing import List
46

5-
class Estimator:
7+
class Estimator(base.Estimator):
68
# NB: This works better if you use the true wmin and wmax
79
# which is _not_ the empirical minimum and maximum
810
# but rather the actual smallest and largest possible values
9-
def __init__(self, wmin=0, wmax=inf):
11+
def __init__(self, wmin: float = 0, wmax: float = inf):
1012
assert wmin < 1
1113
assert wmax > 1
1214

@@ -15,7 +17,7 @@ def __init__(self, wmin=0, wmax=inf):
1517

1618
self.data = []
1719

18-
def add_example(self, p_log, r, p_pred, count=1):
20+
def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
1921
if count > 0:
2022
w = p_pred / p_log
2123
assert w >= 0, 'Error: negative importance weight'
@@ -24,7 +26,7 @@ def add_example(self, p_log, r, p_pred, count=1):
2426
self.wmax = max(self.wmax, w)
2527
self.wmin = min(self.wmin, w)
2628

27-
def get_estimate(self, rmin=0, rmax=1):
29+
def get(self) -> float:
2830
n = fsum(c for c, _, _ in self.data)
2931
assert n > 0, 'Error: No data point added'
3032

@@ -53,20 +55,23 @@ def get_estimate(self, rmin=0, rmax=1):
5355

5456
return vhat
5557

56-
class Interval:
58+
class Interval(base.Interval):
5759
# NB: This works better if you use the true wmin and wmax
5860
# which is _not_ the empirical minimum and maximum
5961
# but rather the actual smallest and largest possible values
60-
def __init__(self, wmin=0, wmax=inf):
62+
def __init__(self, wmin: float = 0, wmax: float = inf, rmin: float = 0, rmax: float = 1):
6163
assert wmin < 1
6264
assert wmax > 1
6365

6466
self.wmin = wmin
6567
self.wmax = wmax
6668

69+
self.rmin = rmin
70+
self.rmax = rmax
71+
6772
self.data = []
6873

69-
def add_example(self, p_log, r, p_pred, count=1):
74+
def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
7075
if count > 0:
7176
w = p_pred / p_log
7277
assert w >= 0, 'Error: negative importance weight'
@@ -75,7 +80,7 @@ def add_example(self, p_log, r, p_pred, count=1):
7580
self.wmax = max(self.wmax, w)
7681
self.wmin = min(self.wmin, w)
7782

78-
def get_interval(self, alpha=0.05, rmin=0, rmax=1):
83+
def get(self, alpha: float = 0.05) -> List[float]:
7984
from math import isclose, sqrt
8085
from scipy.stats import f
8186

@@ -100,7 +105,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
100105
phi = (-uncgstar - Delta) / (2 * (1 + n))
101106

102107
bounds = []
103-
for r, sign in ((rmin, 1), (rmax, -1)):
108+
for r, sign in ((self.rmin, 1), (self.rmax, -1)):
104109
candidates = []
105110
for wfake in (self.wmin, self.wmax):
106111
if wfake == inf:
@@ -144,7 +149,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
144149
candidates.append(gstar)
145150

146151
best = min(candidates)
147-
vbound = min(rmax, max(rmin, sign*best))
152+
vbound = min(self.rmax, max(self.rmin, sign*best))
148153
bounds.append(vbound)
149154

150155
return bounds

estimators/bandits/gaussian.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import math
2+
from estimators.bandits import base
3+
from scipy import stats
4+
from typing import List
5+
6+
class Interval(base.Interval):
    """Gaussian confidence interval around the IPS estimate n/N."""

    def __init__(self):
        ################################# Aggregates quantities #########################################
        #
        # 'n': IPS of numerator
        # 'N': total number of samples in bin from log (IPS = n/N)
        # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals)
        #
        #################################################################################################

        self.data = {'n': 0., 'N': 0, 'SoS': 0}

    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        """Fold one logged example into the aggregates.

        Args:
            p_log: probability of the logging policy
            r: reward for choosing an action in the given context
            p_pred: predicted probability of making decision
            count: weight
        """
        self.data['N'] += count
        if p_pred > 0:
            p_over_p = p_pred/p_log
            if r != 0:
                self.data['n'] += r*p_over_p*count
                self.data['SoS'] += ((r*p_over_p)**2)*count

    def get(self, alpha: float = 0.05) -> List[float]:
        """Return the interval as a two-element list.

        Args:
            alpha: alpha value; the normal quantile at 1 - alpha / 2 is used
        Returns:
            [lower, upper] centred on n/N, or [0, 0] when the sum of squares
            is not positive or the total weight N is not greater than 1.
        """
        bounds = []
        num = self.data['n']
        den = self.data['N']
        sum_of_sq = self.data['SoS']

        if sum_of_sq > 0.0 and den > 1:
            z_gaussian_cdf = stats.norm.ppf(1-alpha/2)

            variance = (sum_of_sq - num * num / den) / (den - 1)
            # Floating-point cancellation can push a (near-)zero variance
            # slightly below zero, which would make math.sqrt raise
            # ValueError. Clamp only genuine negatives so NaN still propagates.
            if variance < 0:
                variance = 0.0
            gauss_delta = z_gaussian_cdf * math.sqrt(variance/den)
            bounds.append(num / den - gauss_delta)
            bounds.append(num / den + gauss_delta)

        if not bounds:
            bounds = [0, 0]
        return bounds

estimators/bandits/ips.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from estimators.bandits import base
2+
3+
class Estimator(base.Estimator):
    """IPS point estimate, computed as n/N from running aggregates."""

    def __init__(self):
        # Running aggregates kept in a single dict:
        #   'n': IPS of numerator
        #   'N': total number of samples in bin from log (IPS = n/N)
        self.data = dict(n=0., N=0)

    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        """Fold one logged example into the aggregates.

        Args:
            p_log: probability of the logging policy
            r: reward for choosing an action in the given context
            p_pred: predicted probability of making decision
            count: weight
        """
        totals = self.data
        totals['N'] += count
        if not p_pred > 0:
            return

        # The ratio is computed before looking at the reward, so a zero
        # p_log fails here regardless of r.
        ratio = p_pred/p_log
        if r != 0:
            totals['n'] += r*ratio*count

    def get(self) -> float:
        """Return the estimate n/N.

        Returns:
            The estimator value
        Raises:
            ValueError: if the accumulated weight N is zero.
        """
        total_weight = self.data['N']
        if total_weight == 0:
            raise ValueError('Error: No data point added')

        return self.data['n']/total_weight

0 commit comments

Comments
 (0)