VowpalWabbit
diff --git a/‎.github/workflows/pythonpackage.yml‎
Lines changed: 4 additions & 2 deletions b/‎.github/workflows/pythonpackage.yml‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎.gitignore‎
Lines changed: 24 additions & 0 deletions b/‎.gitignore‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎estimators/__init__.py‎ b/‎estimators/__init__.py‎
diff --git a/‎estimators/bandits/__init__.py‎ b/‎estimators/bandits/__init__.py‎
diff --git a/‎estimators/bandits/base.py‎
Lines changed: 51 additions & 0 deletions b/‎estimators/bandits/base.py‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎cats_utils.py‎ ‎estimators/bandits/cats_utils.py‎cats_utils.py renamed to estimators/bandits/cats_utils.py b/‎cats_utils.py‎ ‎estimators/bandits/cats_utils.py‎cats_utils.py renamed to estimators/bandits/cats_utils.py
diff --git a/‎estimators/bandits/clopper_pearson.py‎
Lines changed: 41 additions & 0 deletions b/‎estimators/bandits/clopper_pearson.py‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎cressieread.py‎ ‎estimators/bandits/cressieread.py‎cressieread.py renamed to estimators/bandits/cressieread.py
Lines changed: 15 additions & 10 deletions b/‎cressieread.py‎ ‎estimators/bandits/cressieread.py‎cressieread.py renamed to estimators/bandits/cressieread.py
Lines changed: 15 additions & 10 deletions
diff --git a/‎estimators/bandits/gaussian.py‎
Lines changed: 43 additions & 0 deletions b/‎estimators/bandits/gaussian.py‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎estimators/bandits/ips.py‎
Lines changed: 26 additions & 0 deletions b/‎estimators/bandits/ips.py‎
Lines changed: 26 additions & 0 deletions
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8, 3.9]
 
     steps:
     - uses: actions/checkout@v2
@@ -34,7 +34,9 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
+    - name: Test with pytest and check coverage
       run: |
         pip install pytest
         pytest
+        pip install pytest-cov
+        pytest --cov=estimators
@@ -0,0 +1,24 @@
+# Jupyter notebook checkpoints
+**/.ipynb_checkpoints/*
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info
+
+# Python build artifacts
+build/
+dist/
+
+# Ignored example log files
+examples/*.log
+
+# Editors
+.vscode/
+.idea/
+
+# Type checking
+.mypy_cache
+
+.coverage
@@ -0,0 +1,51 @@
+""" Interface for implementation of contextual bandit estimators """
+
+from abc import ABC, abstractmethod
+from typing import List
+
+class Estimator(ABC):
+	"""Abstract base class for contextual bandit estimators.
+
+	Both ``add_example`` and ``get`` are abstract, so a concrete subclass
+	has to override them before it can be instantiated.
+	"""
+
+	@abstractmethod
+	def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
+		"""Accept one example; the behaviour is defined by the subclass.
+
+		Args:
+			p_log: probability of the logging policy
+			r: reward for choosing an action in the given context
+			p_pred: predicted probability of making decision
+			count: weight
+		"""
+
+	@abstractmethod
+	def get(self) -> float:
+		"""Calculate the selected estimator.
+
+		Returns:
+			The estimator value
+		"""
+
+class Interval(ABC):
+	"""Abstract base class for contextual bandit confidence intervals.
+
+	Both ``add_example`` and ``get`` are abstract, so a concrete subclass
+	has to override them before it can be instantiated.
+	"""
+
+	@abstractmethod
+	def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
+		"""Accept one example; the behaviour is defined by the subclass.
+
+		Args:
+			p_log: probability of the logging policy
+			r: reward for choosing an action in the given context
+			p_pred: predicted probability of making decision
+			count: weight
+		"""
+
+	@abstractmethod
+	def get(self, alpha: float) -> List[float]:
+		"""Calculate the confidence interval.
+
+		Args:
+			alpha: alpha value
+
+		Returns:
+			The confidence interval as list[float]
+		"""
@@ -0,0 +1,41 @@
+import math
+from scipy.stats import beta
+from estimators.bandits import base
+from typing import List
+
+class Interval(base.Interval):
+    """Clopper-Pearson confidence interval built from aggregated IPS quantities."""
+
+    def __init__(self):
+        # Running aggregates kept in ``self.data``:
+        #   'n': IPS numerator, i.e. the sum of r * (p_pred / p_log) * count
+        #   'N': total count of all examples seen (IPS = n / N)
+        #   'c': largest r * (p_pred / p_log) seen so far (no absolute value is taken)
+        self.data = dict(n=0.0, N=0, c=0.0)
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one example into the running aggregates.
+
+        Args:
+            p_log: probability of the logging policy
+            r: reward for choosing an action in the given context
+            p_pred: predicted probability of making decision
+            count: weight
+        """
+        # Every example counts towards the denominator, even when it adds no reward.
+        self.data['N'] += count
+        if not p_pred > 0:
+            return
+
+        importance = p_pred / p_log
+        if r == 0:
+            return
+
+        weighted_reward = r * importance
+        self.data['n'] += weighted_reward * count
+        self.data['c'] = max(self.data['c'], weighted_reward)
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        """Compute the interval from the aggregates.
+
+        Args:
+            alpha: alpha value; each tail uses ``alpha / 2``
+
+        Returns:
+            ``[lower, upper]``, or ``[0, 0]`` when 'c' is not positive.
+        """
+        scale = self.data['c']
+        if not scale > 0.0:
+            return [0, 0]
+
+        # Rescale numerator and denominator by the largest weighted reward.
+        successes = self.data['n'] / scale
+        trials = self.data['N'] / scale
+        tail = alpha / 2
+        return [
+            beta.ppf(tail, successes, trials - successes + 1),
+            beta.ppf(1 - tail, successes + 1, trials - successes),
+        ]
@@ -1,12 +1,14 @@
 # CR(-2) is particularly computationally convenient
 
 from math import fsum, inf
+from estimators.bandits import base
+from typing import List
 
-class Estimator:
+class Estimator(base.Estimator):
     # NB: This works better if you use the true wmin and wmax,
     #     which are _not_ the empirical minimum and maximum
     #     but rather the actual smallest and largest possible values
-    def __init__(self, wmin=0, wmax=inf):
+    def __init__(self, wmin: float = 0, wmax: float = inf):
         assert wmin < 1
         assert wmax > 1
 
@@ -15,7 +17,7 @@ def __init__(self, wmin=0, wmax=inf):
 
         self.data = []
 
-    def add_example(self, p_log, r, p_pred, count=1):
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
         if count > 0:
             w = p_pred / p_log
             assert w >= 0, 'Error: negative importance weight'
@@ -24,7 +26,7 @@ def add_example(self, p_log, r, p_pred, count=1):
             self.wmax = max(self.wmax, w)
             self.wmin = min(self.wmin, w)
 
-    def get_estimate(self, rmin=0, rmax=1):
+    def get(self) -> float:
         n = fsum(c for c, _, _ in self.data)
         assert n > 0, 'Error: No data point added'
 
@@ -53,20 +55,23 @@ def get_estimate(self, rmin=0, rmax=1):
 
         return vhat
 
-class Interval:
+class Interval(base.Interval):
     # NB: This works better if you use the true wmin and wmax,
     #     which are _not_ the empirical minimum and maximum
     #     but rather the actual smallest and largest possible values
-    def __init__(self, wmin=0, wmax=inf):
+    def __init__(self, wmin: float = 0, wmax: float = inf, rmin: float = 0, rmax: float = 1):
         assert wmin < 1
         assert wmax > 1
 
         self.wmin = wmin
         self.wmax = wmax
 
+        self.rmin = rmin
+        self.rmax = rmax
+
         self.data = []
 
-    def add_example(self, p_log, r, p_pred, count=1):
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
         if count > 0:
             w = p_pred / p_log
             assert w >= 0, 'Error: negative importance weight'
@@ -75,7 +80,7 @@ def add_example(self, p_log, r, p_pred, count=1):
             self.wmax = max(self.wmax, w)
             self.wmin = min(self.wmin, w)
 
-    def get_interval(self, alpha=0.05, rmin=0, rmax=1):
+    def get(self, alpha: float = 0.05) -> List[float]:
         from math import isclose, sqrt
         from scipy.stats import f
 
@@ -100,7 +105,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
         phi = (-uncgstar - Delta) / (2 * (1 + n))
 
         bounds = []
-        for r, sign in ((rmin, 1), (rmax, -1)):
+        for r, sign in ((self.rmin, 1), (self.rmax, -1)):
             candidates = []
             for wfake in (self.wmin, self.wmax):
                 if wfake == inf:
@@ -144,7 +149,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
                                 candidates.append(gstar)
 
             best = min(candidates)
-            vbound = min(rmax, max(rmin, sign*best))
+            vbound = min(self.rmax, max(self.rmin, sign*best))
             bounds.append(vbound)
 
         return bounds
@@ -0,0 +1,43 @@
+import math
+from estimators.bandits import base
+from scipy import stats
+from typing import List
+
+class Interval(base.Interval):
+    """Gaussian confidence interval built from aggregated IPS quantities."""
+
+    def __init__(self):
+        ################################# Aggregates quantities #########################################
+        #
+        # 'n':   IPS of numerator
+        # 'N':   total number of samples in bin from log (IPS = n/N)
+        # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals)
+        #
+        #################################################################################################
+
+        self.data = {'n':0.,'N':0,'SoS':0}
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one example into the running aggregates.
+
+        Args:
+            p_log: probability of the logging policy
+            r: reward for choosing an action in the given context
+            p_pred: predicted probability of making decision
+            count: weight
+        """
+        # Every example counts towards the denominator, even when it adds no reward.
+        self.data['N'] += count
+        if p_pred > 0:
+            p_over_p = p_pred/p_log
+            if r != 0:
+                self.data['n'] += r*p_over_p*count
+                self.data['SoS'] += ((r*p_over_p)**2)*count
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        """Compute the interval from the aggregates.
+
+        Args:
+            alpha: alpha value; the normal quantile is taken at ``1 - alpha / 2``
+
+        Returns:
+            ``[lower, upper]`` centred on ``n / N``, or ``[0, 0]`` when the sum
+            of squares is not positive or ``N`` is not greater than 1.
+        """
+        num = self.data['n']
+        den = self.data['N']
+        sum_of_sq = self.data['SoS']
+
+        if not (sum_of_sq > 0.0 and den > 1):
+            return [0, 0]
+
+        z_gaussian_cdf = stats.norm.ppf(1-alpha/2)
+
+        variance = (sum_of_sq - num * num / den) / (den - 1)
+        # Rounding in the subtraction above can leave a tiny negative value when
+        # all weighted rewards are (nearly) identical; math.sqrt would then raise
+        # ValueError. Clamp only negatives so that a NaN still propagates.
+        if variance < 0:
+            variance = 0.0
+
+        gauss_delta = z_gaussian_cdf * math.sqrt(variance/den)
+        center = num / den
+        return [center - gauss_delta, center + gauss_delta]
@@ -0,0 +1,26 @@
+from estimators.bandits import base
+
+class Estimator(base.Estimator):
+    """IPS estimator computed as the ratio of two running aggregates."""
+
+    def __init__(self):
+        # Running aggregates kept in ``self.data``:
+        #   'n': IPS numerator, i.e. the sum of r * (p_pred / p_log) * count
+        #   'N': total count of all examples seen (IPS = n / N)
+        self.data = dict(n=0.0, N=0)
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one example into the running aggregates.
+
+        Args:
+            p_log: probability of the logging policy
+            r: reward for choosing an action in the given context
+            p_pred: predicted probability of making decision
+            count: weight
+        """
+        # Every example counts towards the denominator, even when it adds no reward.
+        self.data['N'] += count
+        if not p_pred > 0:
+            return
+
+        importance = p_pred / p_log
+        if r == 0:
+            return
+
+        self.data['n'] += r * importance * count
+
+    def get(self) -> float:
+        """Return the IPS estimate ``n / N``.
+
+        Raises:
+            ValueError: if the accumulated count 'N' equals zero.
+        """
+        total = self.data['N']
+        if total == 0:
+            raise ValueError('Error: No data point added')
+
+        return self.data['n'] / total