Skip to content

Commit 2512503

Browse files
Merge pull request #412 from Crunch-io/ZC-617-audience-ratio-measure
[ZC-617]: add audience ratio measure
2 parents d39378b + 12efa48 commit 2512503

File tree

11 files changed

+1889
-38
lines changed

11 files changed

+1889
-38
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ on:
1010

1111
jobs:
1212
build:
13-
runs-on: ubuntu-20.04
13+
runs-on: ubuntu-22.04
1414
env:
1515
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1616
COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
1717
strategy:
1818
max-parallel: 5
1919
matrix:
20-
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
20+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
2121
steps:
2222
- uses: actions/checkout@v4
2323
- name: Set up Python ${{ matrix.python-version }}

src/cr/cube/cubepart.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,11 @@ def __repr__(self):
278278

279279
# ---interface ---------------------------------------------------
280280

281+
@lazyproperty
def audience_ratio(self):
    """2D np.float64 ndarray holding the audience-ratio of each matrix cell.

    The matrix is assembled from the blocks of the audience-ratio measure.
    """
    ratio_blocks = self._measures.audience_ratio.blocks
    return self._assemble_matrix(ratio_blocks)
285+
281286
@lazyproperty
282287
def column_aliases(self):
283288
"""1D str ndarray of alias for each column, for use as column headings."""

src/cr/cube/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class MEASURE(enum.Enum):
116116
COLUMN_BASE_WEIGHTED = "col_base_weighted"
117117
COLUMN_INDEX = "col_index"
118118
COLUMN_PERCENT = "col_percent"
119+
AUDIENCE_RATIO = "audience_ratio"
119120
COLUMN_PERCENT_MOE = "col_percent_moe"
120121
COLUMN_SHARE_SUM = "col_share_sum"
121122
COLUMN_STDDEV = "col_std_dev"

src/cr/cube/matrix/assembler.py

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,17 @@ def column_display_order(cls, dimensions, second_order_measures, format):
5050
# --- form consistent with `.row_display_order()` and we'll elaborate this when
5151
# --- we add sort-by-value to columns.
5252
collation_method = dimensions[1].order_spec.collation_method
53+
# fmt: off
5354
HelperCls = (
5455
_SortColumnsByLabelHelper
5556
if collation_method == CM.LABEL
56-
else (
57-
_SortColumnsByBaseRowHelper
58-
if collation_method == CM.OPPOSING_ELEMENT
59-
else (
60-
_SortColumnsByInsertedRowHelper
61-
if collation_method == CM.OPPOSING_INSERTION
62-
else _ColumnOrderHelper
63-
)
64-
)
57+
else _SortColumnsByBaseRowHelper
58+
if collation_method == CM.OPPOSING_ELEMENT
59+
else _SortColumnsByInsertedRowHelper
60+
if collation_method == CM.OPPOSING_INSERTION
61+
else _ColumnOrderHelper
6562
)
63+
# fmt: on
6664
return HelperCls(dimensions, second_order_measures, format)._display_order
6765

6866
@classmethod
@@ -75,28 +73,21 @@ def row_display_order(cls, dimensions, second_order_measures, format):
7573
"""
7674
collation_method = dimensions[0].order_spec.collation_method
7775
dim_type = dimensions[1].dimension_type
76+
# fmt: off
7877
HelperCls = (
7978
_SortRowsByBaseColumnHelper
8079
if collation_method == CM.OPPOSING_ELEMENT
81-
else (
82-
_SortRowsByDerivedColumnHelper
83-
if collation_method == CM.OPPOSING_INSERTION
84-
and dim_type in DT.ARRAY_TYPES
85-
else (
86-
_SortRowsByInsertedColumnHelper
87-
if collation_method == CM.OPPOSING_INSERTION
88-
else (
89-
_SortRowsByLabelHelper
90-
if collation_method == CM.LABEL
91-
else (
92-
_SortRowsByMarginalHelper
93-
if collation_method == CM.MARGINAL
94-
else _RowOrderHelper
95-
)
96-
)
97-
)
98-
)
80+
else _SortRowsByDerivedColumnHelper
81+
if collation_method == CM.OPPOSING_INSERTION and dim_type in DT.ARRAY_TYPES
82+
else _SortRowsByInsertedColumnHelper
83+
if collation_method == CM.OPPOSING_INSERTION
84+
else _SortRowsByLabelHelper
85+
if collation_method == CM.LABEL
86+
else _SortRowsByMarginalHelper
87+
if collation_method == CM.MARGINAL
88+
else _RowOrderHelper
9989
)
90+
# fmt: on
10091
return HelperCls(dimensions, second_order_measures, format)._display_order
10192

10293
@lazyproperty
@@ -155,6 +146,7 @@ def _measure(self):
155146
M.COLUMN_BASE_WEIGHTED: "column_weighted_bases",
156147
M.COLUMN_INDEX: "column_index",
157148
M.COLUMN_PERCENT: "column_proportions",
149+
M.AUDIENCE_RATIO: "audience_ratio",
158150
M.COLUMN_PERCENT_MOE: "column_std_err", # monotonic transform
159151
M.COLUMN_SHARE_SUM: "column_share_sum",
160152
M.COLUMN_STDDEV: "column_proportion_variances", # monotonic transform

src/cr/cube/matrix/measure.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def __init__(self, cube, dimensions, slice_idx):
3333
self._dimensions = dimensions
3434
self._slice_idx = slice_idx
3535

36+
@lazyproperty
def audience_ratio(self):
    """The `_AudienceRatio` measure object for this cube-result."""
    dimensions, cube_measures = self._dimensions, self._cube_measures
    return _AudienceRatio(dimensions, self, cube_measures)
40+
3641
@lazyproperty
3742
def column_comparable_counts(self):
3843
"""_ColumnComparableCounts measure object for this cube-result."""
@@ -830,6 +835,76 @@ def _weighted_base_blocks(self):
830835
return self._second_order_measures.column_weighted_bases.blocks
831836

832837

838+
class _AudienceRatio(_ColumnProportions):
    """Provides the audience-ratio measure for a matrix.

    This measure (also known as "Index") is the ratio of the proportions of the first
    column to the proportions of the second column, multiplied by 100. It is a
    convenient way of showing the ratio of the Target % to the Control % in a
    compared-group analysis (aka profiles analysis).

    The result is a 2D np.float64 ndarray with the same shape as the counts. The ratio
    is placed in the first column and the second column is filled with NaN.

    NOTE: At the moment only two compare groups are supported, which means the analysis
    must have exactly two columns. For any other number of columns every value is NaN,
    marking the measure as not valid.
    """

    @lazyproperty
    def _base_values(self):
        """2D np.float64 ndarray of audience-ratio values for the body cells.

        This is the first "block". The ratio is in column 0 and column 1 is NaN. All
        values are NaN when the measure cannot be computed.
        """
        return self._ratio_block(0)

    @lazyproperty
    def _can_compute_measure(self):
        """True when the audience ratio is computable.

        The measure needs exactly two columns in the base-values block; otherwise the
        blocks of this measure are filled with NaN.
        """
        return self._count_blocks[0][0].shape[-1] == 2

    @lazyproperty
    def _intersections(self):
        """(n_row_subtotals, n_col_subtotals) ndarray of intersection values.

        An intersection value arises where a row-subtotal crosses a column-subtotal.

        Always NaN because the audience ratio is not defined for the intersections.
        """
        return np.full(self._count_blocks[1][1].shape, np.nan)

    def _ratio_block(self, row_block_idx):
        """Return the 2D audience-ratio ndarray for one block of rows.

        `row_block_idx` is 0 for the body rows and 1 for the subtotal rows; it selects
        the counts and weighted-bases blocks that share the base columns.
        """
        counts = self._count_blocks[row_block_idx][0]
        if not self._can_compute_measure:
            return np.full(counts.shape, np.nan)

        weighted_bases = self._weighted_base_blocks[row_block_idx][0]
        # --- do not propagate divide-by-zero warnings to stderr ---
        with np.errstate(divide="ignore", invalid="ignore"):
            proportions = counts / weighted_bases
            ratios = (proportions[:, 0] / proportions[:, 1]) * 100

        # --- the ratio describes the column pair, so it is reported once (in the
        # --- first column) and the second column is padded with NaN to keep the shape
        return np.column_stack((ratios, np.full_like(ratios, np.nan)))

    @lazyproperty
    def _subtotal_columns(self):
        """2D np.float64 ndarray of audience-ratio values for the subtotal columns.

        This is the second "block" and has the shape (n_rows, n_col_subtotals).

        Always NaN because the audience ratio is not defined for subtotal columns.
        """
        # --- `np.full()` rather than `np.empty()` so the values are deterministic NaNs
        # --- instead of uninitialized memory whenever column subtotals are present
        return np.full(self._count_blocks[0][1].shape, np.nan)

    @lazyproperty
    def _subtotal_rows(self):
        """2D np.float64 ndarray of audience-ratio values for the subtotal rows.

        This is the third "block" and has the shape (n_row_subtotals, n_cols). The
        ratio is in column 0 and column 1 is NaN. All values are NaN when the measure
        cannot be computed.
        """
        return self._ratio_block(1)
907+
833908
class _ColumnProportionsSmoothed(_ColumnProportions, _SmoothedMeasure):
834909
"""Provides the smoothed column-proportions measure for a matrix.
835910

0 commit comments

Comments
 (0)