Skip to content

Commit c378520

Browse files
tcya authored and meterstick-copybara committed
Replace some `SELECT * EXCEPT (...)` clauses with explicit column lists. In most other SQL dialects EXCEPT means something very different (it is a set-difference operator between two queries, not a way to drop columns from `*`).
PiperOrigin-RevId: 347506749
1 parent fd16f82 commit c378520

File tree

2 files changed

+71
-53
lines changed

2 files changed

+71
-53
lines changed

meterstick_demo.ipynb

Lines changed: 53 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@
9999
"height": 204
100100
},
101101
"executionInfo": {
102-
"elapsed": 802,
102+
"elapsed": 490,
103103
"status": "ok",
104-
"timestamp": 1607230742897,
104+
"timestamp": 1607761536163,
105105
"user": {
106106
"displayName": "",
107107
"photoUrl": "",
@@ -110,7 +110,7 @@
110110
"user_tz": 480
111111
},
112112
"id": "hyDE-bxMBxQY",
113-
"outputId": "c2b9f4bf-67f1-41d9-e2cf-1ece31c306a1"
113+
"outputId": "bea4054f-8d56-40d7-9763-d3826c20dcf3"
114114
},
115115
"outputs": [
116116
{
@@ -201,7 +201,7 @@
201201
"4 16 1.470001 Desktop ctrl non-US 2"
202202
]
203203
},
204-
"execution_count": 2,
204+
"execution_count": 3,
205205
"metadata": {
206206
"tags": []
207207
},
@@ -5843,13 +5843,12 @@
58435843
"execution_count": null,
58445844
"metadata": {
58455845
"colab": {
5846-
"base_uri": "https://localhost:8080/",
58475846
"height": 173
58485847
},
58495848
"executionInfo": {
5850-
"elapsed": 776,
5849+
"elapsed": 477,
58515850
"status": "ok",
5852-
"timestamp": 1607231201244,
5851+
"timestamp": 1607761548536,
58535852
"user": {
58545853
"displayName": "",
58555854
"photoUrl": "",
@@ -5858,7 +5857,7 @@
58585857
"user_tz": 480
58595858
},
58605859
"id": "LjAANaq0Zx4d",
5861-
"outputId": "0921277e-8ca6-4a14-8976-a82efde07ca8"
5860+
"outputId": "9d674807-ebf3-4f76-e48c-b0fc7cce3b54"
58625861
},
58635862
"outputs": [
58645863
{
@@ -5882,44 +5881,49 @@
58825881
" \u003cthead\u003e\n",
58835882
" \u003ctr style=\"text-align: right;\"\u003e\n",
58845883
" \u003cth\u003e\u003c/th\u003e\n",
5885-
" \u003cth\u003esum(clicks)\u003c/th\u003e\n",
5886-
" \u003cth\u003emean(impressions)\u003c/th\u003e\n",
5884+
" \u003cth\u003e\u003c/th\u003e\n",
5885+
" \u003cth\u003esum(clicks) Absolute Change\u003c/th\u003e\n",
5886+
" \u003cth\u003emean(impressions) Absolute Change\u003c/th\u003e\n",
58875887
" \u003c/tr\u003e\n",
58885888
" \u003ctr\u003e\n",
58895889
" \u003cth\u003eplatform\u003c/th\u003e\n",
5890+
" \u003cth\u003ecountry\u003c/th\u003e\n",
58905891
" \u003cth\u003e\u003c/th\u003e\n",
58915892
" \u003cth\u003e\u003c/th\u003e\n",
58925893
" \u003c/tr\u003e\n",
58935894
" \u003c/thead\u003e\n",
58945895
" \u003ctbody\u003e\n",
58955896
" \u003ctr\u003e\n",
58965897
" \u003cth\u003eDesktop\u003c/th\u003e\n",
5897-
" \u003ctd\u003e369.812537\u003c/td\u003e\n",
5898-
" \u003ctd\u003e36.500000\u003c/td\u003e\n",
5898+
" \u003cth\u003enon-US\u003c/th\u003e\n",
5899+
" \u003ctd\u003e-143.916922\u003c/td\u003e\n",
5900+
" \u003ctd\u003e-41.928504\u003c/td\u003e\n",
58995901
" \u003c/tr\u003e\n",
59005902
" \u003ctr\u003e\n",
59015903
" \u003cth\u003eMobile\u003c/th\u003e\n",
5902-
" \u003ctd\u003e346.676598\u003c/td\u003e\n",
5903-
" \u003ctd\u003e14.413043\u003c/td\u003e\n",
5904+
" \u003cth\u003enon-US\u003c/th\u003e\n",
5905+
" \u003ctd\u003e-102.407265\u003c/td\u003e\n",
5906+
" \u003ctd\u003e-0.502706\u003c/td\u003e\n",
59045907
" \u003c/tr\u003e\n",
59055908
" \u003ctr\u003e\n",
59065909
" \u003cth\u003eTablet\u003c/th\u003e\n",
5907-
" \u003ctd\u003e383.457689\u003c/td\u003e\n",
5908-
" \u003ctd\u003e14.677711\u003c/td\u003e\n",
5910+
" \u003cth\u003enon-US\u003c/th\u003e\n",
5911+
" \u003ctd\u003e-157.311265\u003c/td\u003e\n",
5912+
" \u003ctd\u003e0.132568\u003c/td\u003e\n",
59095913
" \u003c/tr\u003e\n",
59105914
" \u003c/tbody\u003e\n",
59115915
"\u003c/table\u003e\n",
59125916
"\u003c/div\u003e"
59135917
],
59145918
"text/plain": [
5915-
" sum(clicks) mean(impressions)\n",
5916-
"platform \n",
5917-
"Desktop 369.812537 36.500000\n",
5918-
"Mobile 346.676598 14.413043\n",
5919-
"Tablet 383.457689 14.677711"
5919+
" sum(clicks) Absolute Change mean(impressions) Absolute Change\n",
5920+
"platform country \n",
5921+
"Desktop non-US -143.916922 -41.928504\n",
5922+
"Mobile non-US -102.407265 -0.502706\n",
5923+
"Tablet non-US -157.311265 0.132568"
59205924
]
59215925
},
5922-
"execution_count": 64,
5926+
"execution_count": 5,
59235927
"metadata": {
59245928
"tags": []
59255929
},
@@ -5928,6 +5932,7 @@
59285932
],
59295933
"source": [
59305934
"m = MetricList((Sum('clicks'), Mean('impressions')))\n",
5935+
"m = AbsoluteChange('country', 'US', m)\n",
59315936
"m.compute_on(df, 'platform')"
59325937
]
59335938
},
@@ -5936,22 +5941,21 @@
59365941
"execution_count": null,
59375942
"metadata": {
59385943
"colab": {
5939-
"base_uri": "https://localhost:8080/",
59405944
"height": 173
59415945
},
59425946
"executionInfo": {
5943-
"elapsed": 669,
5947+
"elapsed": 410,
59445948
"status": "ok",
5945-
"timestamp": 1607231201492,
5949+
"timestamp": 1607761538409,
59465950
"user": {
59475951
"displayName": "",
59485952
"photoUrl": "",
59495953
"userId": ""
59505954
},
59515955
"user_tz": 480
59525956
},
5953-
"id": "FHPFIADVZ67s",
5954-
"outputId": "b5837819-c18e-4864-a7f9-82fc78c41e67"
5957+
"id": "hkayduNpC4UC",
5958+
"outputId": "2511d051-b84a-4017-ccc9-dc0b52a51c18"
59555959
},
59565960
"outputs": [
59575961
{
@@ -5975,44 +5979,49 @@
59755979
" \u003cthead\u003e\n",
59765980
" \u003ctr style=\"text-align: right;\"\u003e\n",
59775981
" \u003cth\u003e\u003c/th\u003e\n",
5978-
" \u003cth\u003esum_clicks\u003c/th\u003e\n",
5979-
" \u003cth\u003emean_impressions\u003c/th\u003e\n",
5982+
" \u003cth\u003e\u003c/th\u003e\n",
5983+
" \u003cth\u003esum(clicks) Absolute Change\u003c/th\u003e\n",
5984+
" \u003cth\u003emean(impressions) Absolute Change\u003c/th\u003e\n",
59805985
" \u003c/tr\u003e\n",
59815986
" \u003ctr\u003e\n",
59825987
" \u003cth\u003eplatform\u003c/th\u003e\n",
5988+
" \u003cth\u003ecountry\u003c/th\u003e\n",
59835989
" \u003cth\u003e\u003c/th\u003e\n",
59845990
" \u003cth\u003e\u003c/th\u003e\n",
59855991
" \u003c/tr\u003e\n",
59865992
" \u003c/thead\u003e\n",
59875993
" \u003ctbody\u003e\n",
59885994
" \u003ctr\u003e\n",
59895995
" \u003cth\u003eDesktop\u003c/th\u003e\n",
5990-
" \u003ctd\u003e369.812537\u003c/td\u003e\n",
5991-
" \u003ctd\u003e36.500000\u003c/td\u003e\n",
5996+
" \u003cth\u003enon-US\u003c/th\u003e\n",
5997+
" \u003ctd\u003e-143.916922\u003c/td\u003e\n",
5998+
" \u003ctd\u003e-41.928504\u003c/td\u003e\n",
59925999
" \u003c/tr\u003e\n",
59936000
" \u003ctr\u003e\n",
59946001
" \u003cth\u003eMobile\u003c/th\u003e\n",
5995-
" \u003ctd\u003e346.676598\u003c/td\u003e\n",
5996-
" \u003ctd\u003e14.413043\u003c/td\u003e\n",
6002+
" \u003cth\u003enon-US\u003c/th\u003e\n",
6003+
" \u003ctd\u003e-102.407265\u003c/td\u003e\n",
6004+
" \u003ctd\u003e-0.502706\u003c/td\u003e\n",
59976005
" \u003c/tr\u003e\n",
59986006
" \u003ctr\u003e\n",
59996007
" \u003cth\u003eTablet\u003c/th\u003e\n",
6000-
" \u003ctd\u003e383.457689\u003c/td\u003e\n",
6001-
" \u003ctd\u003e14.677711\u003c/td\u003e\n",
6008+
" \u003cth\u003enon-US\u003c/th\u003e\n",
6009+
" \u003ctd\u003e-157.311265\u003c/td\u003e\n",
6010+
" \u003ctd\u003e0.132568\u003c/td\u003e\n",
60026011
" \u003c/tr\u003e\n",
60036012
" \u003c/tbody\u003e\n",
60046013
"\u003c/table\u003e\n",
60056014
"\u003c/div\u003e"
60066015
],
60076016
"text/plain": [
6008-
" sum_clicks mean_impressions\n",
6009-
"platform \n",
6010-
"Desktop 369.812537 36.500000\n",
6011-
"Mobile 346.676598 14.413043\n",
6012-
"Tablet 383.457689 14.677711"
6017+
" sum(clicks) Absolute Change mean(impressions) Absolute Change\n",
6018+
"platform country \n",
6019+
"Desktop non-US -143.916922 -41.928504\n",
6020+
"Mobile non-US -102.407265 -0.502706\n",
6021+
"Tablet non-US -157.311265 0.132568"
60136022
]
60146023
},
6015-
"execution_count": 65,
6024+
"execution_count": 4,
60166025
"metadata": {
60176026
"tags": []
60186027
},
@@ -6025,8 +6034,8 @@
60256034
"\n",
60266035
"engine = create_engine('sqlite://', echo=False)\n",
60276036
"df.to_sql('T', con=engine)\n",
6028-
"# Meterstick uses SQL dialect different to sqlalchemy so this only works for\n",
6029-
"# simple Metrics.\n",
6037+
"# Meterstick uses a different SQL dialect from SQLAlchemy, so this doesn't\n",
6038+
"# always work.\n",
60306039
"m.compute_on_sql('T', 'platform', execute=lambda sql: pd.read_sql(sql, engine))"
60316040
]
60326041
},
@@ -8964,7 +8973,7 @@
89648973
"colab": {
89658974
"collapsed_sections": [],
89668975
"last_runtime": {
8967-
"build_target": "//quality/ranklab/experimental/notebook:rl_colab",
8976+
"build_target": "//ads/metrics/lib/development/datacube:aqlogs_colab_notebook",
89688977
"kind": "private"
89698978
},
89708979
"name": "Meterstick Demo.ipynb",

sql.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,10 @@ def get_sql_for_mh(metric, table, split_by, global_filter, indexes,
613613
FROM $DATA
614614
GROUP BY split_by, condition, stratified),
615615
MHBase AS (SELECT
616-
* EXCEPT (condition)
616+
split_by,
617+
stratified,
618+
sum_click,
619+
sum_impression
617620
FROM MHRaw
618621
WHERE
619622
condition = "base_value")
@@ -676,7 +679,8 @@ def get_sql_for_mh(metric, table, split_by, global_filter, indexes,
676679
for c, b in zip(cond_cols.aliases, base))
677680
base_cond = ' AND '.join(base_cond)
678681
base_value = Sql(
679-
Column('* EXCEPT (%s)' % ', '.join(cond_cols.aliases), auto_alias=False),
682+
Columns(raw_table_sql.groupby.aliases).add(
683+
raw_table_sql.columns.aliases).difference(cond_cols.aliases),
680684
raw_table_alias, base_cond)
681685
base_table = Datasource(base_value, 'MHBase')
682686
base_table_alias = with_data.add(base_table)
@@ -739,7 +743,8 @@ def get_sql_for_change(metric, table, split_by, global_filter, indexes,
739743
FROM $DATA
740744
GROUP BY split_by, condition),
741745
ChangeBase AS (SELECT
742-
* EXCEPT (condition)
746+
split_by,
747+
mean_click
743748
FROM ChangeRaw
744749
WHERE
745750
condition = "base_value")
@@ -789,7 +794,8 @@ def get_sql_for_change(metric, table, split_by, global_filter, indexes,
789794
for c, b in zip(cond_cols.aliases, base))
790795
base_cond = ' AND '.join(base_cond)
791796
base_value = Sql(
792-
Column('* EXCEPT (%s)' % ', '.join(cond_cols.aliases), auto_alias=False),
797+
Columns(raw_table_sql.groupby.aliases).add(
798+
raw_table_sql.columns.aliases).difference(cond_cols.aliases),
793799
raw_table_alias, base_cond)
794800
base_table = Datasource(base_value, 'ChangeBase')
795801
base_table_alias = with_data.add(base_table)
@@ -1412,7 +1418,9 @@ def get_bootstrap_data(metric, table, split_by, global_filter, local_filter,
14121418
BootstrapRandomChoices AS (SELECT
14131419
b.* EXCEPT (_bs_row_number, _bs_filter)
14141420
FROM (SELECT
1415-
* EXCEPT (_bs_row_number),
1421+
split_by,
1422+
_resample_idx,
1423+
_bs_filter,
14161424
_bs_random_row_number AS _bs_row_number
14171425
FROM BootstrapRandomRows) AS a
14181426
JOIN
@@ -1487,15 +1495,16 @@ def get_bootstrap_data(metric, table, split_by, global_filter, local_filter,
14871495
random_choice_table_alias = with_data.add(
14881496
Datasource(random_choice_table, 'BootstrapRandomRows'))
14891497

1490-
random_rows = Sql((Column('* EXCEPT (_bs_row_number)', auto_alias=False),
1491-
Column('_bs_random_row_number', alias='_bs_row_number')),
1492-
random_choice_table_alias)
14931498
using = Columns(partition).add('_bs_row_number').difference(str(where))
1494-
random_rows = Datasource(random_rows, 'a')
14951499
excludes = ['_bs_row_number']
14961500
if where:
14971501
excludes.append('_bs_filter')
14981502
using.add('_bs_filter')
1503+
random_rows = Sql(
1504+
Columns(using).difference('_bs_row_number').add(
1505+
Column('_bs_random_row_number', alias='_bs_row_number')),
1506+
random_choice_table_alias)
1507+
random_rows = Datasource(random_rows, 'a')
14991508
resampled = random_rows.join(
15001509
Datasource(random_choice_table_alias, 'b'), using=using)
15011510
table = Sql(

0 commit comments

Comments
 (0)