Skip to content

Commit 231b05f

Browse files
committed
re-run example
1 parent b59f66a commit 231b05f

File tree

5 files changed

+74
-71
lines changed

5 files changed

+74
-71
lines changed

Examples/LogisticExample/BuildDiagram.md

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,32 @@ r_ops <- convert_yaml_to_pipeline(r_yaml)
1212
cat(format(r_ops))
1313
```
1414

15-
## table(d;
16-
## subjectID,
17-
## surveyCategory,
18-
## assessmentTotal,
19-
## irrelevantCol1,
20-
## irrelevantCol2) %.>%
15+
## mk_td("d", c(
16+
## "subjectID",
17+
## "surveyCategory",
18+
## "assessmentTotal",
19+
## "irrelevantCol1",
20+
## "irrelevantCol2")) %.>%
2121
## extend(.,
2222
## probability %:=% exp(assessmentTotal * 0.237)) %.>%
2323
## extend(.,
2424
## total %:=% sum(probability),
25-
## p= subjectID) %.>%
25+
## partitionby = c('subjectID'),
26+
## orderby = c(),
27+
## reverse = c()) %.>%
2628
## extend(.,
2729
## probability %:=% probability / total) %.>%
2830
## extend(.,
2931
## sort_key %:=% -(probability)) %.>%
3032
## extend(.,
3133
## row_number %:=% row_number(),
32-
## p= subjectID,
33-
## o= "sort_key") %.>%
34+
## partitionby = c('subjectID'),
35+
## orderby = c('sort_key'),
36+
## reverse = c()) %.>%
3437
## select_rows(.,
3538
## row_number == 1) %.>%
36-
## select_columns(.,
37-
## subjectID, surveyCategory, probability) %.>%
39+
## select_columns(., c(
40+
## "subjectID", "surveyCategory", "probability")) %.>%
3841
## rename_columns(.,
3942
## c('diagnosis' = 'surveyCategory'))
4043

1.22 KB
Loading

Examples/LogisticExample/ScoringExample.html

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13572,22 +13572,22 @@ <h3 id="SQL"><code>SQL</code><a class="anchor-link" href="#SQL">&#182;</a></h3><
1357213572

1357313573

1357413574
<div class="output_subarea output_stream output_stdout output_text">
13575-
<pre>SELECT &#34;probability&#34;,
13576-
&#34;subjectid&#34;,
13575+
<pre>SELECT &#34;subjectid&#34;,
13576+
&#34;probability&#34;,
1357713577
&#34;surveycategory&#34; AS &#34;diagnosis&#34;
1357813578
FROM
13579-
(SELECT &#34;probability&#34;,
13580-
&#34;subjectid&#34;,
13579+
(SELECT &#34;subjectid&#34;,
13580+
&#34;probability&#34;,
1358113581
&#34;surveycategory&#34;
1358213582
FROM
13583-
(SELECT &#34;probability&#34;,
13584-
&#34;subjectid&#34;,
13583+
(SELECT &#34;subjectid&#34;,
13584+
&#34;probability&#34;,
1358513585
&#34;surveycategory&#34;
1358613586
FROM
13587-
(SELECT &#34;surveycategory&#34;,
13587+
(SELECT &#34;sort_key&#34;,
1358813588
&#34;subjectid&#34;,
13589-
&#34;sort_key&#34;,
1359013589
&#34;probability&#34;,
13590+
&#34;surveycategory&#34;,
1359113591
ROW_NUMBER() OVER (PARTITION BY &#34;subjectid&#34;
1359213592
ORDER BY &#34;sort_key&#34;) AS &#34;row_number&#34;
1359313593
FROM
@@ -13672,22 +13672,22 @@ <h3 id="SQL"><code>SQL</code><a class="anchor-link" href="#SQL">&#182;</a></h3><
1367213672
<thead>
1367313673
<tr style="text-align: right;">
1367413674
<th></th>
13675-
<th>probability</th>
1367613675
<th>subjectid</th>
13676+
<th>probability</th>
1367713677
<th>diagnosis</th>
1367813678
</tr>
1367913679
</thead>
1368013680
<tbody>
1368113681
<tr>
1368213682
<th>0</th>
13683-
<td>0.670622</td>
1368413683
<td>1.0</td>
13684+
<td>0.670622</td>
1368513685
<td>withdrawal behavior</td>
1368613686
</tr>
1368713687
<tr>
1368813688
<th>1</th>
13689-
<td>0.558974</td>
1369013689
<td>2.0</td>
13690+
<td>0.558974</td>
1369113691
<td>positive re-framing</td>
1369213692
</tr>
1369313693
</tbody>
@@ -14349,15 +14349,15 @@ <h3 id="R"><code>R</code><a class="anchor-link" href="#R">&#182;</a></h3><p>This
1434914349
&#34;assessmentTotal&#34;
1435014350
FROM
1435114351
&#34;d&#34;
14352-
) tsql_21995480598153962875_0000000000
14353-
) tsql_21995480598153962875_0000000001
14354-
) tsql_21995480598153962875_0000000002
14355-
) tsql_21995480598153962875_0000000003
14356-
) tsql_21995480598153962875_0000000004
14357-
) tsql_21995480598153962875_0000000005
14352+
) tsql_85982078297649144292_0000000000
14353+
) tsql_85982078297649144292_0000000001
14354+
) tsql_85982078297649144292_0000000002
14355+
) tsql_85982078297649144292_0000000003
14356+
) tsql_85982078297649144292_0000000004
14357+
) tsql_85982078297649144292_0000000005
1435814358
WHERE &#34;row_number&#34; = 1
14359-
) tsql_21995480598153962875_0000000006
14360-
) tsql_21995480598153962875_0000000007
14359+
) tsql_85982078297649144292_0000000006
14360+
) tsql_85982078297649144292_0000000007
1436114361
</pre>
1436214362
</div>
1436314363
</div>

Examples/LogisticExample/ScoringExample.ipynb

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -477,22 +477,22 @@
477477
"name": "stdout",
478478
"output_type": "stream",
479479
"text": [
480-
"SELECT \"probability\",\n",
481-
" \"subjectid\",\n",
480+
"SELECT \"subjectid\",\n",
481+
" \"probability\",\n",
482482
" \"surveycategory\" AS \"diagnosis\"\n",
483483
"FROM\n",
484-
" (SELECT \"probability\",\n",
485-
" \"subjectid\",\n",
484+
" (SELECT \"subjectid\",\n",
485+
" \"probability\",\n",
486486
" \"surveycategory\"\n",
487487
" FROM\n",
488-
" (SELECT \"probability\",\n",
489-
" \"subjectid\",\n",
488+
" (SELECT \"subjectid\",\n",
489+
" \"probability\",\n",
490490
" \"surveycategory\"\n",
491491
" FROM\n",
492-
" (SELECT \"surveycategory\",\n",
492+
" (SELECT \"sort_key\",\n",
493493
" \"subjectid\",\n",
494-
" \"sort_key\",\n",
495494
" \"probability\",\n",
495+
" \"surveycategory\",\n",
496496
" ROW_NUMBER() OVER (PARTITION BY \"subjectid\"\n",
497497
" ORDER BY \"sort_key\") AS \"row_number\"\n",
498498
" FROM\n",
@@ -571,32 +571,32 @@
571571
" <thead>\n",
572572
" <tr style=\"text-align: right;\">\n",
573573
" <th></th>\n",
574-
" <th>probability</th>\n",
575574
" <th>subjectid</th>\n",
575+
" <th>probability</th>\n",
576576
" <th>diagnosis</th>\n",
577577
" </tr>\n",
578578
" </thead>\n",
579579
" <tbody>\n",
580580
" <tr>\n",
581581
" <th>0</th>\n",
582-
" <td>0.670622</td>\n",
583582
" <td>1.0</td>\n",
583+
" <td>0.670622</td>\n",
584584
" <td>withdrawal behavior</td>\n",
585585
" </tr>\n",
586586
" <tr>\n",
587587
" <th>1</th>\n",
588-
" <td>0.558974</td>\n",
589588
" <td>2.0</td>\n",
589+
" <td>0.558974</td>\n",
590590
" <td>positive re-framing</td>\n",
591591
" </tr>\n",
592592
" </tbody>\n",
593593
"</table>\n",
594594
"</div>"
595595
],
596596
"text/plain": [
597-
" probability subjectid diagnosis\n",
598-
"0 0.670622 1.0 withdrawal behavior\n",
599-
"1 0.558974 2.0 positive re-framing"
597+
" subjectid probability diagnosis\n",
598+
"0 1.0 0.670622 withdrawal behavior\n",
599+
"1 2.0 0.558974 positive re-framing"
600600
]
601601
},
602602
"execution_count": 8,
@@ -1242,15 +1242,15 @@
12421242
" \"assessmentTotal\"\n",
12431243
" FROM\n",
12441244
" \"d\"\n",
1245-
" ) tsql_21995480598153962875_0000000000\n",
1246-
" ) tsql_21995480598153962875_0000000001\n",
1247-
" ) tsql_21995480598153962875_0000000002\n",
1248-
" ) tsql_21995480598153962875_0000000003\n",
1249-
" ) tsql_21995480598153962875_0000000004\n",
1250-
" ) tsql_21995480598153962875_0000000005\n",
1245+
" ) tsql_85982078297649144292_0000000000\n",
1246+
" ) tsql_85982078297649144292_0000000001\n",
1247+
" ) tsql_85982078297649144292_0000000002\n",
1248+
" ) tsql_85982078297649144292_0000000003\n",
1249+
" ) tsql_85982078297649144292_0000000004\n",
1250+
" ) tsql_85982078297649144292_0000000005\n",
12511251
" WHERE \"row_number\" = 1\n",
1252-
" ) tsql_21995480598153962875_0000000006\n",
1253-
") tsql_21995480598153962875_0000000007\n"
1252+
" ) tsql_85982078297649144292_0000000006\n",
1253+
") tsql_85982078297649144292_0000000007\n"
12541254
]
12551255
}
12561256
],

Examples/LogisticExample/ScoringExample.md

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -340,22 +340,22 @@ sql = ops.to_sql(db_model, pretty=True)
340340
print(sql)
341341
```
342342

343-
SELECT "probability",
344-
"subjectid",
343+
SELECT "subjectid",
344+
"probability",
345345
"surveycategory" AS "diagnosis"
346346
FROM
347-
(SELECT "probability",
348-
"subjectid",
347+
(SELECT "subjectid",
348+
"probability",
349349
"surveycategory"
350350
FROM
351-
(SELECT "probability",
352-
"subjectid",
351+
(SELECT "subjectid",
352+
"probability",
353353
"surveycategory"
354354
FROM
355-
(SELECT "surveycategory",
355+
(SELECT "sort_key",
356356
"subjectid",
357-
"sort_key",
358357
"probability",
358+
"surveycategory",
359359
ROW_NUMBER() OVER (PARTITION BY "subjectid"
360360
ORDER BY "sort_key") AS "row_number"
361361
FROM
@@ -414,22 +414,22 @@ db_model.read_query(conn, sql)
414414
<thead>
415415
<tr style="text-align: right;">
416416
<th></th>
417-
<th>probability</th>
418417
<th>subjectid</th>
418+
<th>probability</th>
419419
<th>diagnosis</th>
420420
</tr>
421421
</thead>
422422
<tbody>
423423
<tr>
424424
<th>0</th>
425-
<td>0.670622</td>
426425
<td>1.0</td>
426+
<td>0.670622</td>
427427
<td>withdrawal behavior</td>
428428
</tr>
429429
<tr>
430430
<th>1</th>
431-
<td>0.558974</td>
432431
<td>2.0</td>
432+
<td>0.558974</td>
433433
<td>positive re-framing</td>
434434
</tr>
435435
</tbody>
@@ -851,15 +851,15 @@ cat(sql)
851851
"assessmentTotal"
852852
FROM
853853
"d"
854-
) tsql_21995480598153962875_0000000000
855-
) tsql_21995480598153962875_0000000001
856-
) tsql_21995480598153962875_0000000002
857-
) tsql_21995480598153962875_0000000003
858-
) tsql_21995480598153962875_0000000004
859-
) tsql_21995480598153962875_0000000005
854+
) tsql_85982078297649144292_0000000000
855+
) tsql_85982078297649144292_0000000001
856+
) tsql_85982078297649144292_0000000002
857+
) tsql_85982078297649144292_0000000003
858+
) tsql_85982078297649144292_0000000004
859+
) tsql_85982078297649144292_0000000005
860860
WHERE "row_number" = 1
861-
) tsql_21995480598153962875_0000000006
862-
) tsql_21995480598153962875_0000000007
861+
) tsql_85982078297649144292_0000000006
862+
) tsql_85982078297649144292_0000000007
863863

864864

865865
The `R` implementation is mature and appropriate for production use. The [`rquery`](https://github.com/WinVector/rquery) grammar is designed to have minimal state and minimal annotations (no grouping or ordering annotations!), which is, in my opinion, a good design choice. `rquery` has very good performance, and is often much faster than `dplyr` or base-`R`, due to its query generation ideas and its use of [`data.table`](https://CRAN.R-project.org/package=data.table) via [`rqdatatable`](https://CRAN.R-project.org/package=rqdatatable). `rquery` is a mature pure-`R` package; [here](https://github.com/WinVector/rquery/blob/master/README.md) is the same example worked directly in `R`, with no translation from `Python`.

0 commit comments

Comments
 (0)