Skip to content

Commit 3a811a2

Browse files
committed
check more for missing symbols in control tables
1 parent 1ae9688 commit 3a811a2

File tree

7 files changed

+152
-19
lines changed

7 files changed

+152
-19
lines changed

Examples/cdata/ranking_pivot_example.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
"| 6 | 2004-01-09 00:00:00 | B | None | None | None | None |\n",
5353
"</blockquote>\n",
5454
"\n",
55-
"That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp. Then write these results into a single row for each `ID`.\n",
55+
"That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp. Then write these results into a single row for each `ID`. As is common when working with `Pandas`, we will have to treat `NaN` and `None` as fairly interchangeable.\n",
5656
"\n",
5757
"## The solution\n",
5858
"\n",

Examples/cdata/ranking_pivot_example.md

Lines changed: 131 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Into this format:
3737
| 6 | 2004-01-09 00:00:00 | B | None | None | None | None |
3838
</blockquote>
3939

40-
That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp. Then write these results into a single row for each `ID`.
40+
That is: for each `ID` pick the first three operations ordered by date, merging operations with the same timestamp. Then write these results into a single row for each `ID`. As is common when working with `Pandas`, we will have to treat `NaN` and `None` as fairly interchangeable.
4141

4242
## The solution
4343

@@ -87,7 +87,19 @@ d
8787

8888

8989
<div>
90-
90+
<style scoped>
91+
.dataframe tbody tr th:only-of-type {
92+
vertical-align: middle;
93+
}
94+
95+
.dataframe tbody tr th {
96+
vertical-align: top;
97+
}
98+
99+
.dataframe thead th {
100+
text-align: right;
101+
}
102+
</style>
91103
<table border="1" class="dataframe">
92104
<thead>
93105
<tr style="text-align: right;">
@@ -196,7 +208,19 @@ d2
196208

197209

198210
<div>
199-
211+
<style scoped>
212+
.dataframe tbody tr th:only-of-type {
213+
vertical-align: middle;
214+
}
215+
216+
.dataframe tbody tr th {
217+
vertical-align: top;
218+
}
219+
220+
.dataframe thead th {
221+
text-align: right;
222+
}
223+
</style>
200224
<table border="1" class="dataframe">
201225
<thead>
202226
<tr style="text-align: right;">
@@ -338,7 +362,19 @@ diagram
338362

339363

340364
<div>
341-
365+
<style scoped>
366+
.dataframe tbody tr th:only-of-type {
367+
vertical-align: middle;
368+
}
369+
370+
.dataframe tbody tr th {
371+
vertical-align: top;
372+
}
373+
374+
.dataframe thead th {
375+
text-align: right;
376+
}
377+
</style>
342378
<table border="1" class="dataframe">
343379
<thead>
344380
<tr style="text-align: right;">
@@ -395,7 +431,19 @@ row_record
395431

396432

397433
<div>
398-
434+
<style scoped>
435+
.dataframe tbody tr th:only-of-type {
436+
vertical-align: middle;
437+
}
438+
439+
.dataframe tbody tr th {
440+
vertical-align: top;
441+
}
442+
443+
.dataframe thead th {
444+
text-align: right;
445+
}
446+
</style>
399447
<table border="1" class="dataframe">
400448
<thead>
401449
<tr style="text-align: right;">
@@ -481,7 +529,19 @@ res
481529

482530

483531
<div>
484-
532+
<style scoped>
533+
.dataframe tbody tr th:only-of-type {
534+
vertical-align: middle;
535+
}
536+
537+
.dataframe tbody tr th {
538+
vertical-align: top;
539+
}
540+
541+
.dataframe thead th {
542+
text-align: right;
543+
}
544+
</style>
485545
<table border="1" class="dataframe">
486546
<thead>
487547
<tr style="text-align: right;">
@@ -590,7 +650,19 @@ res
590650

591651

592652
<div>
593-
653+
<style scoped>
654+
.dataframe tbody tr th:only-of-type {
655+
vertical-align: middle;
656+
}
657+
658+
.dataframe tbody tr th {
659+
vertical-align: top;
660+
}
661+
662+
.dataframe thead th {
663+
text-align: right;
664+
}
665+
</style>
594666
<table border="1" class="dataframe">
595667
<thead>
596668
<tr style="text-align: right;">
@@ -717,7 +789,19 @@ res_db
717789

718790

719791
<div>
720-
792+
<style scoped>
793+
.dataframe tbody tr th:only-of-type {
794+
vertical-align: middle;
795+
}
796+
797+
.dataframe tbody tr th {
798+
vertical-align: top;
799+
}
800+
801+
.dataframe thead th {
802+
text-align: right;
803+
}
804+
</style>
721805
<table border="1" class="dataframe">
722806
<thead>
723807
<tr style="text-align: right;">
@@ -809,7 +893,19 @@ res_db[['ID', 'DATE1', 'OP1', 'DATE2', 'OP2', 'DATE3', 'OP3']]
809893

810894

811895
<div>
812-
896+
<style scoped>
897+
.dataframe tbody tr th:only-of-type {
898+
vertical-align: middle;
899+
}
900+
901+
.dataframe tbody tr th {
902+
vertical-align: top;
903+
}
904+
905+
.dataframe thead th {
906+
text-align: right;
907+
}
908+
</style>
813909
<table border="1" class="dataframe">
814910
<thead>
815911
<tr style="text-align: right;">
@@ -920,7 +1016,19 @@ res
9201016

9211017

9221018
<div>
923-
1019+
<style scoped>
1020+
.dataframe tbody tr th:only-of-type {
1021+
vertical-align: middle;
1022+
}
1023+
1024+
.dataframe tbody tr th {
1025+
vertical-align: top;
1026+
}
1027+
1028+
.dataframe thead th {
1029+
text-align: right;
1030+
}
1031+
</style>
9241032
<table border="1" class="dataframe">
9251033
<thead>
9261034
<tr style="text-align: right;">
@@ -1020,7 +1128,19 @@ res_db
10201128

10211129

10221130
<div>
1023-
1131+
<style scoped>
1132+
.dataframe tbody tr th:only-of-type {
1133+
vertical-align: middle;
1134+
}
1135+
1136+
.dataframe tbody tr th {
1137+
vertical-align: top;
1138+
}
1139+
1140+
.dataframe thead th {
1141+
text-align: right;
1142+
}
1143+
</style>
10241144
<table border="1" class="dataframe">
10251145
<thead>
10261146
<tr style="text-align: right;">
@@ -1111,4 +1231,3 @@ con.close()
11111231
```python
11121232

11131233
```
1114-

build/lib/data_algebra/cdata.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99

1010
class RecordSpecification:
1111
def __init__(
12-
self, control_table, *, record_keys=None, control_table_keys=None, strict=False
12+
self, control_table, *, record_keys=None, control_table_keys=None, strict=False, pd=None
1313
):
14+
if pd is None:
15+
pd = data_algebra.pd
1416
control_table = control_table.reset_index(inplace=False, drop=True)
1517
if control_table.shape[0] < 1:
1618
raise ValueError("control table should have at least 1 row")
19+
if any(data_algebra.util.is_bad(control_table.columns)):
20+
raise ValueError("control table column names can not be NA/NaN/inf/None")
1721
if len(control_table.columns) != len(set(control_table.columns)):
1822
raise ValueError("control table columns should be unique")
1923
self.control_table = control_table.reset_index(drop=True)
@@ -39,6 +43,9 @@ def __init__(
3943
raise ValueError(
4044
"columns common to record_keys and control_table_keys: " + str(confused)
4145
)
46+
for ck in self.control_table_keys:
47+
if any(data_algebra.util.is_bad(control_table[ck], pd=pd)):
48+
raise ValueError("NA/NaN/inf/None not allowed as control table keys")
4249
if strict:
4350
if not data_algebra.util.table_is_keyed_by_columns(
4451
self.control_table, self.control_table_keys

coverage.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ data_algebra/SQLite.py 101 6 94%
6161
data_algebra/SparkSQL.py 19 4 79%
6262
data_algebra/__init__.py 5 0 100%
6363
data_algebra/arrow.py 173 39 77%
64-
data_algebra/cdata.py 325 91 72%
64+
data_algebra/cdata.py 332 93 72%
6565
data_algebra/cdata_impl.py 10 1 90%
6666
data_algebra/connected_components.py 49 1 98%
6767
data_algebra/custom_functions.py 18 1 94%
@@ -80,10 +80,10 @@ data_algebra/flow_text.py 17 0 100%
8080
data_algebra/near_sql.py 140 16 89%
8181
data_algebra/pandas_model.py 197 22 89%
8282
data_algebra/test_util.py 126 18 86%
83-
data_algebra/util.py 45 6 87%
83+
data_algebra/util.py 45 4 91%
8484
data_algebra/yaml.py 95 11 88%
8585
----------------------------------------------------------
86-
TOTAL 4143 933 77%
86+
TOTAL 4150 933 78%
8787

8888

89-
============================== 91 passed in 8.60s ==============================
89+
============================== 91 passed in 8.56s ==============================

data_algebra/cdata.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99

1010
class RecordSpecification:
1111
def __init__(
12-
self, control_table, *, record_keys=None, control_table_keys=None, strict=False
12+
self, control_table, *, record_keys=None, control_table_keys=None, strict=False, pd=None
1313
):
14+
if pd is None:
15+
pd = data_algebra.pd
1416
control_table = control_table.reset_index(inplace=False, drop=True)
1517
if control_table.shape[0] < 1:
1618
raise ValueError("control table should have at least 1 row")
19+
if any(data_algebra.util.is_bad(control_table.columns)):
20+
raise ValueError("control table column names can not be NA/NaN/inf/None")
1721
if len(control_table.columns) != len(set(control_table.columns)):
1822
raise ValueError("control table columns should be unique")
1923
self.control_table = control_table.reset_index(drop=True)
@@ -39,6 +43,9 @@ def __init__(
3943
raise ValueError(
4044
"columns common to record_keys and control_table_keys: " + str(confused)
4145
)
46+
for ck in self.control_table_keys:
47+
if any(data_algebra.util.is_bad(control_table[ck], pd=pd)):
48+
raise ValueError("NA/NaN/inf/None not allowed as control table keys")
4249
if strict:
4350
if not data_algebra.util.table_is_keyed_by_columns(
4451
self.control_table, self.control_table_keys
75 Bytes
Binary file not shown.

dist/data_algebra-0.4.7.tar.gz

65 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)