Skip to content

Commit 11f91ea

Browse files
committed
rebuild and retest
1 parent 25c92df commit 11f91ea

File tree

5 files changed

+33
-18
lines changed

5 files changed

+33
-18
lines changed

build/lib/data_algebra/util.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,16 +75,21 @@ def guess_carried_scalar_type(col):
7575
:param col: column or scalar to inspect
7676
:return: type of first non-None entry, if any , else type(None)
7777
"""
78+
# check for scalars first
7879
ct = map_type_to_canonical(type(col))
7980
if ct in {str, int, float, bool, type(None), numpy.int64, numpy.float64,
8081
datetime.datetime, datetime.date, datetime.timedelta}:
8182
return ct
83+
# look at a list or Series
84+
if isinstance(col, data_algebra.default_data_model.pd.core.series.Series):
85+
col = col.values
8286
if len(col) < 1:
8387
return type(None)
84-
idx = col.notna().idxmax()
85-
if idx is None:
86-
return map_type_to_canonical(type(col[0]))
87-
return map_type_to_canonical(type(col[idx]))
88+
good_idx = numpy.where(numpy.logical_not(data_algebra.default_data_model.pd.isna(col)))[0]
89+
test_idx = 0
90+
if len(good_idx) > 0:
91+
test_idx = good_idx[0]
92+
return map_type_to_canonical(type(col[test_idx]))
8893

8994

9095
def guess_column_types(d, *, columns=None):
@@ -106,7 +111,7 @@ def guess_column_types(d, *, columns=None):
106111
res = dict()
107112
for c in columns:
108113
gt = guess_carried_scalar_type(d[c])
109-
if (gt is None) or (not isinstance(gt, type)) or str(gt).endswith('.Series\'>'):
114+
if (gt is None) or (not isinstance(gt, type)) or gt == data_algebra.default_data_model.pd.core.series.Series:
110115
# pandas.concat() poisons types with Series, don't allow that
111116
return dict()
112117
res[c] = gt

coverage.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ data_algebra/pandas_model.py 18 3 83%
116116
data_algebra/parse_by_lark.py 143 26 82%
117117
data_algebra/python3_lark.py 1 0 100%
118118
data_algebra/test_util.py 202 37 82%
119-
data_algebra/util.py 86 6 93%
119+
data_algebra/util.py 89 6 93%
120120
----------------------------------------------------------
121-
TOTAL 4811 853 82%
121+
TOTAL 4814 853 82%
122122

123123

124-
============================= 210 passed in 16.73s =============================
124+
============================= 210 passed in 16.61s =============================
62 Bytes
Binary file not shown.

dist/data_algebra-0.8.0.tar.gz

65 Bytes
Binary file not shown.

docs/data_algebra/util.html

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,21 @@ <h1 class="modulename">
145145
<span class="sd"> :param col: column or scalar to inspect</span>
146146
<span class="sd"> :return: type of first non-None entry, if any , else type(None)</span>
147147
<span class="sd"> &quot;&quot;&quot;</span>
148+
<span class="c1"># check for scalars first</span>
148149
<span class="n">ct</span> <span class="o">=</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">))</span>
149150
<span class="k">if</span> <span class="n">ct</span> <span class="ow">in</span> <span class="p">{</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span> <span class="n">numpy</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span> <span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">,</span>
150151
<span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">,</span> <span class="n">datetime</span><span class="o">.</span><span class="n">date</span><span class="p">,</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">}:</span>
151152
<span class="k">return</span> <span class="n">ct</span>
153+
<span class="c1"># look at a list or Series</span>
154+
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
155+
<span class="n">col</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">values</span>
152156
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">:</span>
153157
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
154-
<span class="n">idx</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span><span class="o">.</span><span class="n">idxmax</span><span class="p">()</span>
155-
<span class="k">if</span> <span class="n">idx</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
156-
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
157-
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="n">idx</span><span class="p">]))</span>
158+
<span class="n">good_idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">numpy</span><span class="o">.</span><span class="n">logical_not</span><span class="p">(</span><span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">col</span><span class="p">)))[</span><span class="mi">0</span><span class="p">]</span>
159+
<span class="n">test_idx</span> <span class="o">=</span> <span class="mi">0</span>
160+
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">good_idx</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
161+
<span class="n">test_idx</span> <span class="o">=</span> <span class="n">good_idx</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
162+
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="n">test_idx</span><span class="p">]))</span>
158163

159164

160165
<span class="k">def</span> <span class="nf">guess_column_types</span><span class="p">(</span><span class="n">d</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
@@ -176,7 +181,7 @@ <h1 class="modulename">
176181
<span class="n">res</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
177182
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
178183
<span class="n">gt</span> <span class="o">=</span> <span class="n">guess_carried_scalar_type</span><span class="p">(</span><span class="n">d</span><span class="p">[</span><span class="n">c</span><span class="p">])</span>
179-
<span class="k">if</span> <span class="p">(</span><span class="n">gt</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">gt</span><span class="p">,</span> <span class="nb">type</span><span class="p">))</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="n">gt</span><span class="p">)</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;.Series</span><span class="se">\&#39;</span><span class="s1">&gt;&#39;</span><span class="p">):</span>
184+
<span class="k">if</span> <span class="p">(</span><span class="n">gt</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">gt</span><span class="p">,</span> <span class="nb">type</span><span class="p">))</span> <span class="ow">or</span> <span class="n">gt</span> <span class="o">==</span> <span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
180185
<span class="c1"># pandas.concat() poisons types with Series, don&#39;t allow that</span>
181186
<span class="k">return</span> <span class="nb">dict</span><span class="p">()</span>
182187
<span class="n">res</span><span class="p">[</span><span class="n">c</span><span class="p">]</span> <span class="o">=</span> <span class="n">gt</span>
@@ -337,16 +342,21 @@ <h1 class="modulename">
337342
<span class="sd"> :param col: column or scalar to inspect</span>
338343
<span class="sd"> :return: type of first non-None entry, if any , else type(None)</span>
339344
<span class="sd"> &quot;&quot;&quot;</span>
345+
<span class="c1"># check for scalars first</span>
340346
<span class="n">ct</span> <span class="o">=</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">))</span>
341347
<span class="k">if</span> <span class="n">ct</span> <span class="ow">in</span> <span class="p">{</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">,</span> <span class="nb">bool</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span> <span class="n">numpy</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span> <span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">,</span>
342348
<span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">,</span> <span class="n">datetime</span><span class="o">.</span><span class="n">date</span><span class="p">,</span> <span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">}:</span>
343349
<span class="k">return</span> <span class="n">ct</span>
350+
<span class="c1"># look at a list or Series</span>
351+
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
352+
<span class="n">col</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">values</span>
344353
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">:</span>
345354
<span class="k">return</span> <span class="nb">type</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
346-
<span class="n">idx</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span><span class="o">.</span><span class="n">idxmax</span><span class="p">()</span>
347-
<span class="k">if</span> <span class="n">idx</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
348-
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
349-
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="n">idx</span><span class="p">]))</span>
355+
<span class="n">good_idx</span> <span class="o">=</span> <span class="n">numpy</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">numpy</span><span class="o">.</span><span class="n">logical_not</span><span class="p">(</span><span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">col</span><span class="p">)))[</span><span class="mi">0</span><span class="p">]</span>
356+
<span class="n">test_idx</span> <span class="o">=</span> <span class="mi">0</span>
357+
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">good_idx</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
358+
<span class="n">test_idx</span> <span class="o">=</span> <span class="n">good_idx</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
359+
<span class="k">return</span> <span class="n">map_type_to_canonical</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">col</span><span class="p">[</span><span class="n">test_idx</span><span class="p">]))</span>
350360
</pre></div>
351361

352362
</details>
@@ -388,7 +398,7 @@ <h1 class="modulename">
388398
<span class="n">res</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
389399
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
390400
<span class="n">gt</span> <span class="o">=</span> <span class="n">guess_carried_scalar_type</span><span class="p">(</span><span class="n">d</span><span class="p">[</span><span class="n">c</span><span class="p">])</span>
391-
<span class="k">if</span> <span class="p">(</span><span class="n">gt</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">gt</span><span class="p">,</span> <span class="nb">type</span><span class="p">))</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span><span class="n">gt</span><span class="p">)</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">&#39;.Series</span><span class="se">\&#39;</span><span class="s1">&gt;&#39;</span><span class="p">):</span>
401+
<span class="k">if</span> <span class="p">(</span><span class="n">gt</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">gt</span><span class="p">,</span> <span class="nb">type</span><span class="p">))</span> <span class="ow">or</span> <span class="n">gt</span> <span class="o">==</span> <span class="n">data_algebra</span><span class="o">.</span><span class="n">default_data_model</span><span class="o">.</span><span class="n">pd</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
392402
<span class="c1"># pandas.concat() poisons types with Series, don&#39;t allow that</span>
393403
<span class="k">return</span> <span class="nb">dict</span><span class="p">()</span>
394404
<span class="n">res</span><span class="p">[</span><span class="n">c</span><span class="p">]</span> <span class="o">=</span> <span class="n">gt</span>

0 commit comments

Comments
 (0)