|
518 | 518 | <h1>Source code for data_morph.data.stats</h1><div class="highlight"><pre> |
519 | 519 | <span></span><span class="sd">"""Utility functions for calculating summary statistics."""</span> |
520 | 520 |
|
521 | | -<span class="kn">from</span><span class="w"> </span><span class="nn">collections</span><span class="w"> </span><span class="kn">import</span> <span class="n">namedtuple</span> |
| 521 | +<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span> |
522 | 522 |
|
523 | | -<span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pd</span> |
| 523 | +<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">NamedTuple</span> |
| 524 | + |
| 525 | +<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> |
| 526 | + <span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Generator</span> |
| 527 | + |
| 528 | + <span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pd</span> |
| 529 | + |
| 530 | + |
| 531 | +<div class="viewcode-block" id="SummaryStatistics"> |
| 532 | +<a class="viewcode-back" href="../../../api/data_morph.data.stats.SummaryStatistics.html#data_morph.data.stats.SummaryStatistics">[docs]</a> |
| 533 | +<span class="k">class</span><span class="w"> </span><span class="nc">SummaryStatistics</span><span class="p">(</span><span class="n">NamedTuple</span><span class="p">):</span> |
| 534 | +<span class="w"> </span><span class="sd">"""Named tuple containing the summary statistics for plotting/analysis."""</span> |
| 535 | + |
| 536 | + <span class="n">x_mean</span><span class="p">:</span> <span class="nb">float</span> |
| 537 | + <span class="n">y_mean</span><span class="p">:</span> <span class="nb">float</span> |
| 538 | + |
| 539 | + <span class="n">x_stdev</span><span class="p">:</span> <span class="nb">float</span> |
| 540 | + <span class="n">y_stdev</span><span class="p">:</span> <span class="nb">float</span> |
| 541 | + |
| 542 | + <span class="n">correlation</span><span class="p">:</span> <span class="nb">float</span> |
| 543 | + |
| 544 | + <span class="n">x_median</span><span class="p">:</span> <span class="nb">float</span> <span class="o">|</span> <span class="kc">None</span> |
| 545 | + <span class="n">y_median</span><span class="p">:</span> <span class="nb">float</span> <span class="o">|</span> <span class="kc">None</span> |
| 546 | + |
| 547 | + <span class="k">def</span><span class="w"> </span><span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Generator</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">]:</span> |
| 548 | + <span class="k">for</span> <span class="n">statistic</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fields</span><span class="p">:</span> |
| 549 | + <span class="k">if</span> <span class="p">(</span><span class="n">value</span> <span class="o">:=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">statistic</span><span class="p">))</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> |
| 550 | + <span class="k">yield</span> <span class="n">value</span></div> |
524 | 551 |
|
525 | | -<span class="n">SummaryStatistics</span> <span class="o">=</span> <span class="n">namedtuple</span><span class="p">(</span> |
526 | | - <span class="s1">'SummaryStatistics'</span><span class="p">,</span> <span class="p">[</span><span class="s1">'x_mean'</span><span class="p">,</span> <span class="s1">'y_mean'</span><span class="p">,</span> <span class="s1">'x_stdev'</span><span class="p">,</span> <span class="s1">'y_stdev'</span><span class="p">,</span> <span class="s1">'correlation'</span><span class="p">]</span> |
527 | | -<span class="p">)</span> |
528 | | -<span class="n">SummaryStatistics</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="p">(</span> |
529 | | - <span class="s1">'Named tuple containing the summary statistics for plotting/analysis.'</span> |
530 | | -<span class="p">)</span> |
531 | 552 |
|
532 | 553 |
|
533 | 554 | <div class="viewcode-block" id="get_summary_statistics"> |
534 | 555 | <a class="viewcode-back" href="../../../api/data_morph.data.stats.get_summary_statistics.html#data_morph.data.stats.get_summary_statistics">[docs]</a> |
535 | | -<span class="k">def</span><span class="w"> </span><span class="nf">get_summary_statistics</span><span class="p">(</span><span class="n">data</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="n">SummaryStatistics</span><span class="p">:</span> |
| 556 | +<span class="k">def</span><span class="w"> </span><span class="nf">get_summary_statistics</span><span class="p">(</span><span class="n">data</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">with_median</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="n">SummaryStatistics</span><span class="p">:</span> |
536 | 557 | <span class="w"> </span><span class="sd">"""</span> |
537 | 558 | <span class="sd"> Calculate the summary statistics for the given set of points.</span> |
538 | 559 |
|
539 | 560 | <span class="sd"> Parameters</span> |
540 | 561 | <span class="sd"> ----------</span> |
541 | 562 | <span class="sd"> data : pandas.DataFrame</span> |
542 | | -<span class="sd"> A dataset with columns x and y.</span> |
| 563 | +<span class="sd"> A dataset with columns ``x`` and ``y``.</span> |
| 564 | +<span class="sd"> with_median : bool</span> |
| 565 | +<span class="sd"> Whether to include the median of ``x`` and ``y``.</span> |
543 | 566 |
|
544 | 567 | <span class="sd"> Returns</span> |
545 | 568 | <span class="sd"> -------</span> |
546 | 569 | <span class="sd"> SummaryStatistics</span> |
547 | | -<span class="sd"> Named tuple consisting of mean and standard deviations of x and y,</span> |
548 | | -<span class="sd"> along with the Pearson correlation coefficient between the two.</span> |
| 570 | +<span class="sd"> Named tuple consisting of mean and standard deviations of ``x`` and ``y``,</span> |
| 571 | +<span class="sd"> along with the Pearson correlation coefficient between the two, and optionally,</span> |
| 572 | +<span class="sd"> the median of ``x`` and ``y``.</span> |
549 | 573 | <span class="sd"> """</span> |
550 | 574 | <span class="k">return</span> <span class="n">SummaryStatistics</span><span class="p">(</span> |
551 | | - <span class="n">data</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> |
552 | | - <span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> |
553 | | - <span class="n">data</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">std</span><span class="p">(),</span> |
554 | | - <span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="o">.</span><span class="n">std</span><span class="p">(),</span> |
555 | | - <span class="n">data</span><span class="o">.</span><span class="n">corr</span><span class="p">()</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> |
| 575 | + <span class="n">x_mean</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> |
| 576 | + <span class="n">y_mean</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> |
| 577 | + <span class="n">x_stdev</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">std</span><span class="p">(),</span> |
| 578 | + <span class="n">y_stdev</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="o">.</span><span class="n">std</span><span class="p">(),</span> |
| 579 | + <span class="n">correlation</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">corr</span><span class="p">()</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> |
| 580 | + <span class="n">x_median</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">x</span><span class="o">.</span><span class="n">median</span><span class="p">()</span> <span class="k">if</span> <span class="n">with_median</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span> |
| 581 | + <span class="n">y_median</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="o">.</span><span class="n">median</span><span class="p">()</span> <span class="k">if</span> <span class="n">with_median</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span> |
556 | 582 | <span class="p">)</span></div> |
557 | 583 |
|
558 | 584 | </pre></div> |
|
0 commit comments