REF and TEST: rank_size in inequality.py (#551)

bktaha · web-flow · commit 856dda870dec · 2020-06-29T12:47:04.000+10:00
diff --git a/quantecon/__init__.py b/quantecon/__init__.py
@@ -27,7 +27,7 @@
 from .graph_tools import DiGraph, random_tournament_graph
 from .gridtools import cartesian, mlinspace, simplex_grid, simplex_index
 from .inequality import lorenz_curve, gini_coefficient, shorrocks_index, \
-	rank_size_plot
+	rank_size
 from .kalman import Kalman
 from .lae import LAE
 from .arma import ARMA
diff --git a/quantecon/inequality.py b/quantecon/inequality.py
@@ -10,15 +10,17 @@
 @njit
 def lorenz_curve(y):
     """
-    Calculates the Lorenz Curve, a graphical representation of the distribution of income
-    or wealth.
+    Calculates the Lorenz Curve, a graphical representation of
+    the distribution of income or wealth.
 
-    It returns the cumulative share of people (x-axis) and the cumulative share of income earned
+    It returns the cumulative share of people (x-axis) and
+    the cumulative share of income earned.
 
     Parameters
     ----------
     y : array_like(float or int, ndim=1)
-        Array of income/wealth for each individual. Unordered or ordered is fine.
+        Array of income/wealth for each individual.
+        Unordered or ordered is fine.
 
     Returns
     -------
@@ -60,7 +62,8 @@ def gini_coefficient(y):
     Parameters
     -----------
     y : array_like(float)
-        Array of income/wealth for each individual. Ordered or unordered is fine
+        Array of income/wealth for each individual.
+        Ordered or unordered is fine.
 
     Returns
     -------
@@ -96,15 +99,15 @@ def shorrocks_index(A):
         The Shorrocks mobility index calculated as
 
         .. math::
-            
+
             s(A) = \frac{m - \sum_j a_{jj} }{m - 1} \in (0, 1)
 
         An index equal to 0 indicates complete immobility.
 
     References
     -----------
-    .. [1] Wealth distribution and social mobility in the US: A quantitative approach
-       (Benhabib, Bisin, Luo, 2017).
+    .. [1] Wealth distribution and social mobility in the US:
+       A quantitative approach (Benhabib, Bisin, Luo, 2017).
        https://www.econ.nyu.edu/user/bisina/RevisionAugust.pdf
     """
 
@@ -119,38 +122,32 @@ def shorrocks_index(A):
     return (m - diag_sum) / (m - 1)
 
 
-def rank_size_plot(data, ax, label=None, c=1.0):
+def rank_size(data, c=1.0):
     """
     Generate rank-size data corresponding to distribution data.
 
     Examples
     --------
-
-    > import numpy as np
-    > import matplotlib.pyplot as plt
-    > y = np.exp(np.random.randn(1000))  # simulate data
-    > fig, ax = plt.subplots()
-    > rank_size_plot(y, ax)
-    > plt.show()
+    >>> y = np.exp(np.random.randn(1000))  # simulate data
+    >>> rank_data, size_data = rank_size(y, c=0.85)
 
     Parameters
     ----------
-
     data : array_like
         the set of observations
     c : int or float
         restrict plot to top (c x 100)% of the distribution
-    ax : axis object
-        for plotting on, has method ax.loglog
+
+    Returns
+    -------
+    rank_data : array_like(float, ndim=1)
+        Rank in the population when sorted from largest to smallest
+    size_data : array_like(float, ndim=1)
+        Size data for top (c x 100)% of the observations
     """
     w = - np.sort(- data)                  # Reverse sort
     w = w[:int(len(w) * c)]                # extract top (c * 100)%
     rank_data = np.arange(len(w)) + 1
     size_data = w
-    ax.loglog(rank_data, size_data, 'o', markersize=3.0, alpha=0.5, label=label)
-    if label:
-        ax.legend()
-    ax.set_xlabel("log rank")
-    ax.set_ylabel("log size")
-
+    return rank_data, size_data
 
diff --git a/quantecon/tests/test_inequality.py b/quantecon/tests/test_inequality.py
@@ -5,8 +5,10 @@
 """
 
 import numpy as np
-from numpy.testing import assert_allclose
-from quantecon import lorenz_curve, gini_coefficient, shorrocks_index
+from numpy.testing import assert_allclose, assert_raises
+from scipy.stats import linregress
+from quantecon import lorenz_curve, gini_coefficient, \
+     shorrocks_index, rank_size
 
 
 def test_lorenz_curve():
@@ -37,7 +39,7 @@ def test_lorenz_curve():
 
 def test_gini_coeff():
     """
-    Tests how the funciton `gini_coefficient` calculates the Gini coefficient
+    Tests how the function `gini_coefficient` calculates the Gini coefficient
     with the Pareto and the Weibull distribution.
 
     Analytically, we know that Pareto with parameter `a` has
@@ -88,3 +90,45 @@ def test_shorrocks_index():
     index = shorrocks_index(P)
     assert_allclose(expected, index, rtol=1e-2)
 
+
+def test_rank_size():
+    """
+    Tests the `rank_size` function, which generates rank-size data
+    from a sample of observations.
+
+    The rank-size plot for a sample drawn from a Pareto distribution
+    should be a straight line.
+
+    The length of the `rank_data` array should be approximately
+    (c x 100)% of the sample size.
+    """
+
+    sample_size = 1000
+    c = 0.74
+
+    # Tests Pareto; r_squared ~ 1
+    pareto_draw = np.exp(np.random.exponential(scale=1.0, size=sample_size))
+    rank_data, size_data = rank_size(pareto_draw, c=c)
+
+    assert len(rank_data) == len(size_data)
+    assert_allclose(c*sample_size, len(rank_data), rtol=1e-3)
+
+    _, _, r_value, _, _ = linregress(np.log(rank_data), np.log(size_data))
+    r_sqval = r_value**2
+
+    assert_allclose(r_sqval, 1, rtol=1e-4)
+
+    # Tests lognormal (exp of standard normal draws); r_squared < 1
+    np.random.seed(13)
+    z = np.random.randn(sample_size)
+
+    exp_draw = np.exp(z)
+    rank_data_exp, size_data_exp = rank_size(exp_draw, c=c)
+
+    _, _, r_value_exp, _, _ = linregress(np.log(rank_data_exp),
+                                         np.log(size_data_exp))
+    r_sqval_exp = r_value_exp**2
+
+    assert_raises(AssertionError, assert_allclose, r_sqval_exp, 1, rtol=1e-4)
+
+