Skip to content

Commit 7a0ed94

Browse files
Debian Science Team authored and rebecca-palmer committed
Use fixed seeds for reproducible pseudorandomness
Author: Rebecca N. Palmer <[email protected]>
Forwarded: no
Gbp-Pq: Name fix_random_seeds.patch
1 parent c3cdb1c commit 7a0ed94

File tree

6 files changed

+56
-4
lines changed

6 files changed

+56
-4
lines changed

doc/source/getting_started/comparison/comparison_with_r.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,7 @@ In pandas we may use :meth:`~pandas.pivot_table` method to handle this:
237237
238238
import random
239239
import string
240+
random.seed(123456) # for reproducibility
240241
241242
baseball = pd.DataFrame(
242243
{

doc/source/user_guide/advanced.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -590,6 +590,7 @@ they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_in
590590
591591
import random
592592
593+
random.seed(123456) # for reproducibility
593594
random.shuffle(tuples)
594595
s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))
595596
s

doc/source/user_guide/style.ipynb

Lines changed: 46 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,8 +78,37 @@
7878
"source": [
7979
"import pandas as pd\n",
8080
"import numpy as np\n",
81-
"import matplotlib as mpl\n",
82-
"\n",
81+
"import matplotlib as mpl\n"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": null,
87+
"metadata": {
88+
"nbsphinx": "hidden"
89+
},
90+
"outputs": [],
91+
"source": [
92+
"# For reproducibility - this doesn't respect uuid_len or positionally-passed uuid but the places here that use that coincidentally bypass this anyway\n",
93+
"from pandas.io.formats.style import Styler\n",
94+
"next_uuid = 1000\n",
95+
"class StylerReproducible(Styler):\n",
96+
" def __init__(self, *args, uuid=None, **kwargs):\n",
97+
" global next_uuid\n",
98+
" if uuid is None:\n",
99+
" uuid = str(next_uuid)\n",
100+
" next_uuid = next_uuid + 1\n",
101+
" super().__init__(*args, uuid=uuid, **kwargs)\n",
102+
"Styler = StylerReproducible\n",
103+
"pd.DataFrame.style = property(lambda self: StylerReproducible(self))\n"
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
83112
"df = pd.DataFrame({\n",
84113
" \"strings\": [\"Adam\", \"Mike\"],\n",
85114
" \"ints\": [1, 3],\n",
@@ -104,6 +133,7 @@
104133
"metadata": {},
105134
"outputs": [],
106135
"source": [
136+
"np.random.seed(25) # for reproducibility\n",
107137
"weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n",
108138
" index=pd.date_range(start=\"2021-01-01\", periods=10),\n",
109139
" columns=[\"Tokyo\", \"Beijing\"])\n",
@@ -1394,7 +1424,6 @@
13941424
"outputs": [],
13951425
"source": [
13961426
"# Hide the construction of the display chart from the user\n",
1397-
"import pandas as pd\n",
13981427
"from IPython.display import HTML\n",
13991428
"\n",
14001429
"# Test series\n",
@@ -1925,6 +1954,18 @@
19251954
"from pandas.io.formats.style import Styler"
19261955
]
19271956
},
1957+
{
1958+
"cell_type": "code",
1959+
"execution_count": null,
1960+
"metadata": {
1961+
"nbsphinx": "hidden"
1962+
},
1963+
"outputs": [],
1964+
"source": [
1965+
"# For reproducibility\n",
1966+
"Styler = StylerReproducible\n"
1967+
]
1968+
},
19281969
{
19291970
"cell_type": "markdown",
19301971
"metadata": {},
@@ -2126,7 +2167,8 @@
21262167
"nbconvert_exporter": "python",
21272168
"pygments_lexer": "ipython3",
21282169
"version": "3.9.5"
2129-
}
2170+
},
2171+
"record_timing": false
21302172
},
21312173
"nbformat": 4,
21322174
"nbformat_minor": 1

doc/source/user_guide/visualization.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1086,6 +1086,7 @@ are what constitutes the bootstrap plot.
10861086
:suppress:
10871087
10881088
np.random.seed(123456)
1089+
random.seed(123456) # for reproducibility - bootstrap_plot uses random.sample
10891090
10901091
.. ipython:: python
10911092

pandas/plotting/_core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ def boxplot_frame_groupby(
604604
.. plot::
605605
:context: close-figs
606606
607+
>>> np.random.seed(1234)
607608
>>> import itertools
608609
>>> tuples = [t for t in itertools.product(range(1000), range(4))]
609610
>>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
@@ -1328,6 +1329,7 @@ def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor:
13281329
.. plot::
13291330
:context: close-figs
13301331
1332+
>>> np.random.seed(1234)
13311333
>>> data = np.random.randn(25, 4)
13321334
>>> df = pd.DataFrame(data, columns=list('ABCD'))
13331335
>>> ax = df.plot.box()
@@ -1392,6 +1394,7 @@ def hist(
13921394
.. plot::
13931395
:context: close-figs
13941396
1397+
>>> np.random.seed(1234)
13951398
>>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
13961399
>>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
13971400
>>> ax = df.plot.hist(bins=12, alpha=0.5)
@@ -1811,6 +1814,7 @@ def hexbin(
18111814
.. plot::
18121815
:context: close-figs
18131816
1817+
>>> np.random.seed(1234)
18141818
>>> n = 10000
18151819
>>> df = pd.DataFrame({'x': np.random.randn(n),
18161820
... 'y': np.random.randn(n)})

pandas/plotting/_misc.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,8 @@ def bootstrap_plot(
438438
.. plot::
439439
:context: close-figs
440440
441+
>>> np.random.seed(1234)
442+
>>> random.seed(1234) # for reproducibility
441443
>>> s = pd.Series(np.random.uniform(size=100))
442444
>>> pd.plotting.bootstrap_plot(s) # doctest: +SKIP
443445
<Figure size 640x480 with 6 Axes>
@@ -597,6 +599,7 @@ def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Ax
597599
.. plot::
598600
:context: close-figs
599601
602+
>>> np.random.seed(1234)
600603
>>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
601604
>>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
602605
>>> pd.plotting.autocorrelation_plot(s) # doctest: +SKIP

0 commit comments

Comments
 (0)