Skip to content

Commit 818618e

Browse files
authored
TST: Remove tm.rands/rands_array (#54368)
* remove tm.rands
* remove rands array
* Address failures
* Use unique values
1 parent 0d0073a commit 818618e

File tree

22 files changed

+67
-103
lines changed

22 files changed

+67
-103
lines changed

asv_bench/benchmarks/array.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import pandas as pd
44

5-
from .pandas_vb_common import tm
6-
75

86
class BooleanArray:
97
def setup(self):
@@ -56,7 +54,7 @@ def time_from_tuples(self):
5654
class StringArray:
5755
def setup(self):
5856
N = 100_000
59-
values = tm.rands_array(3, N)
57+
values = np.array([str(i) for i in range(N)], dtype=object)
6058
self.values_obj = np.array(values, dtype="object")
6159
self.values_str = np.array(values, dtype="U")
6260
self.values_list = values.tolist()
@@ -80,7 +78,7 @@ def setup(self, multiple_chunks):
8078
import pyarrow as pa
8179
except ImportError:
8280
raise NotImplementedError
83-
strings = tm.rands_array(3, 10_000)
81+
strings = np.array([str(i) for i in range(10_000)], dtype=object)
8482
if multiple_chunks:
8583
chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
8684
self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
@@ -127,7 +125,7 @@ def setup(self, dtype, hasna):
127125
elif dtype == "int64[pyarrow]":
128126
data = np.arange(N)
129127
elif dtype == "string[pyarrow]":
130-
data = tm.rands_array(10, N)
128+
data = np.array([str(i) for i in range(N)], dtype=object)
131129
elif dtype == "timestamp[ns][pyarrow]":
132130
data = pd.date_range("2000-01-01", freq="s", periods=N)
133131
else:

asv_bench/benchmarks/series_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def setup(self, dtype):
104104
data = np.arange(N)
105105
na_value = NA
106106
elif dtype in ("string", "string[pyarrow]"):
107-
data = tm.rands_array(5, N)
107+
data = np.array([str(i) * 5 for i in range(N)], dtype=object)
108108
na_value = NA
109109
else:
110110
raise NotImplementedError

asv_bench/benchmarks/strings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class Construction:
3434
dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
3535

3636
def setup(self, pd_type, dtype):
37-
series_arr = tm.rands_array(
38-
nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
37+
series_arr = np.array(
38+
[str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
3939
)
4040
if pd_type == "series":
4141
self.arr = series_arr
@@ -276,7 +276,7 @@ def time_iter(self, dtype):
276276

277277
class StringArrayConstruction:
278278
def setup(self):
279-
self.series_arr = tm.rands_array(nchars=10, size=10**5)
279+
self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
280280
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
281281

282282
def time_string_array_construction(self):

pandas/_testing/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,6 @@
5454
round_trip_pickle,
5555
write_to_compressed,
5656
)
57-
from pandas._testing._random import (
58-
rands,
59-
rands_array,
60-
)
6157
from pandas._testing._warnings import (
6258
assert_produces_warning,
6359
maybe_produces_warning,
@@ -349,6 +345,22 @@ def to_array(obj):
349345
# Others
350346

351347

348+
def rands_array(
349+
nchars, size: int, dtype: NpDtype = "O", replace: bool = True
350+
) -> np.ndarray:
351+
"""
352+
Generate an array of byte strings.
353+
"""
354+
chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
355+
retval = (
356+
np.random.default_rng(2)
357+
.choice(chars, size=nchars * np.prod(size), replace=replace)
358+
.view((np.str_, nchars))
359+
.reshape(size)
360+
)
361+
return retval.astype(dtype)
362+
363+
352364
def getCols(k) -> str:
353365
return string.ascii_uppercase[:k]
354366

@@ -1127,7 +1139,6 @@ def shares_memory(left, right) -> bool:
11271139
"NULL_OBJECTS",
11281140
"OBJECT_DTYPES",
11291141
"raise_assert_detail",
1130-
"rands",
11311142
"reset_display_options",
11321143
"raises_chained_assignment_error",
11331144
"round_trip_localpath",

pandas/_testing/_io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Any,
1010
Callable,
1111
)
12+
import uuid
1213
import zipfile
1314

1415
from pandas.compat import (
@@ -18,7 +19,6 @@
1819
from pandas.compat._optional import import_optional_dependency
1920

2021
import pandas as pd
21-
from pandas._testing._random import rands
2222
from pandas._testing.contexts import ensure_clean
2323

2424
if TYPE_CHECKING:
@@ -56,7 +56,7 @@ def round_trip_pickle(
5656
"""
5757
_path = path
5858
if _path is None:
59-
_path = f"__{rands(10)}__.pickle"
59+
_path = f"__{uuid.uuid4()}__.pickle"
6060
with ensure_clean(_path) as temp_path:
6161
pd.to_pickle(obj, temp_path)
6262
return pd.read_pickle(temp_path)

pandas/_testing/_random.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

pandas/tests/arithmetic/test_numeric.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -881,7 +881,7 @@ def test_add_frames(self, first, second, expected):
881881
# TODO: This came from series.test.test_operators, needs cleanup
882882
def test_series_frame_radd_bug(self, fixed_now_ts):
883883
# GH#353
884-
vals = Series(tm.rands_array(5, 10))
884+
vals = Series(tm.makeStringIndex())
885885
result = "foo_" + vals
886886
expected = vals.map(lambda x: "foo_" + x)
887887
tm.assert_series_equal(result, expected)

pandas/tests/extension/base/getitem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_getitem_series_integer_with_missing_raises(self, data, idx):
272272
msg = "Cannot index with an integer indexer containing NA values"
273273
# TODO: this raises KeyError about labels not found (it tries label-based)
274274

275-
ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
275+
ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
276276
with pytest.raises(ValueError, match=msg):
277277
ser[idx]
278278

pandas/tests/extension/base/setitem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
197197
# TODO(xfail) this raises KeyError about labels not found (it tries label-based)
198198
# for list of labels with Series
199199
if box_in_series:
200-
arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
200+
arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
201201

202202
msg = "Cannot index with an integer indexer containing NA values"
203203
with pytest.raises(ValueError, match=msg):

pandas/tests/frame/test_arithmetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def test_timestamp_compare(self, left, right):
203203
"dates2": pd.date_range("20010102", periods=10),
204204
"intcol": np.random.default_rng(2).integers(1000000000, size=10),
205205
"floatcol": np.random.default_rng(2).standard_normal(10),
206-
"stringcol": list(tm.rands(10)),
206+
"stringcol": [chr(100 + i) for i in range(10)],
207207
}
208208
)
209209
df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT

0 commit comments

Comments
 (0)