Skip to content

Commit 513e3c3

Browse files
committed
Test fixups
1 parent 60a8eee commit 513e3c3

File tree

2 files changed

12 additions and 4 deletions

2 files changed

12 additions and 4 deletions

pandas/io/sas/sas7bdat.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
22 22

23 23
import numpy as np
24 24

25 +
from pandas._config import get_option
26 +
25 27
from pandas._libs.byteswap import (
26 28
read_double_with_byteswap,
27 29
read_float_with_byteswap,
@@ -699,6 +701,7 @@ def _chunk_to_dataframe(self) -> DataFrame:
699 701
rslt = {}
700 702

701 703
js, jb = 0, 0
704 +
infer_string = get_option("future.infer_string")
702 705
for j in range(self.column_count):
703 706
name = self.column_names[j]
704 707

@@ -715,6 +718,9 @@ def _chunk_to_dataframe(self) -> DataFrame:
715 718
rslt[name] = pd.Series(self._string_chunk[js, :], index=ix, copy=False)
716 719
if self.convert_text and (self.encoding is not None):
717 720
rslt[name] = self._decode_string(rslt[name].str)
721 +
if infer_string:
722 +
rslt[name] = rslt[name].astype("str")
723 +
718 724
js += 1
719 725
else:
720 726
self.close()

pandas/tests/io/sas/test_sas7bdat.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -240,11 +240,13 @@ def test_zero_variables(datapath):
240 240
pd.read_sas(fname)
241 241

242 242

243 -
def test_zero_rows(datapath):
243 +
@pytest.mark.parametrize("encoding", [None, "utf8"])
244 +
def test_zero_rows(datapath, encoding):
244 245
# GH 18198
245 246
fname = datapath("io", "sas", "data", "zero_rows.sas7bdat")
246 -
result = pd.read_sas(fname)
247 -
expected = pd.DataFrame([{"char_field": "a", "num_field": 1.0}]).iloc[:0]
247 +
result = pd.read_sas(fname, encoding=encoding)
248 +
str_value = b"a" if encoding is None else "a"
249 +
expected = pd.DataFrame([{"char_field": str_value, "num_field": 1.0}]).iloc[:0]
248 250
tm.assert_frame_equal(result, expected)
249 251

250 252

@@ -403,7 +405,7 @@ def test_0x40_control_byte(datapath):
403 405
fname = datapath("io", "sas", "data", "0x40controlbyte.sas7bdat")
404 406
df = pd.read_sas(fname, encoding="ascii")
405 407
fname = datapath("io", "sas", "data", "0x40controlbyte.csv")
406 -
df0 = pd.read_csv(fname, dtype="object")
408 +
df0 = pd.read_csv(fname, dtype="str")
407 409
tm.assert_frame_equal(df, df0)
408 410

409 411

0 commit comments

Comments (0)