Skip to content

Commit 4694a42

Browse files
committed
Minor changes following code review
1 parent 478af3b commit 4694a42

File tree

2 files changed

+37
-19
lines changed

2 files changed

+37
-19
lines changed

doc/source/whatsnew/v0.17.0.txt

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Highlights include:
2020
if they are all ``NaN``, see :ref:`here <whatsnew_0170.api_breaking.hdf_dropna>`
2121
- Support for ``Series.dt.strftime`` to generate formatted strings for datetime-likes, see :ref:`here <whatsnew_0170.strftime>`
2222
- Development installed versions of pandas will now have ``PEP440`` compliant version strings (:issue:`9518`)
23-
- Support for reading SAS xport files, see :meth:`~pandas.io.read_sas`.
23+
- Support for reading SAS xport files, see :ref:`here <whatsnew_0170.enhancements.sas_xport>`
2424

2525
Check the :ref:`API Changes <whatsnew_0170.api>` and :ref:`deprecations <whatsnew_0170.deprecations>` before updating.
2626

@@ -85,6 +85,18 @@ We are now supporting a ``Series.dt.strftime`` method for datetime-likes to gene
8585

8686
The string format follows that of the Python standard library; details can be found `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_
8787

88+
.. _whatsnew_0170.enhancements.sas_xport:
89+
90+
Support for SAS XPORT files
91+
^^^^^^^^^^^^^^^^^^^^^^^^^^^
92+
93+
:meth:`~pandas.io.read_sas` provides support for reading SAS XPORT format files::
94+
95+
df = pd.read_sas('sas_xport.xpt')
96+
97+
It is also possible to obtain an iterator and read an XPORT file
98+
incrementally.
99+
88100
.. _whatsnew_0170.enhancements.other:
89101

90102
Other enhancements
@@ -94,13 +106,6 @@ Other enhancements
94106

95107
- Enable ``read_hdf`` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`)
96108

97-
- :meth:`~pandas.io.read_sas` provides support for reading SAS XPORT format files:
98-
99-
df = pd.read_sas('sas_xport.xpt')
100-
101-
It is also possible to obtain an iterator and read an XPORT file
102-
incrementally.
103-
104109
- ``DatetimeIndex`` can be instantiated using strings containing ``NaT`` (:issue:`7599`)
105110
- The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has been made consistent. (:issue:`7599`)
106111

pandas/io/tests/test_sas.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,19 @@
22
import pandas.util.testing as tm
33
from pandas import compat
44
from pandas.io.sas import XportReader, read_sas
5+
import numpy as np
56
import os
67

78
# CSV versions of test XPT files were obtained using the R foreign library
89

10+
# Numbers in a SAS xport file are always float64, so we need to convert
11+
# before making comparisons.
12+
def numeric_as_float(data):
    """Cast every integer-typed column of ``data`` to ``float64`` in place.

    Parameters
    ----------
    data : DataFrame
        Frame whose columns are inspected one by one. Columns whose dtype
        is a signed or unsigned integer are replaced by a ``float64`` copy;
        all other columns are left untouched.

    Returns
    -------
    None
        ``data`` is modified in place.
    """
    for v in data.columns:
        # Match on the dtype *kind* rather than on object identity with
        # ``np.dtype('int64')``: identity only works because NumPy happens
        # to cache its built-in dtype objects, and it misses other integer
        # widths (e.g. int32) that need the same conversion.
        if data[v].dtype.kind in ('i', 'u'):
            data[v] = data[v].astype(np.float64)
16+
17+
918
class TestXport(tm.TestCase):
1019

1120
def setUp(self):
@@ -20,24 +29,25 @@ def test1(self):
2029

2130
# Compare to this
2231
data_csv = pd.read_csv(self.file01.replace(".XPT", ".csv"))
32+
numeric_as_float(data_csv)
2333

2434
# Read full file
2535
data = XportReader(self.file01).read()
26-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
36+
tm.assert_frame_equal(data, data_csv)
2737

2838
# Test incremental read with `read` method.
2939
reader = XportReader(self.file01)
3040
data = reader.read(10)
31-
tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_dtype=False)
41+
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
3242

3343
# Test incremental read with `get_chunk` method.
3444
reader = XportReader(self.file01, chunksize=10)
3545
data = reader.get_chunk()
36-
tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_dtype=False)
46+
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
3747

3848
# Read full file with `read_sas` method
3949
data = read_sas(self.file01)
40-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
50+
tm.assert_frame_equal(data, data_csv)
4151

4252

4353
def test1_index(self):
@@ -46,44 +56,47 @@ def test1_index(self):
4656
# Compare to this
4757
data_csv = pd.read_csv(self.file01.replace(".XPT", ".csv"))
4858
data_csv = data_csv.set_index("SEQN")
59+
numeric_as_float(data_csv)
4960

5061
# Read full file
5162
data = XportReader(self.file01, index="SEQN").read()
52-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
63+
tm.assert_frame_equal(data, data_csv)
5364

5465
# Test incremental read with `read` method.
5566
reader = XportReader(self.file01, index="SEQN")
5667
data = reader.read(10)
57-
tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_dtype=False)
68+
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
5869

5970
# Test incremental read with `get_chunk` method.
6071
reader = XportReader(self.file01, index="SEQN", chunksize=10)
6172
data = reader.get_chunk()
62-
tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_dtype=False)
73+
tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
6374

6475

6576
def test1_incremental(self):
6677
# Test with DEMO_G.XPT, reading full file incrementally
6778

6879
data_csv = pd.read_csv(self.file01.replace(".XPT", ".csv"))
6980
data_csv = data_csv.set_index("SEQN")
81+
numeric_as_float(data_csv)
7082

7183
reader = XportReader(self.file01, index="SEQN", chunksize=1000)
7284

7385
all_data = [x for x in reader]
7486
data = pd.concat(all_data, axis=0)
7587

76-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
88+
tm.assert_frame_equal(data, data_csv)
7789

7890

7991
def test2(self):
8092
# Test with SSHSV1_A.XPT
8193

8294
# Compare to this
8395
data_csv = pd.read_csv(self.file02.replace(".XPT", ".csv"))
96+
numeric_as_float(data_csv)
8497

8598
data = XportReader(self.file02).read()
86-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
99+
tm.assert_frame_equal(data, data_csv)
87100

88101

89102
def test3(self):
@@ -93,7 +106,7 @@ def test3(self):
93106
data_csv = pd.read_csv(self.file03.replace(".XPT", ".csv"))
94107

95108
data = XportReader(self.file03).read()
96-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
109+
tm.assert_frame_equal(data, data_csv)
97110

98111
data = read_sas(self.file03)
99-
tm.assert_frame_equal(data, data_csv, check_dtype=False)
112+
tm.assert_frame_equal(data, data_csv)

0 commit comments

Comments
 (0)