From 9d6b61f2498f616c1e515804ce12d89ca02604d3 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 25 Nov 2016 10:09:00 -0500
Subject: [PATCH 1/6] BUG: fix SAS7BDATReader iteration and reads past end of file (GH 14734)

---
 pandas/io/sas/sas7bdat.py            | 10 ++++++++++
 pandas/io/tests/sas/test_sas7bdat.py | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 2a82fd7a53222..91f417abc0502 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -225,6 +225,12 @@ def _get_properties(self):
                 self.os_name = self.os_name.decode(
                     self.encoding or self.default_encoding)
 
+    def __next__(self):
+        da = self.read(nrows=self.chunksize or 1)  # one row if no chunksize
+        if da is None:  # read() returns None once all rows are consumed
+            raise StopIteration
+        return da
+
     # Read a single float of the given width (4 or 8).
     def _read_float(self, offset, width):
         if width not in (4, 8):
@@ -591,6 +597,10 @@ def read(self, nrows=None):
         if self._current_row_in_file_index >= self.row_count:
             return None
 
+        m = self.row_count - self._current_row_in_file_index
+        if nrows > m:
+            nrows = m
+
         nd = (self.column_types == b'd').sum()
         ns = (self.column_types == b's').sum()
 
diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/io/tests/sas/test_sas7bdat.py
index 06eb9774679b1..0ae8e69999987 100644
--- a/pandas/io/tests/sas/test_sas7bdat.py
+++ b/pandas/io/tests/sas/test_sas7bdat.py
@@ -65,6 +65,32 @@ def test_from_iterator(self):
                 df = rdr.read(3)
                 tm.assert_frame_equal(df, df0.iloc[2:5, :])
 
+    def test_iterator_loop(self):
+        for j in 0, 1:
+            for k in self.test_ix[j]:
+                for chunksize in 3, 5, 10, 11:
+                    fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
+                    with open(fname, 'rb') as f:
+                        byts = f.read()
+                    buf = io.BytesIO(byts)
+                    rdr = pd.read_sas(buf, format="sas7bdat",
+                                      chunksize=chunksize, encoding='utf-8')
+                    y = 0
+                    for x in rdr:
+                        y += x.shape[0]
+                    assert(y == rdr.row_count)
+
+    def test_iterator_read_too_much(self):
+        # github #14734
+        k = self.test_ix[0][0]
+        fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
+        with open(fname, 'rb') as f:
+            byts = f.read()
+        buf = io.BytesIO(byts)
+        rdr = pd.read_sas(buf, format="sas7bdat",
+                          iterator=True, encoding='utf-8')
+        rdr.read(rdr.row_count + 20)
+
 
 def test_encoding_options():
     dirpath = tm.get_data_path()

From e8327e0ee5a827334c42a9b71fae625e98fad0ab Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 25 Nov 2016 14:23:40 -0500
Subject: [PATCH 2/6] Added to whatsnew

---
 doc/source/whatsnew/v0.20.0.txt      | 3 +++
 pandas/io/tests/sas/test_sas7bdat.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 65b62601c7022..5d2c5f014391d 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -84,3 +84,6 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 
+- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and
+``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when
+reading a SAS file incrementally.
diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/io/tests/sas/test_sas7bdat.py
index 0ae8e69999987..4cbc385ea1168 100644
--- a/pandas/io/tests/sas/test_sas7bdat.py
+++ b/pandas/io/tests/sas/test_sas7bdat.py
@@ -66,6 +66,7 @@ def test_from_iterator(self):
                 tm.assert_frame_equal(df, df0.iloc[2:5, :])
 
     def test_iterator_loop(self):
+        # github #13654
         for j in 0, 1:
             for k in self.test_ix[j]:
                 for chunksize in 3, 5, 10, 11:
@@ -78,7 +79,7 @@ def test_iterator_loop(self):
                     y = 0
                     for x in rdr:
                         y += x.shape[0]
-                    assert(y == rdr.row_count)
+                    self.assertTrue(y == rdr.row_count)
 
     def test_iterator_read_too_much(self):
         # github #14734

From 4504df5c6e63cb79356b4bd2b4a0918aca11644a Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 25 Nov 2016 17:03:12 -0500
Subject: [PATCH 3/6] Add iterator tests for xport

---
 pandas/io/tests/sas/test_xport.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/io/tests/sas/test_xport.py b/pandas/io/tests/sas/test_xport.py
index d0627a80f9604..fe2f7cb4bf4be 100644
--- a/pandas/io/tests/sas/test_xport.py
+++ b/pandas/io/tests/sas/test_xport.py
@@ -35,6 +35,13 @@ def test1_basic(self):
         # Read full file
         data = read_sas(self.file01, format="xport")
         tm.assert_frame_equal(data, data_csv)
+        num_rows = data.shape[0]
+
+        # Test reading beyond end of file
+        reader = read_sas(self.file01, format="xport", iterator=True)
+        data = reader.read(num_rows + 100)
+        self.assertTrue(data.shape[0] == num_rows)
+        reader.close()
 
         # Test incremental read with `read` method.
         reader = read_sas(self.file01, format="xport", iterator=True)
@@ -48,6 +55,14 @@ def test1_basic(self):
         reader.close()
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
 
+        # Test read in loop
+        m = 0
+        reader = read_sas(self.file01, format="xport", chunksize=100)
+        for x in reader:
+            m += x.shape[0]
+        reader.close()
+        self.assertTrue(m == num_rows)
+
         # Read full file with `read_sas` method
         data = read_sas(self.file01)
         tm.assert_frame_equal(data, data_csv)

From a7b7da88052f472e8c2c592d885014a0e1b75230 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 25 Nov 2016 17:15:45 -0500
Subject: [PATCH 4/6] Moved whatsnew to 19.2

---
 doc/source/whatsnew/v0.19.2.txt | 1 +
 doc/source/whatsnew/v0.20.0.txt | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
index d9aa92270669d..a5fca8f268d9c 100644
--- a/doc/source/whatsnew/v0.19.2.txt
+++ b/doc/source/whatsnew/v0.19.2.txt
@@ -31,6 +31,7 @@ Bug Fixes
 - Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
 - Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
 - Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
+- Bug in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally (:issue:`14734`, :issue:`13654`)
 
 
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 5d2c5f014391d..03e0cae6cc83f 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -83,7 +83,3 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
-
-- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and
-``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when
-reading a SAS file incrementally.

From 8c1e17e24ffbd36383c3c8bacd7d7eff7b822a21 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Fri, 25 Nov 2016 17:27:06 -0500
Subject: [PATCH 5/6] Bypass ioreader

---
 pandas/io/tests/sas/test_sas7bdat.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/io/tests/sas/test_sas7bdat.py
index 4cbc385ea1168..530e0b3701e32 100644
--- a/pandas/io/tests/sas/test_sas7bdat.py
+++ b/pandas/io/tests/sas/test_sas7bdat.py
@@ -44,10 +44,7 @@ def test_from_buffer(self):
             df0 = self.data[j]
             for k in self.test_ix[j]:
                 fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
-                with open(fname, 'rb') as f:
-                    byts = f.read()
-                buf = io.BytesIO(byts)
-                df = pd.read_sas(buf, format="sas7bdat", encoding='utf-8')
+                df = pd.read_sas(fname, encoding="utf-8")
                 tm.assert_frame_equal(df, df0, check_exact=False)
 
     def test_from_iterator(self):
@@ -55,11 +52,7 @@ def test_from_iterator(self):
             df0 = self.data[j]
             for k in self.test_ix[j]:
                 fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
-                with open(fname, 'rb') as f:
-                    byts = f.read()
-                buf = io.BytesIO(byts)
-                rdr = pd.read_sas(buf, format="sas7bdat",
-                                  iterator=True, encoding='utf-8')
+                rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
                 df = rdr.read(2)
                 tm.assert_frame_equal(df, df0.iloc[0:2, :])
                 df = rdr.read(3)
@@ -71,11 +64,8 @@ def test_iterator_loop(self):
             for k in self.test_ix[j]:
                 for chunksize in 3, 5, 10, 11:
                     fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
-                    with open(fname, 'rb') as f:
-                        byts = f.read()
-                    buf = io.BytesIO(byts)
-                    rdr = pd.read_sas(buf, format="sas7bdat",
-                                      chunksize=chunksize, encoding='utf-8')
+                    rdr = pd.read_sas(fname, chunksize=chunksize,
+                                      encoding='utf-8')
                     y = 0
                     for x in rdr:
                         y += x.shape[0]
@@ -90,7 +80,10 @@ def test_iterator_read_too_much(self):
         buf = io.BytesIO(byts)
         rdr = pd.read_sas(buf, format="sas7bdat",
                           iterator=True, encoding='utf-8')
-        rdr.read(rdr.row_count + 20)
+        d1 = rdr.read(rdr.row_count + 20)
+        rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
+        d2 = rdr.read(rdr.row_count + 20)
+        tm.assert_frame_equal(d1, d2)
 
 
 def test_encoding_options():

From 28d4038c3330332da2d3631214d26aa93c98d636 Mon Sep 17 00:00:00 2001
From: Kerby Shedden <kshedden@umich.edu>
Date: Sat, 26 Nov 2016 10:01:00 -0500
Subject: [PATCH 6/6] Minor change to tests

---
 pandas/io/tests/sas/test_sas7bdat.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/io/tests/sas/test_sas7bdat.py
index 530e0b3701e32..e20ea48247119 100644
--- a/pandas/io/tests/sas/test_sas7bdat.py
+++ b/pandas/io/tests/sas/test_sas7bdat.py
@@ -44,7 +44,12 @@ def test_from_buffer(self):
             df0 = self.data[j]
             for k in self.test_ix[j]:
                 fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
-                df = pd.read_sas(fname, encoding="utf-8")
+                with open(fname, 'rb') as f:
+                    byts = f.read()
+                buf = io.BytesIO(byts)
+                rdr = pd.read_sas(buf, format="sas7bdat",
+                                  iterator=True, encoding='utf-8')
+                df = rdr.read()
                 tm.assert_frame_equal(df, df0, check_exact=False)
 
     def test_from_iterator(self):
@@ -74,10 +79,7 @@ def test_iterator_read_too_much(self):
         # github #14734
         k = self.test_ix[0][0]
         fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
-        with open(fname, 'rb') as f:
-            byts = f.read()
-        buf = io.BytesIO(byts)
-        rdr = pd.read_sas(buf, format="sas7bdat",
+        rdr = pd.read_sas(fname, format="sas7bdat",
                           iterator=True, encoding='utf-8')
         d1 = rdr.read(rdr.row_count + 20)
         rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")