pandas-dev · jreback · Oct 31, 2017 · Oct 26, 2017 · Oct 26, 2017 · Oct 26, 2017
diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
@@ -74,6 +74,9 @@ Indexing
 I/O
 ^^^
 
+- Bug in `StataReader` where date/time columns with display formatting were not converted to dates (:issue:`17990`)
+
+
 Plotting
 ^^^^^^^^
 

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -306,39 +306,42 @@ def convert_delta_safe(base, deltas, unit):
         data_col[bad_locs] = 1.0  # Replace with NaT
     dates = dates.astype(np.int64)
 
-    if fmt in ["%tc", "tc"]:  # Delta ms relative to base
+    if fmt.startswith(("%tc", "tc")):  # Delta ms relative to base
         base = stata_epoch
         ms = dates
         conv_dates = convert_delta_safe(base, ms, 'ms')
-    elif fmt in ["%tC", "tC"]:
+    elif fmt.startswith(("%tC", "tC")):
         from warnings import warn
 
         warn("Encountered %tC format. Leaving in Stata Internal Format.")
         conv_dates = Series(dates, dtype=np.object)
         if has_bad_values:
             conv_dates[bad_locs] = pd.NaT
         return conv_dates
-    elif fmt in ["%td", "td", "%d", "d"]:  # Delta days relative to base
+    # Delta days relative to base
+    elif fmt.startswith(("%td", "td", "%d", "d")):
         base = stata_epoch
         days = dates
         conv_dates = convert_delta_safe(base, days, 'd')
-    elif fmt in ["%tw", "tw"]:  # does not count leap days - 7 days is a week
+    # Delta weeks relative to base: a year always has 52 weeks and leap days
+    # are not counted, so the 52nd week may have more than 7 days
+    elif fmt.startswith(("%tw", "tw")):
         year = stata_epoch.year + dates // 52
         days = (dates % 52) * 7
         conv_dates = convert_year_days_safe(year, days)
-    elif fmt in ["%tm", "tm"]:  # Delta months relative to base
+    elif fmt.startswith(("%tm", "tm")):  # Delta months relative to base
         year = stata_epoch.year + dates // 12
         month = (dates % 12) + 1
         conv_dates = convert_year_month_safe(year, month)
-    elif fmt in ["%tq", "tq"]:  # Delta quarters relative to base
+    elif fmt.startswith(("%tq", "tq")):  # Delta quarters relative to base
         year = stata_epoch.year + dates // 4
         month = (dates % 4) * 3 + 1
         conv_dates = convert_year_month_safe(year, month)
-    elif fmt in ["%th", "th"]:  # Delta half-years relative to base
+    elif fmt.startswith(("%th", "th")):  # Delta half-years relative to base
         year = stata_epoch.year + dates // 2
         month = (dates % 2) * 6 + 1
         conv_dates = convert_year_month_safe(year, month)
-    elif fmt in ["%ty", "ty"]:  # Years -- not delta
+    elif fmt.startswith(("%ty", "ty")):  # Years -- not delta
         year = dates
         month = np.ones_like(dates)
         conv_dates = convert_year_month_safe(year, month)
@@ -1029,10 +1032,6 @@ def _read_header(self):
         # calculate size of a data record
         self.col_sizes = lmap(lambda x: self._calcsize(x), self.typlist)
 
-        # remove format details from %td
-        self.fmtlist = ["%td" if x.startswith("%td") else x
-                        for x in self.fmtlist]
-
     def _read_new_header(self, first_char):
         # The first part of the header is common to 117 and 118.
         self.path_or_buf.read(27)  # stata_dta><header><release>
@@ -1578,7 +1577,8 @@ def read(self, nrows=None, convert_dates=None,
         self._do_convert_missing(data, convert_missing)
 
         if convert_dates:
-            cols = np.where(lmap(lambda x: x in _date_formats,
+            cols = np.where(lmap(lambda x: any(x.startswith(fmt)
+                                               for fmt in _date_formats),
                                  self.fmtlist))[0]
             for i in cols:
                 col = data.columns[i]

diff --git a/pandas/tests/io/data/stata13_dates.dta b/pandas/tests/io/data/stata13_dates.dta
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -96,6 +96,8 @@ def setup_method(self, method):
 
         self.dta24_111 = os.path.join(self.dirpath, 'stata7_111.dta')
 
+        self.stata_dates = os.path.join(self.dirpath, 'stata13_dates.dta')
+
     def read_dta(self, file):
         # Legacy default reader configuration
         return read_stata(file, convert_dates=True)
@@ -1327,3 +1329,22 @@ def test_set_index(self):
             df.to_stata(path)
             reread = pd.read_stata(path, index_col='index')
         tm.assert_frame_equal(df, reread)
+
+    @pytest.mark.parametrize(
+        'column', ['ms', 'day', 'week', 'month', 'qtr', 'half', 'yr'])
+    def test_date_parsing_ignores_format_details(self, column):
+        # GH 17797
+        #
+        # Test that display formats are ignored when determining if a numeric
+        # column is a date value.
+        #
+        # All date types are stored as numbers and format associated with the
+        # column denotes both the type of the date and the display format.
+        #
+        # Stata supports 9 date types, each with distinct units. We test 7
+        # of the 9 types, ignoring %tC and %tb. %tC is a variant of %tc that
+        # accounts for leap seconds; %tb relies on Stata's business calendars.
+        df = read_stata(self.stata_dates)
+        unformatted = df.loc[0, column]
+        formatted = df.loc[0, column + "_fmt"]
+        assert unformatted == formatted
-Original file line number
+Diff line change
@@ Expand Up / @@ -74,6 +74,9 @@ Indexing @@
     I/O
     ^^^
+    - Bug in `StataReader` where date/time columns with display formatting were not converted to dates (:issue:`17990`)
     Plotting
     ^^^^^^^^
@@ Expand Down @@