pvlib · kandersolar · May 26, 2023 · May 16, 2023 · May 16, 2023 · May 16, 2023
diff --git a/docs/sphinx/source/whatsnew/v0.9.6.rst b/docs/sphinx/source/whatsnew/v0.9.6.rst
@@ -11,6 +11,8 @@ Deprecations
 
 Enhancements
 ~~~~~~~~~~~~
+* Add optional encoding parameter to :py:func:`pvlib.iotools.read_tmy3`.
+  (:issue:`1732`, :pull:`1737`)
 
 
 Bug fixes
@@ -43,4 +45,4 @@ Contributors
 * Siddharth Kaul (:ghuser:`k10blogger`)
 * Kshitiz Gupta (:ghuser:`kshitiz305`)
 * Stefan de Lange (:ghuser:`langestefan`)
-
+* Andy Lam (:ghuser:`andylam598`)
diff --git a/pvlib/iotools/tmy.py b/pvlib/iotools/tmy.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 
-def read_tmy3(filename, coerce_year=None, recolumn=True):
+def read_tmy3(filename, coerce_year=None, recolumn=True, encoding=None):
     """Read a TMY3 file into a pandas dataframe.
 
     Note that values contained in the metadata dictionary are unchanged
@@ -27,6 +27,11 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
     recolumn : bool, default True
         If ``True``, apply standard names to TMY3 columns. Typically this
         results in stripping the units from the column name.
+    encoding : str, optional
+        Encoding of the file. For files that contain non-UTF8 characters it may
+        be necessary to specify an alternative encoding, e.g., for
+        SolarAnywhere TMY3 files the encoding should be 'iso-8859-1'. Users
+        may also consider using the 'utf-8-sig' encoding.
 
     Returns
     -------
@@ -152,21 +157,21 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
     ----------
     .. [1] Wilcox, S and Marion, W. "Users Manual for TMY3 Data Sets".
        NREL/TP-581-43156, Revised May 2008.
+       :doi:`10.2172/928611`
     .. [2] Wilcox, S. (2007). National Solar Radiation Database 1991 2005
        Update: Users Manual. 472 pp.; NREL Report No. TP-581-41364.
+       :doi:`10.2172/901864`
     .. [3] `SolarAnywhere file formats
        <https://www.solaranywhere.com/support/historical-data/file-formats/>`_
     """  # noqa: E501
     head = ['USAF', 'Name', 'State', 'TZ', 'latitude', 'longitude', 'altitude']
 
-    try:
-        with open(str(filename), 'r') as fbuf:
-            firstline, data = _parse_tmy3(fbuf)
-    # SolarAnywhere files contain non-UTF8 characters and may require
-    # encoding='iso-8859-1' in order to be parsed
-    except UnicodeDecodeError:
-        with open(str(filename), 'r', encoding='iso-8859-1') as fbuf:
-            firstline, data = _parse_tmy3(fbuf)
+    with open(str(filename), 'r', encoding=encoding) as fbuf:
+        # header information on the 1st line (0 indexing)
+        firstline = fbuf.readline()
+        # use pandas to read the csv file buffer
+        # header is the file's 2nd line, i.e. line 0 of the remaining buffer
+        data = pd.read_csv(fbuf, header=0)
 
     meta = dict(zip(head, firstline.rstrip('\n').split(",")))
     # convert metadata strings to numeric types
@@ -178,8 +183,10 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
 
     # get the date column as a pd.Series of numpy datetime64
     data_ymd = pd.to_datetime(data['Date (MM/DD/YYYY)'], format='%m/%d/%Y')
+    # extract minutes
+    minutes = data['Time (HH:MM)'].str.split(':').str[1].astype(int)
     # shift the time column so that midnite is 00:00 instead of 24:00
-    shifted_hour = data['Time (HH:MM)'].str[:2].astype(int) % 24
+    shifted_hour = data['Time (HH:MM)'].str.split(':').str[0].astype(int) % 24
     # shift the dates at midnight (24:00) so they correspond to the next day.
     # If midnight is specified as 00:00 do not shift date.
     data_ymd[data['Time (HH:MM)'].str[:2] == '24'] += datetime.timedelta(days=1)  # noqa: E501
@@ -197,7 +204,8 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
         data_ymd.iloc[-1] = data_ymd.iloc[-1].replace(year=coerce_year+1)
     # NOTE: as of pvlib-0.6.3, min req is pandas-0.18.1, so pd.to_timedelta
     # unit must be in (D,h,m,s,ms,us,ns), but pandas>=0.24 allows unit='hour'
-    data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h')
+    data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h') \
+        + pd.to_timedelta(minutes, unit='min')
 
     if recolumn:
         data = _recolumn(data)  # rename to standard column names
@@ -207,15 +215,6 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
     return data, meta
 
 
-def _parse_tmy3(fbuf):
-    # header information on the 1st line (0 indexing)
-    firstline = fbuf.readline()
-    # use pandas to read the csv file buffer
-    # header is actually the second line, but tell pandas to look for
-    data = pd.read_csv(fbuf, header=0)
-    return firstline, data
-
-
 def _recolumn(tmy3_dataframe):
     """
     Rename the columns of the TMY3 DataFrame.
@@ -385,6 +384,7 @@ def read_tmy2(filename):
     ----------
     .. [1] Marion, W and Urban, K. "Wilcox, S and Marion, W. "User's Manual
        for TMY2s". NREL 1995.
+       :doi:`10.2172/87130`
     """  # noqa: E501
     # paste in the column info as one long line
     string = '%2d%2d%2d%2d%4d%4d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%2d%1s%1d%2d%1s%1d%4d%1s%1d%4d%1s%1d%3d%1s%1d%4d%1s%1d%3d%1s%1d%3d%1s%1d%4d%1s%1d%5d%1s%1d%10d%3d%1s%1d%3d%1s%1d%3d%1s%1d%2d%1s%1d'  # noqa: E501

diff --git a/pvlib/tests/iotools/test_tmy.py b/pvlib/tests/iotools/test_tmy.py
@@ -87,7 +87,7 @@ def test_solaranywhere_tmy3(solaranywhere_index):
     # The SolarAnywhere TMY3 format specifies midnight as 00:00 whereas the
     # NREL TMY3 format utilizes 24:00. The SolarAnywhere file is therefore
     # included to test files with  00:00 timestamps are parsed correctly
-    data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE)
+    data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE, encoding='iso-8859-1')
     pd.testing.assert_index_equal(data.index, solaranywhere_index)
     assert meta['USAF'] == 0
     assert meta['Name'] == 'Burlington  United States'