Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/sphinx/source/whatsnew/v0.9.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Add optional encoding parameter to :py:func:`pvlib.iotools.read_tmy3`.
(:issue:`1732`, :pull:`1737`)


Bug fixes
Expand Down Expand Up @@ -43,4 +45,4 @@ Contributors
* Siddharth Kaul (:ghuser:`k10blogger`)
* Kshitiz Gupta (:ghuser:`kshitiz305`)
* Stefan de Lange (:ghuser:`langestefan`)

* Andy Lam (:ghuser:`andylam598`)
40 changes: 20 additions & 20 deletions pvlib/iotools/tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd


def read_tmy3(filename, coerce_year=None, recolumn=True):
def read_tmy3(filename, coerce_year=None, recolumn=True, encoding=None):
"""Read a TMY3 file into a pandas dataframe.

Note that values contained in the metadata dictionary are unchanged
Expand All @@ -27,6 +27,11 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
recolumn : bool, default True
If ``True``, apply standard names to TMY3 columns. Typically this
results in stripping the units from the column name.
encoding : str, optional
Encoding of the file. For files that contain non-UTF8 characters it may
be necessary to specify an alternative encoding, e.g., for
SolarAnywhere TMY3 files the encoding should be 'iso-8859-1'. Users
may also consider using the 'utf-8-sig' encoding.

Returns
-------
Expand Down Expand Up @@ -152,21 +157,21 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
----------
.. [1] Wilcox, S and Marion, W. "Users Manual for TMY3 Data Sets".
NREL/TP-581-43156, Revised May 2008.
:doi:`10.2172/928611`
.. [2] Wilcox, S. (2007). National Solar Radiation Database 1991-2005
Update: Users Manual. 472 pp.; NREL Report No. TP-581-41364.
:doi:`10.2172/901864`
.. [3] `SolarAnywhere file formats
<https://www.solaranywhere.com/support/historical-data/file-formats/>`_
""" # noqa: E501
head = ['USAF', 'Name', 'State', 'TZ', 'latitude', 'longitude', 'altitude']

try:
with open(str(filename), 'r') as fbuf:
firstline, data = _parse_tmy3(fbuf)
# SolarAnywhere files contain non-UTF8 characters and may require
# encoding='iso-8859-1' in order to be parsed
except UnicodeDecodeError:
with open(str(filename), 'r', encoding='iso-8859-1') as fbuf:
firstline, data = _parse_tmy3(fbuf)
with open(str(filename), 'r', encoding=encoding) as fbuf:
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
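# the column header is actually the second line of the file, but the
# readline call above already consumed the first line, so tell pandas to
# look for the header on line 0 of what remains in the buffer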
data = pd.read_csv(fbuf, header=0)

meta = dict(zip(head, firstline.rstrip('\n').split(",")))
# convert metadata strings to numeric types
Expand All @@ -178,8 +183,10 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):

# get the date column as a pd.Series of numpy datetime64
data_ymd = pd.to_datetime(data['Date (MM/DD/YYYY)'], format='%m/%d/%Y')
# extract minutes
minutes = data['Time (HH:MM)'].str.split(':').str[1].astype(int)
# shift the time column so that midnight is 00:00 instead of 24:00
shifted_hour = data['Time (HH:MM)'].str[:2].astype(int) % 24
shifted_hour = data['Time (HH:MM)'].str.split(':').str[0].astype(int) % 24
# shift the dates at midnight (24:00) so they correspond to the next day.
# If midnight is specified as 00:00 do not shift date.
data_ymd[data['Time (HH:MM)'].str[:2] == '24'] += datetime.timedelta(days=1) # noqa: E501
Expand All @@ -197,7 +204,8 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
data_ymd.iloc[-1] = data_ymd.iloc[-1].replace(year=coerce_year+1)
# NOTE: as of pvlib-0.6.3, min req is pandas-0.18.1, so pd.to_timedelta
# unit must be in (D,h,m,s,ms,us,ns), but pandas>=0.24 allows unit='hour'
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h')
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h') \
+ pd.to_timedelta(minutes, unit='min')

if recolumn:
data = _recolumn(data) # rename to standard column names
Expand All @@ -207,15 +215,6 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
return data, meta


def _parse_tmy3(fbuf):
# Split an open TMY3 file buffer into its first line and its data table.
# Returns a tuple: the first line exactly as returned by ``readline()`` and
# the DataFrame that pandas parses from the remainder of the buffer.
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
# the column header is actually the second line of the file, but the
# readline call above already consumed the first line, so tell pandas to
# look for the header on line 0 of what remains in the buffer
data = pd.read_csv(fbuf, header=0)
return firstline, data


def _recolumn(tmy3_dataframe):
"""
Rename the columns of the TMY3 DataFrame.
Expand Down Expand Up @@ -385,6 +384,7 @@ def read_tmy2(filename):
----------
.. [1] Marion, W and Urban, K. "User's Manual
for TMY2s". NREL 1995.
:doi:`10.2172/87130`
""" # noqa: E501
# paste in the column info as one long line
string = '%2d%2d%2d%2d%4d%4d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%2d%1s%1d%2d%1s%1d%4d%1s%1d%4d%1s%1d%3d%1s%1d%4d%1s%1d%3d%1s%1d%3d%1s%1d%4d%1s%1d%5d%1s%1d%10d%3d%1s%1d%3d%1s%1d%3d%1s%1d%2d%1s%1d' # noqa: E501
Expand Down
2 changes: 1 addition & 1 deletion pvlib/tests/iotools/test_tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_solaranywhere_tmy3(solaranywhere_index):
# The SolarAnywhere TMY3 format specifies midnight as 00:00 whereas the
# NREL TMY3 format utilizes 24:00. The SolarAnywhere file is therefore
# included to test that files with 00:00 timestamps are parsed correctly
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE)
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE, encoding='iso-8859-1')
pd.testing.assert_index_equal(data.index, solaranywhere_index)
assert meta['USAF'] == 0
assert meta['Name'] == 'Burlington United States'
Expand Down