-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Description
Code Sample, a copy-pastable example if possible
import sqlite3
import pandas as pd

# Build a throwaway SQLite database containing a single DATETIME column
# populated with an ISO-8601 string (the GeoPackage `last_change` format).
connection = sqlite3.connect("tmp.db")
cursor = connection.cursor()
cursor.execute("CREATE TABLE gpkg_contents (last_change DATETIME);")
cursor.execute("INSERT INTO gpkg_contents VALUES ('2019-01-01T00:00:00.000Z');")
connection.commit()
connection.close()

# Read the table back through SQLAlchemy, asking pandas to parse the
# DATETIME column — this is the call that raises the ValueError.
df = pd.read_sql_table(
    "gpkg_contents",
    "sqlite:///tmp.db",
    parse_dates=["last_change"],
)
df["last_change"][0]
Problem description
The above minimal example raises the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-172-a1e2291991c6> in <module>
6 con.commit()
7 con.close()
----> 8 df = pd.read_sql_table("gpkg_contents", "sqlite:///tmp.db", parse_dates=["last_change"])
9 df["last_change"][0]
~/.venv/lib/python3.7/site-packages/pandas/io/sql.py in read_sql_table(table_name, con, schema, index_col, coerce_float, parse_dates, columns, chunksize)
252 parse_dates=parse_dates,
253 columns=columns,
--> 254 chunksize=chunksize,
255 )
256
~/.venv/lib/python3.7/site-packages/pandas/io/sql.py in read_table(self, table_name, index_col, coerce_float, parse_dates, columns, schema, chunksize)
1143 parse_dates=parse_dates,
1144 columns=columns,
-> 1145 chunksize=chunksize,
1146 )
1147
~/.venv/lib/python3.7/site-packages/pandas/io/sql.py in read(self, coerce_float, parse_dates, columns, chunksize)
803 data = result.fetchall()
804 self.frame = DataFrame.from_records(
--> 805 data, columns=column_names, coerce_float=coerce_float
806 )
807
~/.venv/lib/python3.7/site-packages/pandas/core/frame.py in from_records(cls, data, index, exclude, columns, coerce_float, nrows)
1611 arr_columns = columns
1612 else:
-> 1613 arrays, arr_columns = to_arrays(data, columns, coerce_float=coerce_float)
1614
1615 arr_columns = ensure_index(arr_columns)
~/.venv/lib/python3.7/site-packages/pandas/core/internals/construction.py in to_arrays(data, columns, coerce_float, dtype)
485 else:
486 # last ditch effort
--> 487 data = [tuple(x) for x in data]
488 return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
489
~/.venv/lib/python3.7/site-packages/pandas/core/internals/construction.py in <listcomp>(.0)
485 else:
486 # last ditch effort
--> 487 data = [tuple(x) for x in data]
488 return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
489
ValueError: Couldn't parse datetime string: '2019-01-01T00:00:00.000Z'
Surprisingly, using
pd.read_sql_query("SELECT * FROM gpkg_contents", ...)
instead of read_sql_table
does give exactly the expected result.
As the table name suggests, I encountered this bug while working with a GeoPackage file. The GeoPackage
format specification mandates a CREATE statement with a DATETIME column type holding ISO-8601 string
values, and many GIS tools implement it exactly this way.
Expected Output
Timestamp('2019-01-01 00:00:00+0000', tz='UTC')
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.5.final.0
python-bits : 64
OS : Linux
OS-release : 5.3.0-24-generic
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : de_DE.UTF-8
LOCALE : de_DE.UTF-8
pandas : 0.25.3
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 19.3.1
setuptools : 44.0.0
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.10.3
IPython : 7.11.1
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : 3.0.2
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.4.1
sqlalchemy : 1.3.12
tables : None
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None