Skip to content

Commit 9cd961c

Browse files
authored
Merge pull request #944 from econdb/main
Updates in docstrings and test cases
2 parents 398c5a6 + dcb05e7 commit 9cd961c

File tree

5 files changed

+186
-84
lines changed

5 files changed

+186
-84
lines changed

docs/source/remote_data.rst

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,26 @@ for United States, is as simple as taking the ticker segment from the URL path
305305

306306
.. code-block:: ipython
307307
308-
import os
309-
import pandas_datareader.data as web
308+
In [1]: import os
310309
311-
f = web.DataReader('ticker=RGDPUS', 'econdb')
312-
f.head()
310+
In [2]: import pandas_datareader as pdr
313311
312+
In [3]: f = pdr.get_data_econdb('ticker=RGDPUS')
313+
In [4]: f.head()
314+
Out[4]:
315+
TableName T10106
316+
SeriesCode A191RX
317+
Table Table 1.1.6. Real Gross Domestic Product, Ch...
318+
Series description Gross domestic product
319+
CL_UNIT Level
320+
CL_FREQ Q
321+
Note Table 1.1.6. Real Gross Domestic Product, Ch...
322+
TIME_PERIOD
323+
2018-01-01 18437128
324+
2018-04-01 18565696
325+
2018-07-01 18699748
326+
2018-10-01 18733740
327+
2019-01-01 18835412
314328
315329
The code snippet for exporting the whole dataset, or its filtered down subset,
316330
can be generated by using the Export -> Pandas Python3 functionality
@@ -319,16 +333,47 @@ such as the Eurostat's `GDP and main components <https://www.econdb.com/dataset/
319333

320334
.. code-block:: ipython
321335
322-
import os
323-
import pandas_datareader.data as web
336+
In [1]: import os
324337
325-
df = web.DataReader('dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)&h=TIME&from=2018-05-01&to=2021-01-01&GEO=[AL,AT,BE,BA,BG,HR,CY,CZ,DK,EE,EA19,FI,FR,DE,EL,HU,IS,IE,IT,XK,LV,LT,LU,MT,ME,NL,MK,NO,PL,PT,RO,RS,SK,SI,ES,SE,CH,TR,UK]&NA_ITEM=[B1GQ]&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]', 'econdb')
326-
df.columns
338+
In [2]: import pandas_datareader as pdr
339+
340+
In [3]: df = pdr.get_data_econdb('dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)'
341+
'&h=TIME&from=2018-05-01&to=2021-01-01'
342+
'&GEO=[UK,ES,IT,DE,FR,CH,AT]&NA_ITEM=[B1GQ]'
343+
'&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]')
344+
In [4]: df.head()
345+
Out[4]:
346+
Frequency Quarterly ...
347+
Unit of measure Chain linked volumes (2010), million units of national currency ...
348+
Seasonal adjustment Seasonally and calendar adjusted data ...
349+
National accounts indicator (ESA10) Gross domestic product at market prices ...
350+
Geopolitical entity (reporting) Austria ... Switzerland
351+
TIME_PERIOD ...
352+
2018-07-01 83427 ... 181338
353+
2018-10-01 84268 ... 181767
354+
2019-01-01 84919 ... 182039
355+
2019-04-01 84476 ... 182848
356+
2019-07-01 84822 ... 183866
357+
358+
In both cases, metadata for the requested Econdb series or dataset
359+
is in the ``MultiIndex`` columns of the returned ``DataFrame``,
360+
and can be conveniently converted to a ``dict`` as demonstrated below:
361+
362+
.. code-block:: ipython
363+
364+
In [5]: meta = df.columns.to_frame().iloc[0].to_dict() # first column, positionally
365+
In [6]: meta
Out[6]:
366+
{'Frequency': 'Quarterly',
367+
'Unit of measure': 'Chain linked volumes (2010), million units of national currency',
368+
'Seasonal adjustment': 'Seasonally and calendar adjusted data',
369+
'National accounts indicator (ESA10)': 'Gross domestic product at market prices',
370+
'Geopolitical entity (reporting)': 'Austria'}
327371
328372
Datasets can be located through Econdb's `search <https://www.econdb.com/search>`__
329373
engine, or discovered by exploring the `tree <https://www.econdb.com/tree/>`__
330374
of available statistical sources.
331375

376+
332377
.. _remote_data.enigma:
333378

334379
Enigma

pandas_datareader/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
get_components_yahoo,
99
get_dailysummary_iex,
1010
get_data_alphavantage,
11+
get_data_econdb,
1112
get_data_enigma,
1213
get_data_famafrench,
1314
get_data_fred,
@@ -38,6 +39,7 @@
3839
__all__ = [
3940
"__version__",
4041
"get_components_yahoo",
42+
"get_data_econdb",
4143
"get_data_enigma",
4244
"get_data_famafrench",
4345
"get_data_yahoo",

pandas_datareader/data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
__all__ = [
4343
"get_components_yahoo",
44+
"get_data_econdb",
4445
"get_data_enigma",
4546
"get_data_famafrench",
4647
"get_data_fred",
@@ -80,6 +81,10 @@ def get_data_yahoo(*args, **kwargs):
8081
return YahooDailyReader(*args, **kwargs).read()
8182

8283

84+
def get_data_econdb(*args, **kwargs):
85+
return EcondbReader(*args, **kwargs).read()
86+
87+
8388
def get_data_enigma(*args, **kwargs):
8489
return EnigmaReader(*args, **kwargs).read()
8590

pandas_datareader/econdb.py

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,65 @@
44

55

66
class EcondbReader(_BaseReader):
7-
"""Get data for the given name from Econdb."""
7+
"""
8+
Returns DataFrame of the requested Econdb series or dataset, over date
9+
range, start to end.
10+
11+
.. versionadded:: 0.5.0
12+
13+
Parameters
14+
----------
15+
symbols : string
16+
Can be in two different formats:
17+
1. 'ticker=<code>' for fetching a single series,
18+
where <code> is, e.g., CPIUS for the series at
19+
https://www.econdb.com/series/CPIUS/
20+
2. 'dataset=<dataset>&<params>' for fetching full
21+
or filtered subset of a dataset, like the one at
22+
https://www.econdb.com/dataset/ABS_GDP. After choosing the desired filters,
23+
the correctly formatted query string can be easily generated
24+
from that dataset's page by using the Export function, and choosing Pandas Python3.
25+
start : string, int, date, datetime, Timestamp
26+
Starting date. Parses many different kinds of date
27+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
28+
end : string, int, date, datetime, Timestamp
29+
Ending date
30+
retry_count : int, default 3
31+
Number of times to retry query request.
32+
pause : float, default 0.1
33+
Time, in seconds, to pause between consecutive queries of chunks. If
34+
single value given for symbol, represents the pause between retries.
35+
session : Session, default None
36+
requests.sessions.Session instance to be used
37+
"""
838

939
_URL = "https://www.econdb.com/api/series/"
1040
_format = None
1141
_show = "labels"
1242

13-
def __init__(self, *args, **kwargs):
14-
super().__init__(**kwargs)
43+
def __init__(
44+
self,
45+
symbols,
46+
start=None,
47+
end=None,
48+
retry_count=3,
49+
pause=0.1,
50+
session=None,
51+
freq=None,
52+
):
53+
super(EcondbReader, self).__init__(
54+
symbols=symbols,
55+
start=start,
56+
end=end,
57+
retry_count=retry_count,
58+
pause=pause,
59+
session=session,
60+
freq=freq,
61+
)
1562
params = dict(s.split("=") for s in self.symbols.split("&"))
16-
if "from" in params and not kwargs.get("start"):
63+
if "from" in params and not start:
1764
self.start = pd.to_datetime(params["from"], format="%Y-%m-%d")
18-
if "to" in params and not kwargs.get("end"):
65+
if "to" in params and not end:
1966
self.end = pd.to_datetime(params["to"], format="%Y-%m-%d")
2067

2168
@property
@@ -43,10 +90,13 @@ def show_func(x):
4390
def show_func(x):
4491
return x[: x.find(":")]
4592

93+
unique_keys = {k for s in results for k in s["additional_metadata"]}
4694
for entry in results:
4795
series = pd.DataFrame(entry["data"])[["dates", "values"]].set_index("dates")
4896
head = entry["additional_metadata"]
49-
97+
for k in unique_keys:
98+
if k not in head:
99+
head[k] = "-1:None"
50100
if head != "": # this additional metadata is not blank
51101
series.columns = pd.MultiIndex.from_tuples(
52102
[[show_func(x) for x in head.values()]],
Lines changed: 70 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,33 @@
11
import numpy as np
22
import pandas as pd
3-
from pandas import testing as tm
43
import pytest
54

65
from pandas_datareader import data as web
76

87
pytestmark = pytest.mark.stable
98

109

10+
def assert_equal(x, y):
11+
assert np.isclose(x, y, rtol=1e-2)
12+
13+
1114
class TestEcondb(object):
15+
16+
def test_override_start_end(self):
17+
df = web.DataReader(
18+
'&'.join([
19+
'dataset=RBI_BULLETIN',
20+
'v=TIME',
21+
'h=Indicator',
22+
'from=2022-01-01',
23+
'to=2022-07-01'
24+
]),
25+
'econdb',
26+
start='2020-01-01',
27+
end='2022-01-01'
28+
)
29+
assert isinstance(df.index, pd.DatetimeIndex)
30+
1231
def test_infer_start_end_from_symbols(self):
1332
df = web.DataReader(
1433
(
@@ -23,88 +42,69 @@ def test_infer_start_end_from_symbols(self):
2342
assert df.index[0].year == 2010
2443
assert df.index[-1].year == 2018
2544

26-
@pytest.mark.xfail(reason="Dataset does not exist on Econdb")
27-
def test_get_cdh_e_fos(self):
28-
# EUROSTAT
29-
# Employed doctorate holders in non managerial and non professional
30-
# occupations by fields of science (%)
31-
df = web.DataReader(
32-
"dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL",
33-
"econdb",
34-
start=pd.Timestamp("2005-01-01"),
35-
end=pd.Timestamp("2010-01-01"),
36-
)
37-
assert isinstance(df, pd.DataFrame)
38-
assert df.shape == (2, 4)
39-
40-
# the levels and not returned consistently for econdb
41-
names = list(df.columns.names)
42-
levels = [lvl.values.tolist() for lvl in list(df.columns.levels)]
43-
44-
exp_col = pd.MultiIndex.from_product(levels, names=names)
45-
exp_idx = pd.DatetimeIndex(["2006-01-01", "2009-01-01"], name="TIME_PERIOD")
46-
47-
values = np.array([[25.49, np.nan, 39.05, np.nan], [20.38, 25.1, 27.77, 38.1]])
48-
expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
49-
tm.assert_frame_equal(df, expected)
50-
51-
def test_get_tourism(self):
52-
# OECD
53-
# TOURISM_INBOUND
45+
tickers = [
46+
f"{sec}{geo}"
47+
for sec in ["RGDP", "CPI", "URATE"]
48+
for geo in ["US", "UK", "ES", "AR"]
49+
]
5450

51+
@pytest.mark.parametrize("ticker", tickers)
52+
def test_fetch_single_ticker_series(self, ticker):
5553
df = web.DataReader(
56-
"dataset=OE_TOURISM_INBOUND&COUNTRY=JPN,USA&VARIABLE=INB_ARRIVALS_TOTAL",
54+
f"ticker={ticker}",
5755
"econdb",
58-
start=pd.Timestamp("2008-01-01"),
59-
end=pd.Timestamp("2012-01-01"),
60-
)
61-
df = df.astype(float)
62-
jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
63-
us = np.array(
64-
[175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
56+
start=pd.Timestamp("2010-01-01"),
57+
end=pd.Timestamp("2013-01-27"),
6558
)
66-
index = pd.date_range("2008-01-01", "2012-01-01", freq="AS", name="TIME_PERIOD")
59+
assert df.shape[1] == 1
60+
assert isinstance(df.index, pd.DatetimeIndex)
6761

68-
# check the values coming back are equal
69-
np.testing.assert_array_equal(df.values[:, 0], jp)
70-
np.testing.assert_array_equal(df.values[:, 1], us)
71-
72-
# sometimes the country and variable columns are swapped
73-
df = df.swaplevel(2, 1, axis=1)
74-
for label, values in [("Japan", jp), ("United States", us)]:
75-
expected = pd.Series(
76-
values, index=index, name="Total international arrivals"
77-
)
78-
expected.index.freq = None
79-
tm.assert_series_equal(
80-
df[label]["Tourism demand surveys"]["Total international arrivals"],
81-
expected,
82-
)
83-
84-
def test_bls(self):
85-
# BLS
86-
# CPI
62+
def test_single_nonticker_series(self):
8763
df = web.DataReader(
8864
"ticker=BLS_CU.CUSR0000SA0.M.US",
8965
"econdb",
9066
start=pd.Timestamp("2010-01-01"),
9167
end=pd.Timestamp("2013-01-27"),
9268
)
69+
assert df.shape[1] == 1
70+
assert isinstance(df.index, pd.DatetimeIndex)
71+
assert_equal(df.loc["2010-05-01"][0], 217.3)
9372

94-
assert df.loc["2010-05-01"][0] == 217.3
73+
def test_filtered_dataset(self):
74+
df = web.DataReader(
75+
"&".join(
76+
[
77+
"dataset=PRC_HICP_MIDX",
78+
"v=Geopolitical entity (reporting)",
79+
"h=TIME",
80+
"from=2022-03-01",
81+
"to=2022-09-01",
82+
"COICOP=[CP00]",
83+
"FREQ=[M]",
84+
"GEO=[ES,AT,CZ,IT,CH]",
85+
"UNIT=[I15]",
86+
]
87+
),
88+
"econdb",
89+
)
90+
assert df.shape[1] == 5
91+
assert isinstance(df.index, pd.DatetimeIndex)
9592

9693
def test_australia_gdp(self):
9794
df = web.DataReader(
98-
"dataset=ABS_GDP&to=2019-09-01&from=1959-09-01&h=TIME&v=Indicator", "econdb"
99-
)
100-
assert (
101-
df.loc[
102-
"2017-10-01",
103-
(
104-
"GDP per capita: Current prices - National Accounts",
105-
"Seasonally Adjusted",
106-
"AUD",
107-
),
108-
]
109-
== 18329
95+
"&".join(
96+
[
97+
"dataset=ABS_GDP",
98+
"4=[7]",
99+
"6=[11]",
100+
"16=[1267]",
101+
"v=TIME",
102+
"h=Indicator",
103+
"from=2019-10-01",
104+
"to=2022-06-01",
105+
"GEO=[13]",
106+
]
107+
),
108+
"econdb",
110109
)
110+
assert_equal(df.squeeze().loc["2020-10-01"], 508603)

0 commit comments

Comments
 (0)