1
1
import numpy as np
2
2
import pandas as pd
3
- from pandas import testing as tm
4
3
import pytest
5
4
6
5
from pandas_datareader import data as web
7
6
8
7
# Module-level pytest marker: pytest applies the marks assigned to
# ``pytestmark`` to every test collected from this module, here ``stable``.
pytestmark = pytest .mark .stable
9
8
10
9
10
def assert_equal(x, y):
    """Assert that ``x`` matches ``y`` to within a 1% relative tolerance.

    Values served by the remote API can be revised slightly over time, so
    the tests compare approximately instead of using ``==``.

    Parameters
    ----------
    x : scalar or array-like
        Value obtained from the reader.
    y : scalar or array-like
        Reference value; the relative tolerance is measured against it.

    Raises
    ------
    AssertionError
        If any element of ``x`` differs from ``y`` by more than
        ``1e-8 + 1e-2 * abs(y)``. The message reports the mismatch.
    """
    # ``atol=1e-8`` and ``equal_nan=False`` mirror the defaults of
    # ``np.isclose`` so the acceptance rule is the same as a plain
    # ``assert np.isclose(x, y, rtol=1e-2)``, while also handling
    # multi-element inputs and producing a readable failure report.
    np.testing.assert_allclose(x, y, rtol=1e-2, atol=1e-8, equal_nan=False)
12
+
13
+
11
14
class TestEcondb (object ):
15
+
16
def test_override_start_end(self):
    """Query RBI_BULLETIN with dates in both the query string and kwargs.

    The query carries its own ``from``/``to`` range while ``start`` and
    ``end`` are also passed explicitly with a different range. The test
    only checks that the returned frame is indexed by a DatetimeIndex.
    """
    query_parts = (
        "dataset=RBI_BULLETIN",
        "v=TIME",
        "h=Indicator",
        "from=2022-01-01",
        "to=2022-07-01",
    )
    query = "&".join(query_parts)
    frame = web.DataReader(
        query,
        "econdb",
        start="2020-01-01",
        end="2022-01-01",
    )
    assert isinstance(frame.index, pd.DatetimeIndex)
30
+
12
31
def test_infer_start_end_from_symbols (self ):
13
32
df = web .DataReader (
14
33
(
@@ -23,88 +42,69 @@ def test_infer_start_end_from_symbols(self):
23
42
assert df .index [0 ].year == 2010
24
43
assert df .index [- 1 ].year == 2018
25
44
26
- @pytest .mark .xfail (reason = "Dataset does not exist on Econdb" )
27
- def test_get_cdh_e_fos (self ):
28
- # EUROSTAT
29
- # Employed doctorate holders in non managerial and non professional
30
- # occupations by fields of science (%)
31
- df = web .DataReader (
32
- "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL" ,
33
- "econdb" ,
34
- start = pd .Timestamp ("2005-01-01" ),
35
- end = pd .Timestamp ("2010-01-01" ),
36
- )
37
- assert isinstance (df , pd .DataFrame )
38
- assert df .shape == (2 , 4 )
39
-
40
- # the levels and not returned consistently for econdb
41
- names = list (df .columns .names )
42
- levels = [lvl .values .tolist () for lvl in list (df .columns .levels )]
43
-
44
- exp_col = pd .MultiIndex .from_product (levels , names = names )
45
- exp_idx = pd .DatetimeIndex (["2006-01-01" , "2009-01-01" ], name = "TIME_PERIOD" )
46
-
47
- values = np .array ([[25.49 , np .nan , 39.05 , np .nan ], [20.38 , 25.1 , 27.77 , 38.1 ]])
48
- expected = pd .DataFrame (values , index = exp_idx , columns = exp_col )
49
- tm .assert_frame_equal (df , expected )
50
-
51
- def test_get_tourism (self ):
52
- # OECD
53
- # TOURISM_INBOUND
45
# Ticker symbols fed to the parametrized single-ticker test: every series
# prefix combined with every country code, concatenated without any
# separator or padding (e.g. "RGDPUS").
tickers = [
    f"{sec}{geo}"
    for sec in ["RGDP", "CPI", "URATE"]
    for geo in ["US", "UK", "ES", "AR"]
]
54
50
51
@pytest.mark.parametrize("ticker", tickers)
def test_fetch_single_ticker_series(self, ticker):
    """Fetch one series by ticker and check the shape of the result.

    Parameters
    ----------
    ticker : str
        Symbol supplied by the ``tickers`` parametrization.
    """
    # The query must be exactly ``ticker=<symbol>``: no whitespace around
    # the symbol, otherwise the request names a different/nonexistent series.
    df = web.DataReader(
        f"ticker={ticker}",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    # A single ticker should come back as exactly one column, date-indexed.
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
67
61
68
- # check the values coming back are equal
69
- np .testing .assert_array_equal (df .values [:, 0 ], jp )
70
- np .testing .assert_array_equal (df .values [:, 1 ], us )
71
-
72
- # sometimes the country and variable columns are swapped
73
- df = df .swaplevel (2 , 1 , axis = 1 )
74
- for label , values in [("Japan" , jp ), ("United States" , us )]:
75
- expected = pd .Series (
76
- values , index = index , name = "Total international arrivals"
77
- )
78
- expected .index .freq = None
79
- tm .assert_series_equal (
80
- df [label ]["Tourism demand surveys" ]["Total international arrivals" ],
81
- expected ,
82
- )
83
-
84
- def test_bls (self ):
85
- # BLS
86
- # CPI
62
def test_single_nonticker_series(self):
    """Fetch the series ``BLS_CU.CUSR0000SA0.M.US`` and spot-check one value.

    Checks that the result has a single column, a DatetimeIndex, and that
    the value at 2010-05-01 is approximately 217.3.
    """
    df = web.DataReader(
        "ticker=BLS_CU.CUSR0000SA0.M.US",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
    # Select the first column by position with ``.iloc``: plain ``[0]`` on a
    # label-indexed row relies on the deprecated positional fallback of
    # ``Series.__getitem__``.
    assert_equal(df.loc["2010-05-01"].iloc[0], 217.3)
93
72
94
- assert df .loc ["2010-05-01" ][0 ] == 217.3
73
def test_filtered_dataset(self):
    """Fetch PRC_HICP_MIDX with dimension filters and check the result shape.

    The query restricts GEO to five entries, and the test expects five
    columns and a DatetimeIndex in the returned frame.
    """
    query_parts = (
        "dataset=PRC_HICP_MIDX",
        "v=Geopolitical entity (reporting)",
        "h=TIME",
        "from=2022-03-01",
        "to=2022-09-01",
        "COICOP=[CP00]",
        "FREQ=[M]",
        "GEO=[ES,AT,CZ,IT,CH]",
        "UNIT=[I15]",
    )
    frame = web.DataReader("&".join(query_parts), "econdb")
    column_count = frame.shape[1]
    assert column_count == 5
    assert isinstance(frame.index, pd.DatetimeIndex)
95
92
96
93
def test_australia_gdp(self):
    """Fetch a filtered slice of ABS_GDP and spot-check one observation.

    The filters are expected to narrow the dataset to a single series, so
    the frame is squeezed before looking up the value at 2020-10-01, which
    is compared approximately against 508603.
    """
    # NOTE(review): the numeric keys ("4", "6", "16") look like dimension
    # ids used by the dataset; confirm their meaning against the service.
    query_parts = (
        "dataset=ABS_GDP",
        "4=[7]",
        "6=[11]",
        "16=[1267]",
        "v=TIME",
        "h=Indicator",
        "from=2019-10-01",
        "to=2022-06-01",
        "GEO=[13]",
    )
    frame = web.DataReader("&".join(query_parts), "econdb")
    series = frame.squeeze()
    assert_equal(series.loc["2020-10-01"], 508603)
0 commit comments