diff --git a/.travis.yml b/.travis.yml index ab36df340e1c5..e7ace279f1063 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ language: python python: - 2.6 - 2.7 - - 3.1 # travis will soon EOL this +# - 3.1 # travis EOL - 3.2 - 3.3 @@ -45,8 +45,10 @@ before_install: install: - echo "Waldo2" - ci/install.sh - - ci/print_versions.py # not including stats script: - echo "Waldo3" - ci/script.sh + +after_script: + - ci/print_versions.py diff --git a/ci/before_install.sh b/ci/before_install.sh index 7b7919a41ba4e..9561c713d0f2e 100755 --- a/ci/before_install.sh +++ b/ci/before_install.sh @@ -9,20 +9,20 @@ fi sudo apt-get update $APT_ARGS # run apt-get update for all versions -# hack for broken 3.3 env -if [ x"$VIRTUAL_ENV" == x"" ]; then - VIRTUAL_ENV=~/virtualenv/python$TRAVIS_PYTHON_VERSION_with_system_site_packages; -fi +# # hack for broken 3.3 env +# if [ x"$VIRTUAL_ENV" == x"" ]; then +# VIRTUAL_ENV=~/virtualenv/python$TRAVIS_PYTHON_VERSION_with_system_site_packages; +# fi -# we only recreate the virtualenv for 3.x -# since the "Detach bug" only affects python3 -# and travis has numpy preinstalled on 2.x which is quicker -_VENV=$VIRTUAL_ENV # save it -if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ] ; then - deactivate # pop out of any venv - sudo pip install virtualenv==1.8.4 --upgrade - sudo apt-get install $APT_ARGS python3.3 python3.3-dev - sudo rm -Rf $_VENV - virtualenv -p python$TRAVIS_PYTHON_VERSION $_VENV --system-site-packages; - source $_VENV/bin/activate -fi +# # we only recreate the virtualenv for 3.x +# # since the "Detach bug" only affects python3 +# # and travis has numpy preinstalled on 2.x which is quicker +# _VENV=$VIRTUAL_ENV # save it +# if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ] ; then +# deactivate # pop out of any venv +# sudo pip install virtualenv==1.8.4 --upgrade +# sudo apt-get install $APT_ARGS python3.3 python3.3-dev +# sudo rm -Rf $_VENV +# virtualenv -p python$TRAVIS_PYTHON_VERSION $_VENV --system-site-packages; +# source 
$_VENV/bin/activate +# fi diff --git a/ci/install.sh b/ci/install.sh index ce1b5f667b03b..6874da69880b2 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -21,7 +21,7 @@ fi if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ] || \ [ ${TRAVIS_PYTHON_VERSION} == "3.1" ] || \ [ ${TRAVIS_PYTHON_VERSION} == "3.2" ]; then - pip $PIP_ARGS install numpy; #https://github.com/y-p/numpy/archive/1.6.2_with_travis_fix.tar.gz; + pip $PIP_ARGS install numpy; else pip $PIP_ARGS install https://github.com/numpy/numpy/archive/v1.7.0b2.tar.gz; fi diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 30bb4eb9ac096..b3fef8943baf3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3240,7 +3240,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, Parameters ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py new file mode 100644 index 0000000000000..5bcdd22c989fe --- /dev/null +++ b/pandas/io/tests/test_wb.py @@ -0,0 +1,30 @@ +import pandas +from pandas.util.testing import network +from pandas.util.testing import assert_frame_equal +from numpy.testing.decorators import slow +from pandas.io.wb import (search, download) + +@slow +@network +def test_wdi_search(): + expected = {u'id': {2634: u'GDPPCKD', + 4649: u'NY.GDP.PCAP.KD', + 4651: u'NY.GDP.PCAP.KN', + 4653: u'NY.GDP.PCAP.PP.KD'}, + u'name': {2634: u'GDP per Capita, constant US$, millions', + 4649: u'GDP per capita (constant 2000 US$)', + 4651: u'GDP per capita (constant LCU)', + 4653: u'GDP per capita, PPP (constant 2005 international $)'}} + result = search('gdp.*capita.*constant').ix[:,:2] + assert_frame_equal(result, pandas.DataFrame(expected)) + +@slow +@network
+def test_wdi_download(): + expected = {'GDPPCKN': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'37857.1261134552', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'37081.4575704003', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'72720.0691255285', (u'Mexico', u'2004'): u'74751.6003347038', (u'Mexico', u'2005'): u'76200.2154469437', (u'Canada', u'2005'): u'38617.4563629611'}, 'GDPPCKD': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'34397.055116118', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'33692.2812368928', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'7608.43848670658', (u'Mexico', u'2004'): u'7820.99026814334', (u'Mexico', u'2005'): u'7972.55364129367', (u'Canada', u'2005'): u'35087.8925933298'}} + expected = pandas.DataFrame(expected) + result = download(country=['CA','MX','US', 'junk'], indicator=['GDPPCKD', + 'GDPPCKN', 'junk'], start=2003, end=2005) + expected.index = result.index + assert_frame_equal(result, pandas.DataFrame(expected)) + diff --git a/pandas/io/wb.py b/pandas/io/wb.py new file mode 100644 index 0000000000000..1270d9b0dd28f --- /dev/null +++ b/pandas/io/wb.py @@ -0,0 +1,183 @@ +import urllib2 +import warnings +import json +import pandas +import numpy as np + +def download(country=['MX','CA','US'], indicator=['GDPPCKD','GDPPCKN'], + start=2003, end=2005): + """ + Download data series from the World Bank's World Development Indicators + + Parameters + ---------- + + indicator: string or list of strings + taken from the ``id`` field in ``WDIsearch()`` + country: string or list of strings. 
+ ``all`` downloads data for all countries + ISO-2 character codes select individual countries (e.g.``US``,``CA``) + start: int + First year of the data series + end: int + Last year of the data series (inclusive) + + Returns + ------- + + ``pandas`` DataFrame with columns: country, iso2c, year, indicator value. + """ + + # Are ISO-2 country codes valid? + valid_countries = ["AG", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BB", + "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BO", "BR", "BS", "BW", + "BY", "BZ", "CA", "CD", "CF", "CG", "CH", "CI", "CL", "CM", "CN", + "CO", "CR", "CV", "CY", "CZ", "DE", "DK", "DM", "DO", "DZ", "EC", + "EE", "EG", "ER", "ES", "ET", "FI", "FJ", "FR", "GA", "GB", "GE", + "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", + "HT", "HU", "ID", "IE", "IL", "IN", "IR", "IS", "IT", "JM", "JO", + "JP", "KE", "KG", "KH", "KM", "KR", "KW", "KZ", "LA", "LB", "LC", + "LK", "LS", "LT", "LU", "LV", "MA", "MD", "MG", "MK", "ML", "MN", + "MR", "MU", "MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", + "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PT", + "PY", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG", "SI", + "SK", "SL", "SN", "SR", "SV", "SY", "SZ", "TD", "TG", "TH", "TN", + "TR", "TT", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", + "VN", "VU", "YE", "ZA", "ZM", "ZW", "all"] + if type(country) == str: + country = [country] + bad_countries = np.setdiff1d(country, valid_countries) + country = np.intersect1d(country, valid_countries) + country = ';'.join(country) + # Work with a list of indicators + if type(indicator) == str: + indicator = [indicator] + # Download + data = [] + bad_indicators = [] + for ind in indicator: + try: + tmp = _get_data(ind, country, start, end) + tmp.columns = ['country', 'iso2c', 'year', ind] + data.append(tmp) + except: + bad_indicators.append(ind) + # Warn + if len(bad_indicators) > 0: + print 'Failed to obtain indicator(s): ' + '; '.join(bad_indicators) + print 'The data 
may still be available for download at http://data.worldbank.org' + if len(bad_countries) > 0: + print 'Invalid ISO-2 codes: ' + ' '.join(bad_countries) + # Merge WDI series + if len(data) > 0: + out = reduce(lambda x,y: x.merge(y, how='outer'), data) + # Clean + out = out.drop('iso2c', axis=1) + out = out.set_index(['country', 'year']) + return out + + +def _get_data(indicator = "NY.GNS.ICTR.GN.ZS", country = 'US', + start = 2002, end = 2005): + # Build URL for api call + url = "http://api.worldbank.org/countries/" + country + "/indicators/" + \ + indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \ + "&format=json" + # Download + response = urllib2.urlopen(url) + data = response.read() + # Parse JSON file + data = json.loads(data)[1] + country = map(lambda x: x['country']['value'], data) + iso2c = map(lambda x: x['country']['id'], data) + year = map(lambda x: x['date'], data) + value = map(lambda x: x['value'], data) + # Prepare output + out = pandas.DataFrame([country, iso2c, year, value]).T + return out + + +def get_countries(): + '''Query information about countries + ''' + url = 'http://api.worldbank.org/countries/all?format=json' + response = urllib2.urlopen(url) + data = response.read() + data = json.loads(data)[1] + data = pandas.DataFrame(data) + data.adminregion = map(lambda x: x['value'], data.adminregion) + data.incomeLevel = map(lambda x: x['value'], data.incomeLevel) + data.lendingType = map(lambda x: x['value'], data.lendingType) + data.region = map(lambda x: x['value'], data.region) + data = data.rename(columns={'id':'iso3c', 'iso2Code':'iso2c'}) + return data + + +def get_indicators(): + '''Download information about all World Bank data series + ''' + url = 'http://api.worldbank.org/indicators?per_page=50000&format=json' + response = urllib2.urlopen(url) + data = response.read() + data = json.loads(data)[1] + data = pandas.DataFrame(data) + # Clean fields + data.source = map(lambda x: x['value'], data.source) + fun = lambda 
x: x.encode('ascii', 'ignore') + data.sourceOrganization = data.sourceOrganization.apply(fun) + # Clean topic field + def get_value(x): + try: + return x['value'] + except: + return '' + fun = lambda x: map(lambda y: get_value(y), x) + data.topics = data.topics.apply(fun) + data.topics = data.topics.apply(lambda x: ' ; '.join(x)) + # Clean output + data = data.sort(columns='id') + data.index = pandas.Index(range(data.shape[0])) + return data + + +_cached_series = None +def search(string='gdp.*capi', field='name', case=False): + """ + Search available data series from the world bank + + Parameters + ---------- + + string: string + regular expression + field: string + id, name, source, sourceNote, sourceOrganization, topics + See notes below + case: bool + case sensitive search? + + Notes + ----- + + The first time this function is run it will download and cache the full + list of available series. Depending on the speed of your network + connection, this can take time. Subsequent searches will use the cached + copy, so they should be much faster. + + id : Data series indicator (for use with the ``indicator`` argument of + ``WDI()``) e.g. NY.GNS.ICTR.GN.ZS" + name: Short description of the data series + source: Data collection project + sourceOrganization: Data collection organization + note: + sourceNote: + topics: + """ + # Create cached list of series if it does not exist + global _cached_series + if type(_cached_series) is not pandas.core.frame.DataFrame: + _cached_series = get_indicators() + data = _cached_series[field] + idx = data.str.contains(string, case=case) + out = _cached_series.ix[idx].dropna() + return out