Skip to content

Commit b600194

Browse files
Daniel EisnerDaniel Eisner
authored andcommitted
Add support for Nasdaq symbol list.
Nasdaq publishes a symbol list on its FTP site that covers all US equities, whether they are listed on Nasdaq or on other exchanges. This patch adds support for grabbing that list. The list isn't very large (< 1 MB), so we cache it in RAM within the process the first time we download it.
1 parent 2b49d6f commit b600194

File tree

4 files changed

+160
-0
lines changed

4 files changed

+160
-0
lines changed

docs/source/remote_data.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Currently the following sources are supported:
3333
- :ref:`Eurostat<remote_data.eurostat>`
3434
- :ref:`Thrift Savings Plan<remote_data.tsp>`
3535
- :ref:`Oanda currency historical rate<remote_data.oanda_curr_hist>`
36+
- :ref:`Nasdaq Trader symbol definitions<remote_data.nasdaq_symbols>`
3637

3738
It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.
3839

@@ -540,3 +541,33 @@ Download currency historical rate from `Oanda <https://www.oanda.com/>`__.
540541
2016-06-01 1.115170 1.445410 0.009095
541542
542543
[153 rows x 3 columns]
544+
545+
.. _remote_data.nasdaq_symbols:
546+
547+
Nasdaq Trader Symbol Definitions
548+
================================
549+
550+
Download the latest symbols from `Nasdaq <ftp://ftp.nasdaqtrader.com/SymbolDirectory/nasdaqtraded.txt/>`__.
551+
552+
Note that Nasdaq updates this file daily, and historical versions are not
553+
available. More information is available in the `field definitions <http://www.nasdaqtrader.com/trader.aspx?id=symboldirdefs/>`__.
554+
555+
.. code-block:: python
556+
557+
In [12]: from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
558+
In [13]: symbols = get_nasdaq_symbols()
559+
In [14]: print(symbols.loc['IBM'])
560+
Nasdaq Traded True
561+
Security Name International Business Machines Corporation Co...
562+
Listing Exchange N
563+
Market Category
564+
ETF False
565+
Round Lot Size 100
566+
Test Issue False
567+
Financial Status NaN
568+
CQS Symbol IBM
569+
NASDAQ Symbol IBM
570+
NextShares False
571+
Name: IBM, dtype: object
572+
573+

pandas_datareader/data.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pandas_datareader.edgar import EdgarIndexReader
2222
from pandas_datareader.enigma import EnigmaReader
2323
from pandas_datareader.oanda import get_oanda_currency_historical_rates
24+
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
2425

2526

2627
def get_data_fred(*args, **kwargs):
@@ -161,6 +162,11 @@ def DataReader(name, data_source=None, start=None, end=None,
161162
quote_currency="USD", base_currency=name,
162163
reversed=True, session=session
163164
)
165+
elif data_source == 'nasdaq':
166+
if name != 'symbols':
167+
raise ValueError("Only the string 'symbols' is supported for "
168+
"Nasdaq, not %r" % (name,))
169+
return get_nasdaq_symbols(retry_count=retry_count, pause=pause)
164170
else:
165171
msg = "data_source=%r is not implemented" % data_source
166172
raise NotImplementedError(msg)

pandas_datareader/nasdaq_trader.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from ftplib import FTP, all_errors
2+
from pandas import read_csv
3+
from pandas_datareader._utils import RemoteDataError
4+
from pandas.compat import StringIO
5+
import time
6+
7+
# FTP host and path of the symbol directory file that gets downloaded.
_NASDAQ_FTP_SERVER = 'ftp.nasdaqtrader.com'
_NASDAQ_TICKER_LOC = '/SymbolDirectory/nasdaqtraded.txt'

# Field separator of the symbol directory file.
_DELIMITER = '|'

# (column name, Python type) pairs for the parsed table. Columns typed as
# ``bool`` are converted from 'Y'/'N' flags when the file is parsed.
_TICKER_DTYPE = [
    ('Nasdaq Traded', bool),
    ('Symbol', str),
    ('Security Name', str),
    ('Listing Exchange', str),
    ('Market Category', str),
    ('ETF', bool),
    ('Round Lot Size', float),
    ('Test Issue', bool),
    ('Financial Status', str),
    ('CQS Symbol', str),
    ('NASDAQ Symbol', str),
    ('NextShares', bool),
]

# Columns that are recast to the pandas 'category' dtype after parsing.
_CATEGORICAL = ('Listing Exchange', 'Financial Status')

# Process-wide cache of the downloaded table; stays None until a download
# has succeeded.
_ticker_cache = None
25+
26+
27+
def _bool_converter(item):
    """Translate a 'Y'/'N' style flag: only the exact value 'Y' is truthy."""
    is_yes = (item == 'Y')
    return is_yes
29+
30+
31+
def _download_nasdaq_symbols(timeout):
    """
    Download the Nasdaq symbol directory over FTP and parse it.

    Parameters
    ----------
    timeout : number
        The time to wait for the FTP connection (passed to ``ftplib.FTP``).

    Returns
    -------
    data : pandas.DataFrame
        The parsed table, indexed by its second column, with the
        ``_CATEGORICAL`` columns cast to the 'category' dtype.

    Raises
    ------
    RemoteDataError
        If connecting, logging in or downloading fails, or if the
        downloaded text does not end with the expected footer line.
    """
    try:
        ftp_session = FTP(_NASDAQ_FTP_SERVER, timeout=timeout)
    except all_errors as err:
        raise RemoteDataError('Error connecting to %r: %s' %
                              (_NASDAQ_FTP_SERVER, err))

    lines = []
    # Everything after the connection is opened runs under ``finally`` so the
    # socket is released even when the login itself fails.
    try:
        try:
            ftp_session.login()
        except all_errors as err:
            raise RemoteDataError('Error connecting to %r: %s' %
                                  (_NASDAQ_FTP_SERVER, err))
        try:
            ftp_session.retrlines('RETR ' + _NASDAQ_TICKER_LOC, lines.append)
        except all_errors as err:
            raise RemoteDataError('Error downloading from %r: %s' %
                                  (_NASDAQ_FTP_SERVER, err))
    finally:
        ftp_session.close()

    # Sanity checking: the last line must be the footer. An empty download
    # is reported the same way instead of failing with an IndexError.
    footer = lines[-1] if lines else None
    if footer is None or not footer.startswith('File Creation Time:'):
        raise RemoteDataError('Missing expected footer. Found %r' % (footer,))

    # Convert Y/N to True/False.
    converter_map = dict((col, _bool_converter) for col, t in _TICKER_DTYPE
                         if t is bool)
    # Columns handled by a converter are left out of ``dtype``; the rest are
    # passed as a column -> type mapping, the documented form for read_csv.
    dtype_map = dict((col, t) for col, t in _TICKER_DTYPE if t is not bool)

    # The footer line is not data, so it is dropped before parsing.
    data = read_csv(StringIO('\n'.join(lines[:-1])), sep=_DELIMITER,
                    dtype=dtype_map, converters=converter_map,
                    index_col=1)

    # Properly cast enumerations
    for cat in _CATEGORICAL:
        data[cat] = data[cat].astype('category')

    return data
68+
69+
70+
def get_nasdaq_symbols(retry_count=3, timeout=30, pause=None):
    """
    Get the list of all available equity symbols from Nasdaq.

    The table is downloaded once and then cached in the module-level
    ``_ticker_cache``; later calls return the cached object without
    downloading again.

    Parameters
    ----------
    retry_count : int, default 3
        Maximum number of download attempts. Values below 1 are treated
        as a single attempt.
    timeout : number, default 30
        Passed on to the download helper as its ``timeout`` argument.
        Must be >= 0.
    pause : number, optional
        Seconds to sleep between failed attempts. Defaults to
        ``timeout / 3``. Must be >= 0.

    Returns
    -------
    nasdaq_tickers : pandas.DataFrame
        DataFrame with company tickers, names, and other properties.

    Raises
    ------
    ValueError
        If ``timeout`` or ``pause`` is negative.
    Exception
        Whatever the final download attempt raised, once all attempts
        have failed.
    """
    if timeout < 0:
        raise ValueError('timeout must be >= 0, not %r' % (timeout,))

    if pause is None:
        pause = timeout / 3
    elif pause < 0:
        raise ValueError('pause must be >= 0, not %r' % (pause,))

    global _ticker_cache
    if _ticker_cache is None:
        # Always try at least once so the function never returns None.
        attempts_left = max(retry_count, 1)
        while True:
            try:
                _ticker_cache = _download_nasdaq_symbols(timeout=timeout)
                break
            except Exception:
                # Retry on any exception, but surface the last failure
                # instead of silently returning an empty cache.
                attempts_left -= 1
                if attempts_left <= 0:
                    raise
                time.sleep(pause)

    return _ticker_cache
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import nose
2+
3+
import pandas.util.testing as tm
4+
5+
import pandas_datareader.data as web
6+
from pandas_datareader._utils import RemoteDataError
7+
8+
9+
class TestNasdaqSymbols(tm.TestCase):
    """Smoke test for the 'nasdaq' data source of ``DataReader``."""

    def test_get_symbols(self):
        # Skip rather than fail when the remote data cannot be fetched.
        try:
            table = web.DataReader('symbols', 'nasdaq')
        except RemoteDataError as exc:
            raise nose.SkipTest(exc)

        ticker_index = table.index
        assert 'IBM' in ticker_index
18+
19+
20+
if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script,
    # without exiting the interpreter afterwards.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

0 commit comments

Comments
 (0)