Skip to content

Commit 5580ca6

Browse files
committed
feat: add type hints to all functions using typing
1 parent 7bcf9a9 commit 5580ca6

File tree

1 file changed

+37
-32
lines changed

1 file changed

+37
-32
lines changed

src/cmudict/__init__.py

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
files. Compatible with NLTK's CMUDictCorpusReader.
55
"""
66

7+
import atexit
78
import re
8-
from collections import defaultdict
99
from contextlib import ExitStack
10-
import atexit
11-
1210
from importlib import metadata, resources
11+
from typing import IO, Dict, List, Optional, Tuple
1312

1413
__version__ = metadata.version(__name__)
1514

@@ -23,17 +22,19 @@
2322
atexit.register(file_manager.close)
2423

2524

26-
def _stream(resource_name):
27-
stream = resources.files(__name__).joinpath(resource_name).open("rb")
25+
def _stream(resource_name: str) -> IO[bytes]:
26+
stream: IO[bytes] = resources.files(__name__).joinpath(resource_name).open("rb")
2827
return stream
2928

3029

31-
def _string(resource_name):
30+
def _string(resource_name: str) -> str:
3231
with resources.files(__name__).joinpath(resource_name).open() as file:
3332
return file.read()
3433

3534

36-
def _entries(stream, comment_string=None):
35+
def _entries(
36+
stream: IO[bytes], comment_string: Optional[str] = None
37+
) -> List[Tuple[str, List[str]]]:
3738
cmudict_entries = []
3839
for line in stream:
3940
parts = []
@@ -47,113 +48,117 @@ def _entries(stream, comment_string=None):
4748

4849

4950
# pylint: disable-next=redefined-builtin
50-
def dict():
51+
def dict() -> Dict[str, List[List[str]]]:
5152
"""
5253
Compatibility with NLTK.
5354
Returns the cmudict lexicon as a dictionary, whose keys are
5455
lowercase words and whose values are lists of pronunciations.
5556
"""
56-
default = defaultdict(list)
57+
default: Dict[str, List[List[str]]] = {}
5758
for key, value in entries():
59+
if key not in default:
60+
default[key] = []
5861
default[key].append(value)
5962
return default
6063

6164

62-
def dict_stream():
65+
def dict_stream() -> IO[bytes]:
6366
"""Return a readable file-like object of the cmudict.dict file."""
64-
stream = _stream(CMUDICT_DICT)
67+
stream: IO[bytes] = _stream(CMUDICT_DICT)
6568
return stream
6669

6770

68-
def dict_string():
71+
def dict_string() -> str:
6972
"""Return the contents of cmudict.dict as a string."""
7073
string = _string(CMUDICT_DICT)
7174
return string
7275

7376

74-
def license_string():
77+
def license_string() -> str:
7578
"""Return the contents of LICENSE as a string."""
7679
string = _string(CMUDICT_LICENSE)
7780
return string
7881

7982

80-
def phones():
83+
def phones() -> List[Tuple[str, List[str]]]:
8184
"""Return a list of phones used in the main dict."""
82-
cmu_phones = []
85+
cmu_phones: List[Tuple[str, List[str]]] = []
8386
for line in phones_stream():
8487
parts = line.decode("utf-8").strip().split()
8588
cmu_phones.append((parts[0], parts[1:]))
8689
return cmu_phones
8790

8891

89-
def phones_stream():
92+
def phones_stream() -> IO[bytes]:
9093
"""Return a readable file-like object of the cmudict.phones file."""
91-
p_stream = _stream(CMUDICT_PHONES)
94+
p_stream: IO[bytes] = _stream(CMUDICT_PHONES)
9295
return p_stream
9396

9497

95-
def phones_string():
98+
def phones_string() -> str:
9699
"""Return the contents of cmudict.phones as a string."""
97100
string = _string(CMUDICT_PHONES)
98101
return string
99102

100103

101-
def symbols():
104+
def symbols() -> List[str]:
102105
"""Return a list of symbols."""
103-
cmu_symbols = []
106+
cmu_symbols: List[str] = []
104107
for line in symbols_stream():
105108
cmu_symbols.append(line.decode("utf-8").strip())
106109
return cmu_symbols
107110

108111

109-
def symbols_stream():
112+
def symbols_stream() -> IO[bytes]:
110113
"""Return a readable file-like object of the cmudict.symbols file."""
111-
stream = _stream(CMUDICT_SYMBOLS)
114+
stream: IO[bytes] = _stream(CMUDICT_SYMBOLS)
112115
return stream
113116

114117

115-
def symbols_string():
118+
def symbols_string() -> str:
116119
"""Return the contents of cmudict.symbols as a string."""
117120
string = _string(CMUDICT_SYMBOLS)
118121
return string
119122

120123

121124
# pylint: disable-next=invalid-name
122-
def vp():
125+
def vp() -> Dict[str, List[List[str]]]:
123126
"""Return a list of punctuation pronunciations."""
124-
cmu_vp = defaultdict(list)
127+
cmu_vp: Dict[str, List[List[str]]] = {}
125128
with vp_stream() as stream:
126129
for key, value in _entries(stream):
130+
if not key in cmu_vp:
131+
cmu_vp[key] = []
127132
cmu_vp[key].append(value)
128133
return cmu_vp
129134

130135

131-
def vp_stream():
136+
def vp_stream() -> IO[bytes]:
132137
"""Return a readable file-like object of the cmudict.vp file."""
133-
stream = _stream(CMUDICT_VP)
138+
stream: IO[bytes] = _stream(CMUDICT_VP)
134139
return stream
135140

136141

137-
def vp_string():
142+
def vp_string() -> str:
138143
"""Return the contents of cmudict.vp as a string."""
139144
string = _string(CMUDICT_VP)
140145
return string
141146

142147

143148
# The .entries(), .raw(), and .words() functions
144149
# maintain compatibility with NLTK.
145-
def entries():
150+
def entries() -> List[Tuple[str, List[str]]]:
146151
"""
147152
Compatibility with NLTK.
148153
Returns the cmudict lexicon as a list of entries
149154
containing (word, transcriptions) tuples.
150155
"""
151156
with dict_stream() as stream:
152-
cmu_entries = _entries(stream, "#")
157+
cmu_entries: List[Tuple[str, List[str]]] = _entries(stream, "#")
153158
return cmu_entries
154159

155160

156-
def raw():
161+
def raw() -> str:
157162
"""
158163
Compatibility with NLTK.
159164
Returns the cmudict lexicon as a raw string.
@@ -162,7 +167,7 @@ def raw():
162167
return string
163168

164169

165-
def words():
170+
def words() -> List[str]:
166171
"""
167172
Compatibility with NLTK.
168173
Returns a list of all words defined in the cmudict lexicon.

0 commit comments

Comments
 (0)