File tree Expand file tree Collapse file tree 1 file changed +49
-0
lines changed
Expand file tree Collapse file tree 1 file changed +49
-0
lines changed Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python
2+
3+ """
4+ Fetch and parse people names from the IMDb.
5+
6+ Usage:
7+ $ python download_imdb.py
8+ """
9+
10+ import gzip
11+ import shutil
12+ import tempfile
13+ import urllib .request
14+
15+
def main():
    """Script entry point.

    Write every line produced by ``names()`` to ``names.txt``, then write a
    sorted copy of those lines to ``sorted_names.txt``. Both paths are
    relative, so the files land in the current working directory.
    """
    print('Fetching data from IMDb...')

    # Pin the encoding: names may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to represent
    # them (or to be the same on the write and the read side).
    with open('names.txt', 'w', encoding='utf-8') as destination:
        destination.writelines(names())

    with open('names.txt', encoding='utf-8') as source, \
            open('sorted_names.txt', 'w', encoding='utf-8') as destination:
        # A text file iterates line by line, so sorted() can consume it
        # directly without an intermediate readlines() list.
        destination.writelines(sorted(source))

    print('Created "names.txt" and "sorted_names.txt"')
29+
30+
def names(url='https://datasets.imdbws.com/name.basics.tsv.gz'):
    """Yield each name found in a gzip-compressed, tab-separated dump.

    The archive at *url* is copied into a temporary file, then decompressed
    and read line by line. The first line is treated as a header and
    skipped.

    Args:
        url: Location of the archive. Defaults to the IMDb ``name.basics``
            dataset; anything ``urllib.request.urlopen`` accepts works.

    Yields:
        The second column of every remaining row, followed by exactly one
        newline. Rows with fewer than two columns are skipped.
    """
    with urllib.request.urlopen(url) as response, \
            tempfile.TemporaryFile() as archive:
        # Copy the download to a local file first so that decompression
        # reads from disk rather than from the open network response.
        shutil.copyfileobj(response, archive)
        archive.seek(0)
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which varies between platforms.
        with gzip.open(archive, mode='rt', encoding='utf-8') as source:
            # Skip the header. The default keeps an empty archive from
            # raising StopIteration inside the generator (a RuntimeError).
            next(source, None)
            for line in source:
                # Only the first two columns matter; stop splitting there.
                fields = line.rstrip('\n').split('\t', 2)
                if len(fields) < 2:
                    continue  # Blank or malformed row: no name column
                yield f'{fields[1]}\n'
43+
44+
def _run_interruptible():
    """Run ``main()``, printing a short notice if the user presses Ctrl-C."""
    try:
        main()
    except KeyboardInterrupt:
        # Swallow the interrupt so no traceback is shown to the user.
        print('Aborted')


if __name__ == '__main__':
    _run_interruptible()
You can’t perform that action at this time.
0 commit comments