22# -*- coding: utf-8 -*-
33
44import bz2
5+ import sqlite3
6+ import tempfile
57from pkg_resources import resource_filename
68from pyowm .weatherapi25 .location import Location
79
8-
9- CITY_ID_FILES_PATH = 'cityids/%03d-%03d.txt.bz2'
10+ CITY_ID_DB_PATH = 'cityids/cities.db.bz2'
1011
1112
1213class CityIDRegistry :
1314
1415 MATCHINGS = {
15- 'exact' : lambda city_name , toponym : city_name == toponym ,
16- 'nocase' : lambda city_name , toponym : city_name .lower () == toponym .lower (),
17- 'like' : lambda city_name , toponym : city_name .lower () in toponym .lower (),
18- 'startswith' : lambda city_name , toponym : toponym .lower ().startswith (city_name .lower ())
16+ 'exact' : "SELECT city_id, name, country, state, lat, lon FROM city WHERE name=?" ,
17+ 'like' : r"SELECT city_id, name, country, state, lat, lon FROM city WHERE name LIKE ?"
1918 }
2019
21- def __init__ (self , filepath_regex ):
22- """
23- Initialise a registry that can be used to lookup info about cities.
24-
25- :param filepath_regex: Python format string that gives the path of the files
26- that store the city IDs information.
27- Eg: ``folder1/folder2/%02d-%02d.txt``
28- :type filepath_regex: str
29- :returns: a *CityIDRegistry* instance
30-
31- """
32- self ._filepath_regex = filepath_regex
20+ def __init__ (self , sqlite_db_path : str ):
21+ self .connection = self .__decompress_db_to_memory (sqlite_db_path )
3322
3423 @classmethod
3524 def get_instance (cls ):
3625 """
3726 Factory method returning the default city ID registry
3827 :return: a `CityIDRegistry` instance
3928 """
40- return CityIDRegistry (CITY_ID_FILES_PATH )
29+ return CityIDRegistry (CITY_ID_DB_PATH )
4130
42- def ids_for (self , city_name , country = None , matching = 'nocase' ):
31+ def __decompress_db_to_memory (self , sqlite_db_path : str ):
4332 """
44- Returns a list of tuples in the form (long, str, str) corresponding to
45- the int IDs and relative toponyms and 2-chars country of the cities
46- matching the provided city name.
47- The rule for identifying matchings is according to the provided
48- `matching` parameter value.
33+ Decompresses to memory the SQLite database at the provided path
34+ :param sqlite_db_path: str
35+ :return: `sqlite3.Connection` handle to the in-memory copy of the database
36+ """
37+ # https://stackoverflow.com/questions/3850022/how-to-load-existing-db-file-to-memory-in-python-sqlite3
38+ # https://stackoverflow.com/questions/32681761/how-can-i-attach-an-in-memory-sqlite-database-in-python
39+ # https://pymotw.com/2/bz2/
40+
41+ # read and uncompress data from compressed DB
42+ res_name = resource_filename (__name__ , sqlite_db_path )
43+ bz2_db = bz2 .BZ2File (res_name )
44+ decompressed_data = bz2_db .read ()
45+
46+ # dump decompressed data to a temp DB
47+ with tempfile .NamedTemporaryFile (mode = 'wb' ) as tmpf :
48+ tmpf .write (decompressed_data )
49+ tmpf_name = tmpf .name
50+
51+ # read temp DB to memory and return handle
52+ src_conn = sqlite3 .connect (tmpf_name )
53+ dest_conn = sqlite3 .connect (':memory:' )
54+ src_conn .backup (dest_conn )
55+ src_conn .close ()
56+ return dest_conn
57+
58+ def __query (self , sql_query : str , * args ):
59+ """
60+ Queries the DB with the specified SQL query
61+ :param sql_query: str
62+ :return: list of tuples
63+ """
64+ cursor = self .connection .cursor ()
65+ try :
66+ return cursor .execute (sql_query , args ).fetchall ()
67+ finally :
68+ cursor .close ()
69+
70+ def ids_for (self , city_name , country = None , state = None , matching = 'like' ):
71+ """
72+ Returns a list of tuples in the form (city_id, name, country, state, lat, lon )
73+ The rule for querying follows the provided `matching` parameter value.
4974 If `country` is provided, the search is restricted to the cities of
50- the specified country.
75+ the specified country, and is further restricted to the given state when `state` is provided as well
76+ :param city_name: the string toponym of the city to search
5177 :param country: two character str representing the country where to
5278 search for the city. Defaults to `None`, which means: search in all
5379 countries.
54- :param matching: str. Default is `nocase`. Possible values:
55- `exact` - literal, case-sensitive matching,
56- `nocase` - literal, case-insensitive matching,
80+ :param state: two character str representing the state where to
81+ search for the city. Defaults to `None`. When not `None`, `country` must be specified as well
82+ :param matching: str. Default is `like`. Possible values:
83+ `exact` - literal, case-sensitive matching
5784 `like` - matches cities whose name contains, as a substring, the string
5885 fed to the function, case-insensitive,
59- `startswith` - matches cities whose names start with the string fed
60- to the function, case-insensitive.
6186 :raises ValueError if the value for `matching` is unknown
6287 :return: list of tuples
6388 """
@@ -68,43 +93,49 @@ def ids_for(self, city_name, country=None, matching='nocase'):
6893 "allowed values are %s" % ", " .join (self .MATCHINGS ))
6994 if country is not None and len (country ) != 2 :
7095 raise ValueError ("Country must be a 2-char string" )
71- splits = self ._filter_matching_lines (city_name , country , matching )
72- return [(int (item [1 ]), item [0 ], item [4 ]) for item in splits ]
96+ if state is not None and country is None :
97+ raise ValueError ("A country must be specified whenever a state is specified too" )
98+
99+ q = self .MATCHINGS [matching ]
100+ if matching == 'exact' :
101+ params = [city_name ]
102+ else :
103+ params = ['%' + city_name + '%' ]
104+
105+ if country is not None :
106+ q = q + ' AND country=?'
107+ params .append (country )
108+
109+ if state is not None :
110+ q = q + ' AND state=?'
111+ params .append (state )
112+
113+ rows = self .__query (q , * params )
114+ return rows
73115
74- def locations_for (self , city_name , country = None , matching = 'nocase ' ):
116+ def locations_for (self , city_name , country = None , state = None , matching = 'like ' ):
75117 """
76- Returns a list of Location objects corresponding to
77- the int IDs and relative toponyms and 2-chars country of the cities
78- matching the provided city name.
79- The rule for identifying matchings is according to the provided
80- `matching` parameter value.
118+ Returns a list of `Location` objects
119+ The rule for querying follows the provided `matching` parameter value.
81120 If `country` is provided, the search is restricted to the cities of
82- the specified country.
121+ the specified country, and is further restricted to the given state when `state` is provided as well
122+ :param city_name: the string toponym of the city to search
83123 :param country: two character str representing the country where to
84124 search for the city. Defaults to `None`, which means: search in all
85125 countries.
86- :param matching: str. Default is `nocase`. Possible values:
87- `exact` - literal, case-sensitive matching,
88- `nocase` - literal, case-insensitive matching,
126+ :param state: two character str representing the state where to
127+ search for the city. Defaults to `None`. When not `None`, `country` must be specified as well
128+ :param matching: str. Default is `like`. Possible values:
129+ `exact` - literal, case-sensitive matching
89130 `like` - matches cities whose name contains, as a substring, the string
90131 fed to the function, case-insensitive,
91- `startswith` - matches cities whose names start with the string fed
92- to the function, case-insensitive.
93132 :raises ValueError if the value for `matching` is unknown
94- :return: list of `weatherapi25.location. Location` objects
133+ :return: list of `Location` objects
95134 """
96- if not city_name :
97- return []
98- if matching not in self .MATCHINGS :
99- raise ValueError ("Unknown type of matching: "
100- "allowed values are %s" % ", " .join (self .MATCHINGS ))
101- if country is not None and len (country ) != 2 :
102- raise ValueError ("Country must be a 2-char string" )
103- splits = self ._filter_matching_lines (city_name , country , matching )
104- return [Location (item [0 ], float (item [3 ]), float (item [2 ]),
105- int (item [1 ]), item [4 ]) for item in splits ]
135+ items = self .ids_for (city_name , country = country , state = state , matching = matching )
136+ return [Location (item [1 ], item [5 ], item [4 ], item [0 ], country = item [2 ]) for item in items ]
106137
107- def geopoints_for (self , city_name , country = None , matching = 'nocase ' ):
138+ def geopoints_for (self , city_name , country = None , state = None , matching = 'like ' ):
108139 """
109140 Returns a list of ``pyowm.utils.geo.Point`` objects corresponding to
110141 the int IDs and relative toponyms and 2-chars country of the cities
@@ -113,114 +144,18 @@ def geopoints_for(self, city_name, country=None, matching='nocase'):
113144 `matching` parameter value.
114145 If `country` is provided, the search is restricted to the cities of
115146 the specified country.
147+ :param city_name: the string toponym of the city to search
116148 :param country: two character str representing the country where to
117149 search for the city. Defaults to `None`, which means: search in all
118150 countries.
151+ :param state: two character str representing the state where to
152+ search for the city. Defaults to `None`. When not `None`, `country` must be specified as well
119153 :param matching: str. Default is `like`. Possible values:
120- `exact` - literal, case-sensitive matching,
121- `nocase` - literal, case-insensitive matching,
154+ `exact` - literal, case-sensitive matching
122155 `like` - matches cities whose name contains, as a substring, the string
123156 fed to the function, case-insensitive,
124- `startswith` - matches cities whose names start with the string fed
125- to the function, case-insensitive.
126157 :raises ValueError if the value for `matching` is unknown
127158 :return: list of `pyowm.utils.geo.Point` objects
128159 """
129- locations = self .locations_for (city_name , country , matching = matching )
160+ locations = self .locations_for (city_name , country = country , state = state , matching = matching )
130161 return [loc .to_geopoint () for loc in locations ]
131-
132- # helper functions
133-
134- def _filter_matching_lines (self , city_name , country , matching ):
135- """
136- Returns an iterable whose items are the lists of split tokens of every
137- text line matched against the city ID files according to the provided
138- combination of city_name, country and matching style
139- :param city_name: str
140- :param country: str or `None`
141- :param matching: str
142- :return: list of lists
143- """
144- result = []
145-
146- # find the right file to scan and extract its lines. Upon "like"
147- # matchings, just read all files
148- if matching == 'like' :
149- lines = [l .strip () for l in self ._get_all_lines ()]
150- else :
151- filename = self ._assess_subfile_from (city_name )
152- lines = [l .strip () for l in self ._get_lines (filename )]
153-
154- # look for toponyms matching the specified city_name and according to
155- # the specified matching style
156- for line in lines :
157- tokens = line .split ("," )
158- # sometimes city names have one or more inner commas
159- if len (tokens ) > 5 :
160- tokens = [',' .join (tokens [:- 4 ]), * tokens [- 4 :]]
161- # check country
162- if country is not None and tokens [4 ] != country :
163- continue
164-
165- # check city_name
166- if self ._city_name_matches (city_name , tokens [0 ], matching ):
167- result .append (tokens )
168-
169- return result
170-
171- def _city_name_matches (self , city_name , toponym , matching ):
172- comparison_function = self .MATCHINGS [matching ]
173- return comparison_function (city_name , toponym )
174-
175- def _lookup_line_by_city_name (self , city_name ):
176- filename = self ._assess_subfile_from (city_name )
177- lines = self ._get_lines (filename )
178- return self ._match_line (city_name , lines )
179-
180- def _assess_subfile_from (self , city_name ):
181- c = ord (city_name .lower ()[0 ])
182- if c < 97 : # not a letter
183- raise ValueError ('Error: city name must start with a letter' )
184- elif c in range (97 , 103 ): # from a to f
185- return self ._filepath_regex % (97 , 102 )
186- elif c in range (103 , 109 ): # from g to l
187- return self ._filepath_regex % (103 , 108 )
188- elif c in range (109 , 115 ): # from m to r
189- return self ._filepath_regex % (109 , 114 )
190- elif c in range (115 , 123 ): # from s to z
191- return self ._filepath_regex % (115 , 122 )
192- else :
193- raise ValueError ('Error: city name must start with a letter' )
194-
195- def _get_lines (self , filename ):
196- res_name = resource_filename (__name__ , filename )
197- with bz2 .open (res_name , mode = 'rb' ) as fh :
198- lines = fh .readlines ()
199- if type (lines [0 ]) is bytes :
200- lines = map (lambda l : l .decode ("utf-8" ), lines )
201- return lines
202-
203- def _get_all_lines (self ):
204- all_lines = []
205- for city_name in ['a' , 'g' , 'm' , 's' ]: # all available city ID files
206- filename = self ._assess_subfile_from (city_name )
207- all_lines .extend (self ._get_lines (filename ))
208- return all_lines
209-
210- def _match_line (self , city_name , lines ):
211- """
212- The lookup is case insensitive and returns the first matching line,
213- stripped.
214- :param city_name: str
215- :param lines: list of str
216- :return: str
217- """
218- for line in lines :
219- toponym = line .split (',' )[0 ]
220- if toponym .lower () == city_name .lower ():
221- return line .strip ()
222- return None
223-
224- def __repr__ (self ):
225- return "<%s.%s - filepath_regex=%s>" % (__name__ , \
226- self .__class__ .__name__ , self ._filepath_regex )
0 commit comments