Skip to content

Commit 950408e

Browse files
SunsetWolfshengyuhongfibers
authored
Fix issue 1892 (#1916)
* fix: resolve #1892 by retrieving the data page by page * fix: resolve #1892 by retrieving the data page by page * reformat with black --------- Co-authored-by: shengyuhong <[email protected]> Co-authored-by: fibers <[email protected]>
1 parent 320bd65 commit 950408e

File tree

1 file changed

+52
-11
lines changed

1 file changed

+52
-11
lines changed

scripts/data_collector/utils.py

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -202,18 +202,59 @@ def _get_symbol():
202202
-------
203203
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
204204
"""
205-
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
206-
try:
207-
resp = requests.get(url, timeout=None)
208-
resp.raise_for_status()
209-
except requests.exceptions.HTTPError as e:
210-
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
205+
# url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
206+
207+
base_url = "http://99.push2.eastmoney.com/api/qt/clist/get"
208+
params = {
209+
"pn": 1, # page number
210+
"pz": 100, # page size, default to 100
211+
"po": 1,
212+
"np": 1,
213+
"fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
214+
"fields": "f12",
215+
}
211216

212-
try:
213-
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
214-
except Exception as e:
215-
logger.warning("An error occurred while extracting data from the response.")
216-
raise
217+
_symbols = []
218+
page = 1
219+
220+
while True:
221+
params["pn"] = page
222+
try:
223+
resp = requests.get(base_url, params=params, timeout=None)
224+
resp.raise_for_status()
225+
data = resp.json()
226+
227+
# Check if response contains valid data
228+
if not data or "data" not in data or not data["data"] or "diff" not in data["data"]:
229+
logger.warning(f"Invalid response structure on page {page}")
230+
break
231+
232+
# fetch the current page data
233+
current_symbols = [_v["f12"] for _v in data["data"]["diff"]]
234+
235+
if not current_symbols: # It's the last page if there is no data in current page
236+
logger.info(f"Last page reached: {page - 1}")
237+
break
238+
239+
_symbols.extend(current_symbols)
240+
241+
# show progress
242+
logger.info(
243+
f"Page {page}: fetch {len(current_symbols)} stocks:[{current_symbols[0]} ... {current_symbols[-1]}]"
244+
)
245+
246+
page += 1
247+
248+
# sleep time to avoid overloading the server
249+
time.sleep(0.5)
250+
251+
except requests.exceptions.HTTPError as e:
252+
raise requests.exceptions.HTTPError(
253+
f"Request to {base_url} failed with status code {resp.status_code}"
254+
) from e
255+
except Exception as e:
256+
logger.warning("An error occurred while extracting data from the response.")
257+
raise
217258

218259
if len(_symbols) < 3900:
219260
raise ValueError("The complete list of stocks is not available.")

0 commit comments

Comments
 (0)