Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 48 additions & 11 deletions scripts/data_collector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,18 +202,55 @@ def _get_symbol():
-------
{600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
"""
url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"
try:
resp = requests.get(url, timeout=None)
resp.raise_for_status()
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {url} failed with status code {resp.status_code}") from e
# url = "http://99.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&po=1&np=1&fs=m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048&fields=f12"

base_url = "http://99.push2.eastmoney.com/api/qt/clist/get"
params = {
"pn": 1, # page number
"pz": 100, # page size, default to 100
"po": 1,
"np": 1,
"fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
"fields": "f12"
}

_symbols = []
page = 1

while True:
params["pn"] = page
try:
resp = requests.get(base_url, params=params, timeout=None)
resp.raise_for_status()
data = resp.json()

# Check if response contains valid data
if not data or "data" not in data or not data["data"] or "diff" not in data["data"]:
logger.warning(f"Invalid response structure on page {page}")
break

try:
_symbols = [_v["f12"] for _v in resp.json()["data"]["diff"]]
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise
# fetch the current page data
current_symbols = [_v["f12"] for _v in data["data"]["diff"]]

if not current_symbols: # It's the last page if there is no data in current page
logger.info(f"Last page reached: {page - 1}")
break

_symbols.extend(current_symbols)

# show progress
logger.info(f"Page {page}: fetch {len(current_symbols)} stocks:[{current_symbols[0]} ... {current_symbols[-1]}]")

page += 1

# sleep time to avoid overloading the server
time.sleep(0.5)

except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"Request to {base_url} failed with status code {resp.status_code}") from e
except Exception as e:
logger.warning("An error occurred while extracting data from the response.")
raise

if len(_symbols) < 3900:
raise ValueError("The complete list of stocks is not available.")
Expand Down
Loading