Skip to content

Commit 54195bf

Browse files
authored
fix(cat-gateway): prepare_cardano_asset.py Ada amount issue (#2557)
* wip * wip * wip * wip * wip * wip * fix
1 parent 9498faa commit 54195bf

File tree

2 files changed

+95
-168
lines changed

2 files changed

+95
-168
lines changed

catalyst-gateway/tests/api_tests/integration/test_assets.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
import pytest
55
from loguru import logger
66
from api.v1 import cardano
7+
from functools import reduce
78

89

910
@pytest.mark.preprod_indexing
1011
def test_persistent_ada_amount_endpoint():
12+
# can get the file from https://github.com/input-output-hk/catalyst-storage/blob/main/cardano-asset-preprod.json
1113
ASSETS_DATA_PATH = os.environ["ASSETS_DATA_PATH"]
1214

1315
test_data: dict[str, any] = {}
@@ -33,23 +35,20 @@ def test_persistent_ada_amount_endpoint():
3335
assets = resp.json()
3436

3537
# check ada amount
36-
received_amt = int(assets["persistent"]["ada_amount"] / 10e5)
37-
expected_amt = entry["ada_amount"]
38-
try:
39-
assert received_amt == expected_amt
40-
except AssertionError:
41-
logger.error(
42-
f"Assertion failed: Ada amount for '{stake_addr}', expected: {expected_amt}, received: {received_amt}"
43-
)
44-
pass
45-
46-
# check total assets count
47-
received_token_len = len(assets["persistent"]["assets"])
48-
expected_token_len = len(entry["native_tokens"])
49-
try:
50-
assert received_token_len == expected_token_len
51-
except AssertionError:
52-
logger.error(
53-
f"Assertion failed: Token count for '{stake_addr}', expected: {expected_token_len}, received: {received_token_len}"
54-
)
55-
pass
38+
received_ada = assets["persistent"]["ada_amount"]
39+
expected_ada = entry["ada_amount"]
40+
41+
assert received_ada == expected_ada, logger.error(
42+
f"Assertion failed: Ada amount for '{stake_addr}', expected: {expected_ada}, received: {received_ada}"
43+
)
44+
45+
# check assets
46+
received_assets = {
47+
item["policy_hash"]: item["amount"]
48+
for item in assets["persistent"]["assets"]
49+
}
50+
expected_assets = entry["native_tokens"]
51+
52+
assert received_assets == expected_assets, logger.error(
53+
f"Assertion failed: Token count for '{stake_addr}', expected: {expected_assets}, received: {received_assets}"
54+
)
Lines changed: 76 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -1,183 +1,111 @@
1-
# cspell: words cloudscraper
1+
# cspell: words BLOCKFROST
22

33
"""
4-
This script is a simple web scraper tool to prepare testing data for the `cardano/asset` endpoint.
5-
6-
Prerequisites before running this script:
7-
- Make sure that you have `BeautifulSoup`, `cloudscraper`, and `certifi` installed.
8-
- Make sure that you have a snapshot file available for this script, can get one from the `catalyst-storage` repo.
9-
- Fill your own client params to `CF_CLEARANCE` and `USER_AGENT`. Other variables can be configured to fit the need.
4+
This script is a simple tool to prepare testing data for the `cardano/asset` endpoint.
105
"""
116

127
import json
138
import os
14-
import time
15-
import cloudscraper
16-
import certifi
17-
from decimal import Decimal
9+
import requests
10+
from loguru import logger
1811

1912
from utils import address
20-
from bs4 import BeautifulSoup
21-
22-
# ----- variables -----
23-
MAX_ATTEMPT = 3
24-
25-
# provide yours here, can acquire `CF_CLEARANCE` by going to `https://preprod.cexplorer.io`,
26-
# and extract this field from the cookies header using the `network` tab
27-
CF_CLEARANCE = ""
28-
29-
# can be something like "Mozilla/5.0"
30-
USER_AGENT = ""
31-
32-
# relative path to this script file for the output
33-
OUT_FILE = "./cardano-asset-80000000-preprod.json"
34-
35-
# the snapshot file to read as a reference of scraping
36-
IN_FILE = "./snapshot-80000000-preprod.json"
37-
38-
39-
# ----- functions -----
40-
def request(url) -> str:
41-
scraper = cloudscraper.create_scraper()
42-
43-
response = scraper.get(
44-
url,
45-
headers={"User-Agent": USER_AGENT},
46-
cookies={"cf_clearance": CF_CLEARANCE},
47-
verify=certifi.where(),
48-
)
49-
50-
if response.status_code != 200:
51-
raise Exception(response.text)
52-
53-
return response.text
54-
55-
56-
def get_stake_asset_page(stake_addr: str) -> str:
57-
return request(f"https://preprod.cexplorer.io/stake/{stake_addr}/asset")
5813

14+
# ----- configuration (all values are taken from the environment) -----

# where the prepared snapshot json is written (relative to this script)
OUT_FILE = os.environ["CARDANO_ASSETS_OUTPUT_FILE"]
# snapshot file providing the list of stake addresses to process
IN_FILE = os.environ["CARDANO_ASSETS_INPUT_FILE"]
# blockfrost.io project token
BLOCKFROST_TOKEN = os.environ["BLOCKFROST_TOKEN"]
# cardano network type, e.g. "preprod"
CARDANO_NETWORK = os.environ["CARDANO_NETWORK"]

# base url of the blockfrost REST API for the selected network
BLOCKFROST_URL = f"https://cardano-{CARDANO_NETWORK}.blockfrost.io/api/v0"

# paging window over the input snapshot records
RECORDS_LIMIT = 100
START_POSITION = 0
7030

7131

72-
def epoch_2_slot(epoch: int) -> int:
73-
shelley_start_epoch = 208
74-
shelley_start_slot = 88_416_000
75-
slots_per_epoch = 432_000
32+
def get_request(s: requests.Session, url: str):
    """GET ``url`` from the blockfrost API and return the decoded JSON body.

    Returns ``None`` when the resource does not exist (HTTP 404 — blockfrost
    uses this for accounts/addresses never seen on chain); raises on any
    other non-200 response so callers never consume a bad payload.
    """
    resp = s.get(url=url, headers={"project_id": BLOCKFROST_TOKEN})
    if resp.status_code == 404:
        return None
    if resp.status_code != 200:
        # a plain `assert` here would be stripped under `python -O`,
        # silently letting error payloads through — raise explicitly instead
        raise RuntimeError(f"req: {url}, resp: {resp.text}")
    return resp.json()
7638

77-
if epoch < shelley_start_epoch:
78-
raise Exception("Epochs before 208 (Byron era) have a different slot timing")
79-
80-
return shelley_start_slot + (epoch - shelley_start_epoch) * slots_per_epoch
8139

8240
# ----- process -----

# read the snapshot file that provides the list of stake public keys
with open(IN_FILE, "r", encoding="utf-8") as f:
    snapshot_data = json.load(f)

# resume support: if the output file already exists, keep its records so this
# run only adds/overwrites the entries in its own paging window
try:
    with open(OUT_FILE, "r", encoding="utf-8") as f:
        formatted_records = json.load(f)
except (OSError, json.JSONDecodeError):
    # missing or unreadable output file — start from scratch.
    # NOTE: do NOT reset `formatted_records` again after this block, or the
    # just-loaded resume data would be clobbered.
    formatted_records = {}

# process each record in the [START_POSITION, START_POSITION + RECORDS_LIMIT) window
s = requests.Session()
processing_records = snapshot_data[START_POSITION : START_POSITION + RECORDS_LIMIT]
logger.info(
    f"Start processing start: {START_POSITION}, end: {START_POSITION + len(processing_records)}"
)
for i, record in enumerate(processing_records):
    # NOTE(review): network type is hard-coded to "preprod" while
    # CARDANO_NETWORK is configurable — confirm whether these should agree.
    stake_addr = address.stake_public_key_to_address(
        key=record["stake_public_key"][2:], is_stake=True, network_type="preprod"
    )

    logger.info(
        f"Checking: '{stake_addr}'... ({i + 1}/{len(processing_records)})"
    )

    # all payment addresses associated with the stake address;
    # None means blockfrost has never seen this account — skip it
    addresses = get_request(
        s,
        f"{BLOCKFROST_URL}/accounts/{stake_addr}/addresses",
    )
    if addresses is None:
        continue

    # aggregate ada and native-token amounts over every address of the account
    ada_amount = 0
    native_tokens = {}
    for addr in addresses:
        addr_info = get_request(
            s,
            f"{BLOCKFROST_URL}/addresses/{addr['address']}",
        )
        for amount in addr_info["amount"]:
            if amount["unit"] == "lovelace":
                ada_amount += int(amount["quantity"])
                continue
            # the same key must be used for both the write and the lookup,
            # otherwise token amounts held across several addresses are lost
            # (the previous code wrote "0x{unit}" but read back plain "unit")
            token_key = f"0x{amount['unit']}"
            native_tokens[token_key] = native_tokens.get(token_key, 0) + int(
                amount["quantity"]
            )

    # record the slot number of the latest block as the snapshot point
    latest_block = get_request(
        s,
        f"{BLOCKFROST_URL}/blocks/latest",
    )
    slot_number = latest_block["slot"]
    formatted_records[stake_addr] = {
        "ada_amount": ada_amount,
        "native_tokens": native_tokens,
        "slot_number": slot_number,
    }

# write into a file
with open(OUT_FILE, "w") as f:
    json.dump(formatted_records, f, indent=2)

logger.info(
    f"Completed preparing data, start: {START_POSITION}, end: {START_POSITION + len(processing_records)}"
)

0 commit comments

Comments
 (0)