Skip to content

Commit a534b16

Browse files
Merge pull request #56 from Blockchain-Technology-Lab/performance
Memory consumption reduction
2 parents 0eb7019 + a1d42c8 commit a534b16

File tree

9 files changed

+70
-72
lines changed

9 files changed

+70
-72
lines changed

data_collection_scripts/big_query_balance_data.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
"""
2-
This script can be used to run queries on BigQuery for any number of blockchains, and save the results in the input
3-
directory of the project.
4-
The relevant queries must be stored in a file named 'queries.yaml' in the root directory of the project.
2+
This script can be used to run queries on BigQuery for any number of blockchains,
3+
and save the results in the input directory of the project.
4+
The relevant queries must be stored in a file named 'queries.yaml'
5+
in the data_collection_scripts directory of the project.
56
67
Attention! Before running this script, you need to generate service account credentials from Google, as described
78
here (https://developers.google.com/workspace/guides/create-credentials#service-account) and save your key in the
@@ -22,7 +23,7 @@ def collect_data(ledgers, snapshot_dates, force_query):
2223
if not input_dir.is_dir():
2324
input_dir.mkdir()
2425

25-
with open(root_dir / "queries.yaml") as f:
26+
with open(root_dir / "data_collection_scripts/queries.yaml") as f:
2627
queries = safe_load(f)
2728

2829
i = 0
@@ -44,7 +45,7 @@ def collect_data(ledgers, snapshot_dates, force_query):
4445

4546
while True:
4647
try:
47-
client = bq.Client.from_service_account_json(json_credentials_path=root_dir / f"google-service-account-key-{i}.json")
48+
client = bq.Client.from_service_account_json(json_credentials_path=root_dir / f"data_collection_scripts/google-service-account-key-{i}.json")
4849
except FileNotFoundError:
4950
logging.info(f'Exhausted all {i} service account keys. Aborting..')
5051
all_quota_exceeded = True

google-service-account-key-SAMPLE.json renamed to data_collection_scripts/google-service-account-key-SAMPLE.json

File renamed without changes.

tests/test_analyze.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ def test_analyze_snapshot(mocker):
9090
output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
9191
assert output == {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 1}
9292

93-
get_clustered_entries_mock.return_value = [['entity', 4], ['entity 2', 4]]
94-
get_nonclustered_entries_mock.return_value = [['address', 4], ['address 2', 4]]
93+
get_clustered_entries_mock.return_value = [[4, ], [4, ]]
94+
get_nonclustered_entries_mock.return_value = [[4, ], [4, ]]
9595

9696
get_force_analyze_mock.return_value = True
9797

tests/test_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def test_get_top_limit_value(mocker):
238238

239239

240240
def test_get_circulation_from_entries():
241-
entries = [['i0', 10], ['i1', 11]]
241+
entries = [[10, ], [11, ]]
242242
circulation = hlp.get_circulation_from_entries(entries)
243243
assert circulation == 21
244244

tests/test_metrics.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,51 +3,51 @@
33

44

55
def test_tau_50():
6-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
6+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
77
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.5)
88
assert tau_index == 1
99
assert tau_market_share == 0.5
1010

11-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
11+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
1212
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.5)
1313
assert tau_index == 2
1414
assert round(tau_market_share, 2) == 0.56
1515

16-
tokens_per_entity = [('a', 1)]
16+
tokens_per_entity = [(1, )]
1717
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.5)
1818
assert tau_index == 1
1919
assert tau_market_share == 1
2020

2121

2222
def test_tau_33():
23-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
23+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
2424
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.33)
2525
assert tau_index == 1
2626
assert tau_market_share == 0.5
2727

28-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
28+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
2929
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.33)
3030
assert tau_index == 1
3131
assert round(tau_market_share, 2) == 0.33
3232

33-
tokens_per_entity = [('a', 1)]
33+
tokens_per_entity = [(1, )]
3434
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.33)
3535
assert tau_index == 1
3636
assert tau_market_share == 1
3737

3838

3939
def test_tau_66():
40-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
40+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
4141
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.66)
4242
assert tau_index == 2
4343
assert round(tau_market_share, 2) == 0.83
4444

45-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
45+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
4646
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.66)
4747
assert tau_index == 3
4848
assert round(tau_market_share, 2) == 0.67
4949

50-
tokens_per_entity = [('a', 1)]
50+
tokens_per_entity = [(1, )]
5151
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.66)
5252
assert tau_index == 1
5353
assert tau_market_share == 1
@@ -58,19 +58,19 @@ def test_gini():
5858
Ensure that the results of the compute_gini function are consistent with online calculators,
5959
such as https://goodcalculators.com/gini-coefficient-calculator/ (5 decimal accuracy)
6060
"""
61-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
61+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
6262
gini = compute_gini(tokens_per_entity, circulation=6)
6363
assert round(gini, 5) == 0.22222
6464

65-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
65+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
6666
gini = compute_gini(tokens_per_entity, circulation=9)
6767
assert round(gini, 5) == 0.24074
6868

69-
tokens_per_entity = [('a', 1)]
69+
tokens_per_entity = [(1, )]
7070
gini = compute_gini(tokens_per_entity, circulation=1)
7171
assert gini == 0
7272

73-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
73+
tokens_per_entity = [(1, ), (1, ), (1, )]
7474
gini = compute_gini(tokens_per_entity, circulation=3)
7575
assert round(gini, 5) == 0 # Note that this test case fails if we don't round, because of floating point errors
7676

@@ -80,19 +80,19 @@ def test_hhi():
8080
Ensure that the results of the compute_hhi function are consistent with online calculators,
8181
such as https://www.unclaw.com/chin/teaching/antitrust/herfindahl.htm
8282
"""
83-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
83+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
8484
hhi = compute_hhi(tokens_per_entity, circulation=6)
8585
assert round(hhi) == 3889
8686

87-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
87+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
8888
hhi = compute_hhi(tokens_per_entity, circulation=9)
8989
assert round(hhi) == 2099
9090

91-
tokens_per_entity = [('a', 1)]
91+
tokens_per_entity = [(1, )]
9292
hhi = compute_hhi(tokens_per_entity, circulation=1)
9393
assert round(hhi) == 10000
9494

95-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
95+
tokens_per_entity = [(1, ), (1, ), (1, )]
9696
hhi = compute_hhi(tokens_per_entity, circulation=3)
9797
assert round(hhi) == 3333
9898

@@ -102,51 +102,51 @@ def test_shannon_entropy():
102102
Ensure that the results of the compute_shannon_entropy function are consistent with online calculators,
103103
such as: https://www.omnicalculator.com/statistics/shannon-entropy
104104
"""
105-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
105+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
106106
entropy = compute_shannon_entropy(tokens_per_entity, circulation=6)
107107
assert round(entropy, 3) == 1.459
108108

109-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
109+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
110110
entropy = compute_shannon_entropy(tokens_per_entity, circulation=9)
111111
assert round(entropy, 3) == 2.419
112112

113-
tokens_per_entity = [('a', 1)]
113+
tokens_per_entity = [(1, )]
114114
entropy = compute_shannon_entropy(tokens_per_entity, circulation=1)
115115
assert entropy == 0
116116

117-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
117+
tokens_per_entity = [(1, ), (1, ), (1, )]
118118
entropy = compute_shannon_entropy(tokens_per_entity, circulation=3)
119119
assert round(entropy, 3) == 1.585
120120

121121

122122
def test_total_entities():
123-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
123+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
124124
total_entities = compute_total_entities(tokens_per_entity, circulation=6)
125125
assert total_entities == 3
126126

127-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
127+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
128128
total_entities = compute_total_entities(tokens_per_entity, circulation=9)
129129
assert total_entities == 6
130130

131-
tokens_per_entity = [('a', 1)]
131+
tokens_per_entity = [(1, )]
132132
total_entities = compute_total_entities(tokens_per_entity, circulation=1)
133133
assert total_entities == 1
134134

135135

136136
def test_compute_max_power_ratio():
137-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
137+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
138138
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=6)
139139
assert max_mpr == 0.5
140140

141-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
141+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
142142
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=9)
143143
assert max_mpr == 1 / 3
144144

145-
tokens_per_entity = [('a', 1)]
145+
tokens_per_entity = [(1, )]
146146
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=1)
147147
assert max_mpr == 1
148148

149-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
149+
tokens_per_entity = [(1, ), (1, ), (1, )]
150150
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=3)
151151
assert max_mpr == 1 / 3
152152

@@ -158,22 +158,22 @@ def test_compute_theil_index():
158158
"""
159159
decimals = 3
160160

161-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
161+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
162162
theil_t = compute_theil_index(tokens_per_entity, 6)
163163
assert round(theil_t, decimals) == 0.087
164164

165-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
165+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
166166
theil_t = compute_theil_index(tokens_per_entity, 9)
167167
assert round(theil_t, decimals) == 0.115
168168

169-
tokens_per_entity = {('a', 432), ('b', 0), ('c', 0), ('d', 0)}
169+
tokens_per_entity = [(432, ), (0, ), (0, ), (0, )]
170170
theil_t = compute_theil_index(tokens_per_entity, 432)
171171
assert round(theil_t, decimals) == 1.386
172172

173-
tokens_per_entity = {('a', 432)}
173+
tokens_per_entity = [(432, )]
174174
theil_t = compute_theil_index(tokens_per_entity, 432)
175175
assert round(theil_t, decimals) == 0
176176

177-
tokens_per_entity = {}
177+
tokens_per_entity = []
178178
theil_t = compute_theil_index(tokens_per_entity, 432)
179179
assert theil_t == 0

tokenomics_decentralization/db_helper.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def get_non_clustered_balance_entries(conn, snapshot, ledger, balance_threshold)
154154

155155
start = time()
156156
query = f'''
157-
SELECT addresses.name, balance
157+
SELECT balance
158158
FROM balances
159159
LEFT JOIN addresses ON balances.address_id=addresses.id
160160
WHERE snapshot_id=?
@@ -187,15 +187,19 @@ def get_balance_entries(conn, snapshot, ledger, balance_threshold):
187187

188188
start = time()
189189
query = f'''
190-
SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
191-
FROM balances
192-
LEFT JOIN addresses ON balances.address_id=addresses.id
193-
LEFT JOIN entities ON addresses.entity_id=entities.id
194-
WHERE snapshot_id=?
195-
{exclude_below_threshold_clause}
196-
{exclude_contract_addresses_clause}
197-
{special_addresses_clause}
198-
GROUP BY entity
190+
WITH entries AS (
191+
SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
192+
FROM balances
193+
LEFT JOIN addresses ON balances.address_id=addresses.id
194+
LEFT JOIN entities ON addresses.entity_id=entities.id
195+
WHERE snapshot_id=?
196+
{exclude_below_threshold_clause}
197+
{exclude_contract_addresses_clause}
198+
{special_addresses_clause}
199+
GROUP BY entity
200+
)
201+
SELECT aggregate_balance
202+
FROM entries
199203
ORDER BY aggregate_balance DESC
200204
'''
201205

tokenomics_decentralization/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def get_circulation_from_entries(entries):
292292
"""
293293
circulation = 0
294294
for entry in entries:
295-
circulation += int(entry[1])
295+
circulation += int(entry[0])
296296
return circulation
297297

298298

0 commit comments

Comments (0)