Skip to content

Commit 742ceff

Browse files
committed
Select only balance in SQL queries
1 parent 0eb7019 commit 742ceff

File tree

6 files changed

+64
-67
lines changed

6 files changed

+64
-67
lines changed

tests/test_analyze.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ def test_analyze_snapshot(mocker):
9090
output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
9191
assert output == {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 1}
9292

93-
get_clustered_entries_mock.return_value = [['entity', 4], ['entity 2', 4]]
94-
get_nonclustered_entries_mock.return_value = [['address', 4], ['address 2', 4]]
93+
get_clustered_entries_mock.return_value = [[4, ], [4, ]]
94+
get_nonclustered_entries_mock.return_value = [[4, ], [4, ]]
9595

9696
get_force_analyze_mock.return_value = True
9797

tests/test_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def test_get_top_limit_value(mocker):
238238

239239

240240
def test_get_circulation_from_entries():
241-
entries = [['i0', 10], ['i1', 11]]
241+
entries = [[10, ], [11, ]]
242242
circulation = hlp.get_circulation_from_entries(entries)
243243
assert circulation == 21
244244

tests/test_metrics.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,51 +3,51 @@
33

44

55
def test_tau_50():
6-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
6+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
77
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.5)
88
assert tau_index == 1
99
assert tau_market_share == 0.5
1010

11-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
11+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
1212
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.5)
1313
assert tau_index == 2
1414
assert round(tau_market_share, 2) == 0.56
1515

16-
tokens_per_entity = [('a', 1)]
16+
tokens_per_entity = [(1, )]
1717
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.5)
1818
assert tau_index == 1
1919
assert tau_market_share == 1
2020

2121

2222
def test_tau_33():
23-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
23+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
2424
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.33)
2525
assert tau_index == 1
2626
assert tau_market_share == 0.5
2727

28-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
28+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
2929
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.33)
3030
assert tau_index == 1
3131
assert round(tau_market_share, 2) == 0.33
3232

33-
tokens_per_entity = [('a', 1)]
33+
tokens_per_entity = [(1, )]
3434
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.33)
3535
assert tau_index == 1
3636
assert tau_market_share == 1
3737

3838

3939
def test_tau_66():
40-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
40+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
4141
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.66)
4242
assert tau_index == 2
4343
assert round(tau_market_share, 2) == 0.83
4444

45-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
45+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
4646
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.66)
4747
assert tau_index == 3
4848
assert round(tau_market_share, 2) == 0.67
4949

50-
tokens_per_entity = [('a', 1)]
50+
tokens_per_entity = [(1, )]
5151
tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.66)
5252
assert tau_index == 1
5353
assert tau_market_share == 1
@@ -58,19 +58,19 @@ def test_gini():
5858
Ensure that the results of the compute_gini function are consistent with online calculators,
5959
such as https://goodcalculators.com/gini-coefficient-calculator/ (5 decimal accuracy)
6060
"""
61-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
61+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
6262
gini = compute_gini(tokens_per_entity, circulation=6)
6363
assert round(gini, 5) == 0.22222
6464

65-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
65+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
6666
gini = compute_gini(tokens_per_entity, circulation=9)
6767
assert round(gini, 5) == 0.24074
6868

69-
tokens_per_entity = [('a', 1)]
69+
tokens_per_entity = [(1, )]
7070
gini = compute_gini(tokens_per_entity, circulation=1)
7171
assert gini == 0
7272

73-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
73+
tokens_per_entity = [(1, ), (1, ), (1, )]
7474
gini = compute_gini(tokens_per_entity, circulation=3)
7575
assert round(gini, 5) == 0 # Note that this test case fails if we don't round, because of floating point errors
7676

@@ -80,19 +80,19 @@ def test_hhi():
8080
Ensure that the results of the compute_hhi function are consistent with online calculators,
8181
such as https://www.unclaw.com/chin/teaching/antitrust/herfindahl.htm
8282
"""
83-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
83+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
8484
hhi = compute_hhi(tokens_per_entity, circulation=6)
8585
assert round(hhi) == 3889
8686

87-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
87+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
8888
hhi = compute_hhi(tokens_per_entity, circulation=9)
8989
assert round(hhi) == 2099
9090

91-
tokens_per_entity = [('a', 1)]
91+
tokens_per_entity = [(1, )]
9292
hhi = compute_hhi(tokens_per_entity, circulation=1)
9393
assert round(hhi) == 10000
9494

95-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
95+
tokens_per_entity = [(1, ), (1, ), (1, )]
9696
hhi = compute_hhi(tokens_per_entity, circulation=3)
9797
assert round(hhi) == 3333
9898

@@ -102,51 +102,51 @@ def test_shannon_entropy():
102102
Ensure that the results of the compute_shannon_entropy function are consistent with online calculators,
103103
such as: https://www.omnicalculator.com/statistics/shannon-entropy
104104
"""
105-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
105+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
106106
entropy = compute_shannon_entropy(tokens_per_entity, circulation=6)
107107
assert round(entropy, 3) == 1.459
108108

109-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
109+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
110110
entropy = compute_shannon_entropy(tokens_per_entity, circulation=9)
111111
assert round(entropy, 3) == 2.419
112112

113-
tokens_per_entity = [('a', 1)]
113+
tokens_per_entity = [(1, )]
114114
entropy = compute_shannon_entropy(tokens_per_entity, circulation=1)
115115
assert entropy == 0
116116

117-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
117+
tokens_per_entity = [(1, ), (1, ), (1, )]
118118
entropy = compute_shannon_entropy(tokens_per_entity, circulation=3)
119119
assert round(entropy, 3) == 1.585
120120

121121

122122
def test_total_entities():
123-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
123+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
124124
total_entities = compute_total_entities(tokens_per_entity, circulation=6)
125125
assert total_entities == 3
126126

127-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
127+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
128128
total_entities = compute_total_entities(tokens_per_entity, circulation=9)
129129
assert total_entities == 6
130130

131-
tokens_per_entity = [('a', 1)]
131+
tokens_per_entity = [(1, )]
132132
total_entities = compute_total_entities(tokens_per_entity, circulation=1)
133133
assert total_entities == 1
134134

135135

136136
def test_compute_max_power_ratio():
137-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
137+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
138138
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=6)
139139
assert max_mpr == 0.5
140140

141-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
141+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
142142
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=9)
143143
assert max_mpr == 1 / 3
144144

145-
tokens_per_entity = [('a', 1)]
145+
tokens_per_entity = [(1, )]
146146
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=1)
147147
assert max_mpr == 1
148148

149-
tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
149+
tokens_per_entity = [(1, ), (1, ), (1, )]
150150
max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=3)
151151
assert max_mpr == 1 / 3
152152

@@ -158,22 +158,22 @@ def test_compute_theil_index():
158158
"""
159159
decimals = 3
160160

161-
tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
161+
tokens_per_entity = [(3.0, ), (2, ), (1, )]
162162
theil_t = compute_theil_index(tokens_per_entity, 6)
163163
assert round(theil_t, decimals) == 0.087
164164

165-
tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
165+
tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
166166
theil_t = compute_theil_index(tokens_per_entity, 9)
167167
assert round(theil_t, decimals) == 0.115
168168

169-
tokens_per_entity = {('a', 432), ('b', 0), ('c', 0), ('d', 0)}
169+
tokens_per_entity = [(432, ), (0, ), (0, ), (0, )]
170170
theil_t = compute_theil_index(tokens_per_entity, 432)
171171
assert round(theil_t, decimals) == 1.386
172172

173-
tokens_per_entity = {('a', 432)}
173+
tokens_per_entity = [(432, )]
174174
theil_t = compute_theil_index(tokens_per_entity, 432)
175175
assert round(theil_t, decimals) == 0
176176

177-
tokens_per_entity = {}
177+
tokens_per_entity = []
178178
theil_t = compute_theil_index(tokens_per_entity, 432)
179179
assert theil_t == 0

tokenomics_decentralization/db_helper.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def get_non_clustered_balance_entries(conn, snapshot, ledger, balance_threshold)
154154

155155
start = time()
156156
query = f'''
157-
SELECT addresses.name, balance
157+
SELECT balance
158158
FROM balances
159159
LEFT JOIN addresses ON balances.address_id=addresses.id
160160
WHERE snapshot_id=?
@@ -187,15 +187,19 @@ def get_balance_entries(conn, snapshot, ledger, balance_threshold):
187187

188188
start = time()
189189
query = f'''
190-
SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
191-
FROM balances
192-
LEFT JOIN addresses ON balances.address_id=addresses.id
193-
LEFT JOIN entities ON addresses.entity_id=entities.id
194-
WHERE snapshot_id=?
195-
{exclude_below_threshold_clause}
196-
{exclude_contract_addresses_clause}
197-
{special_addresses_clause}
198-
GROUP BY entity
190+
WITH entries AS (
191+
SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
192+
FROM balances
193+
LEFT JOIN addresses ON balances.address_id=addresses.id
194+
LEFT JOIN entities ON addresses.entity_id=entities.id
195+
WHERE snapshot_id=?
196+
{exclude_below_threshold_clause}
197+
{exclude_contract_addresses_clause}
198+
{special_addresses_clause}
199+
GROUP BY entity
200+
)
201+
SELECT aggregate_balance
202+
FROM entries
199203
ORDER BY aggregate_balance DESC
200204
'''
201205

tokenomics_decentralization/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def get_circulation_from_entries(entries):
292292
"""
293293
circulation = 0
294294
for entry in entries:
295-
circulation += int(entry[1])
295+
circulation += int(entry[0])
296296
return circulation
297297

298298

tokenomics_decentralization/metrics.py

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
def compute_tau(entries, circulation, threshold):
55
"""
66
Calculates the tau index of a distribution of balances
7-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
8-
address is a string and balance is a numeric type (int or float)
7+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
98
:param circulation: int, the total amount of tokens in circulation
109
:param threshold: float, the parameter of the tau index, i.e. the threshold for the market share
1110
that is captured by the index
@@ -15,7 +14,7 @@ def compute_tau(entries, circulation, threshold):
1514
results = [0, 0]
1615

1716
for entry in entries:
18-
market_share = int(entry[1]) / circulation
17+
market_share = int(entry[0]) / circulation
1918
if results[1] >= threshold:
2019
break
2120
results[0] += 1
@@ -27,8 +26,7 @@ def compute_tau(entries, circulation, threshold):
2726
def compute_gini(entries, circulation):
2827
"""
2928
Calculates the Gini coefficient of a distribution of balances
30-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
31-
address is a string and balance is a numeric type (int or float)
29+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
3230
:param circulation: int, the total amount of tokens in circulation
3331
:returns: float between 0 and 1 that represents the Gini coefficient of the given distribution
3432
"""
@@ -37,7 +35,7 @@ def compute_gini(entries, circulation):
3735
gini = 1
3836
for entry in entries:
3937
richer_population_percentage = parsed_entries / population
40-
market_share = int(entry[1]) / circulation
38+
market_share = int(entry[0]) / circulation
4139
gini -= market_share * ((1 / population) + (2 * richer_population_percentage))
4240
parsed_entries += 1
4341

@@ -47,14 +45,13 @@ def compute_gini(entries, circulation):
4745
def compute_hhi(entries, circulation):
4846
"""
4947
Calculates the Herfindahl-Hirschman index (HHI) of a distribution of balances
50-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
51-
address is a string and balance is a numeric type (int or float)
48+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
5249
:param circulation: int, the total amount of tokens in circulation
5350
:returns: float between 0 and 10,000 that represents the HHI of the given distribution
5451
"""
5552
hhi = 0
5653
for entry in entries:
57-
market_share = int(entry[1]) / circulation * 100
54+
market_share = int(entry[0]) / circulation * 100
5855
hhi += market_share**2
5956

6057
return hhi
@@ -63,14 +60,13 @@ def compute_hhi(entries, circulation):
6360
def compute_shannon_entropy(entries, circulation):
6461
"""
6562
Calculates the Shannon entropy of a distribution of balances
66-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
67-
address is a string and balance is a numeric type (int or float)
63+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
6864
:param circulation: int, the total amount of tokens in circulation
6965
:returns: float between 0 and log2(number of entries) that represents the Shannon entropy (in bits) of the given distribution
7066
"""
7167
entropy = 0
7268
for entry in entries:
73-
market_share = int(entry[1]) / circulation
69+
market_share = int(entry[0]) / circulation
7470
if market_share > 0:
7571
entropy -= market_share * log(market_share, 2)
7672

@@ -80,8 +76,7 @@ def compute_shannon_entropy(entries, circulation):
8076
def compute_total_entities(entries, circulation):
8177
"""
8278
Calculates the total number of entities in a distribution of balances
83-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
84-
address is a string and balance is a numeric type (int or float)
79+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
8580
:param circulation: int, the total amount of tokens in circulation
8681
:returns: int that represents the total number of entities in the given distribution
8782
"""
@@ -91,20 +86,18 @@ def compute_total_entities(entries, circulation):
9186
def compute_max_power_ratio(entries, circulation):
9287
"""
9388
Calculates the maximum power ratio of a distribution of balances
94-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
95-
address is a string and balance is a numeric type (int or float)
89+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
9690
:param circulation: int, the total amount of tokens in circulation
9791
:returns: float that represents the maximum power ratio among all token holders
9892
"""
99-
max_balance = entries[0][1]
93+
max_balance = entries[0][0]
10094
return max_balance / circulation if circulation > 0 else 0
10195

10296

10397
def compute_theil_index(entries, circulation):
10498
"""
10599
Calculates the Theil-T index of a distribution of balances
106-
:param entries: list of tuples (address, balance), sorted by balance in descending order, where
107-
address is a string and balance is a numeric type (int or float)
100+
:param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
108101
:param circulation: int, the total amount of tokens in circulation
109102
:returns: float that represents the Theil index of the given distribution
110103
"""
@@ -114,7 +107,7 @@ def compute_theil_index(entries, circulation):
114107
mu = circulation / N
115108
theil = 0
116109
for entry in entries:
117-
x = entry[1] / mu
110+
x = entry[0] / mu
118111
if x > 0:
119112
theil += x * log(x)
120113
theil /= N

0 commit comments

Comments
 (0)