Select only balance in SQL queries

dimkarakostas · dimkarakostas · commit 742cefff9a38 · 2023-12-14T14:19:12.000Z
diff --git a/tests/test_analyze.py b/tests/test_analyze.py
@@ -90,8 +90,8 @@ def test_analyze_snapshot(mocker):
     output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
     assert output == {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 1}
 
-    get_clustered_entries_mock.return_value = [['entity', 4], ['entity 2', 4]]
-    get_nonclustered_entries_mock.return_value = [['address', 4], ['address 2', 4]]
+    get_clustered_entries_mock.return_value = [[4, ], [4, ]]
+    get_nonclustered_entries_mock.return_value = [[4, ], [4, ]]
 
     get_force_analyze_mock.return_value = True
 
diff --git a/tests/test_helper.py b/tests/test_helper.py
@@ -238,7 +238,7 @@ def test_get_top_limit_value(mocker):
 
 
 def test_get_circulation_from_entries():
-    entries = [['i0', 10], ['i1', 11]]
+    entries = [[10, ], [11, ]]
     circulation = hlp.get_circulation_from_entries(entries)
     assert circulation == 21
 
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -3,51 +3,51 @@
 
 
 def test_tau_50():
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.5)
     assert tau_index == 1
     assert tau_market_share == 0.5
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.5)
     assert tau_index == 2
     assert round(tau_market_share, 2) == 0.56
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.5)
     assert tau_index == 1
     assert tau_market_share == 1
 
 
 def test_tau_33():
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.33)
     assert tau_index == 1
     assert tau_market_share == 0.5
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.33)
     assert tau_index == 1
     assert round(tau_market_share, 2) == 0.33
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.33)
     assert tau_index == 1
     assert tau_market_share == 1
 
 
 def test_tau_66():
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=6, threshold=0.66)
     assert tau_index == 2
     assert round(tau_market_share, 2) == 0.83
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=9, threshold=0.66)
     assert tau_index == 3
     assert round(tau_market_share, 2) == 0.67
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     tau_index, tau_market_share = compute_tau(tokens_per_entity, circulation=1, threshold=0.66)
     assert tau_index == 1
     assert tau_market_share == 1
@@ -58,19 +58,19 @@ def test_gini():
     Ensure that the results of the compute_gini function are consistent with online calculators,
     such as https://goodcalculators.com/gini-coefficient-calculator/ (5 decimal accuracy)
     """
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     gini = compute_gini(tokens_per_entity, circulation=6)
     assert round(gini, 5) == 0.22222
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     gini = compute_gini(tokens_per_entity, circulation=9)
     assert round(gini, 5) == 0.24074
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     gini = compute_gini(tokens_per_entity, circulation=1)
     assert gini == 0
 
-    tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
+    tokens_per_entity = [(1, ), (1, ), (1, )]
     gini = compute_gini(tokens_per_entity, circulation=3)
     assert round(gini, 5) == 0  # Note that this test case fails if we don't round, because of floating point errors
 
@@ -80,19 +80,19 @@ def test_hhi():
     Ensure that the results of the compute_hhi function are consistent with online calculators,
     such as https://www.unclaw.com/chin/teaching/antitrust/herfindahl.htm
     """
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     hhi = compute_hhi(tokens_per_entity, circulation=6)
     assert round(hhi) == 3889
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     hhi = compute_hhi(tokens_per_entity, circulation=9)
     assert round(hhi) == 2099
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     hhi = compute_hhi(tokens_per_entity, circulation=1)
     assert round(hhi) == 10000
 
-    tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
+    tokens_per_entity = [(1, ), (1, ), (1, )]
     hhi = compute_hhi(tokens_per_entity, circulation=3)
     assert round(hhi) == 3333
 
@@ -102,51 +102,51 @@ def test_shannon_entropy():
     Ensure that the results of the compute_shannon_entropy function are consistent with online calculators,
     such as: https://www.omnicalculator.com/statistics/shannon-entropy
     """
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     entropy = compute_shannon_entropy(tokens_per_entity, circulation=6)
     assert round(entropy, 3) == 1.459
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     entropy = compute_shannon_entropy(tokens_per_entity, circulation=9)
     assert round(entropy, 3) == 2.419
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     entropy = compute_shannon_entropy(tokens_per_entity, circulation=1)
     assert entropy == 0
 
-    tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
+    tokens_per_entity = [(1, ), (1, ), (1, )]
     entropy = compute_shannon_entropy(tokens_per_entity, circulation=3)
     assert round(entropy, 3) == 1.585
 
 
 def test_total_entities():
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     total_entities = compute_total_entities(tokens_per_entity, circulation=6)
     assert total_entities == 3
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     total_entities = compute_total_entities(tokens_per_entity, circulation=9)
     assert total_entities == 6
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     total_entities = compute_total_entities(tokens_per_entity, circulation=1)
     assert total_entities == 1
 
 
 def test_compute_max_power_ratio():
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=6)
     assert max_mpr == 0.5
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=9)
     assert max_mpr == 1 / 3
 
-    tokens_per_entity = [('a', 1)]
+    tokens_per_entity = [(1, )]
     max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=1)
     assert max_mpr == 1
 
-    tokens_per_entity = [('a', 1), ('b', 1), ('c', 1)]
+    tokens_per_entity = [(1, ), (1, ), (1, )]
     max_mpr = compute_max_power_ratio(tokens_per_entity, circulation=3)
     assert max_mpr == 1 / 3
 
@@ -158,22 +158,22 @@ def test_compute_theil_index():
     """
     decimals = 3
 
-    tokens_per_entity = [('a', 3.0), ('b', 2), ('c', 1)]
+    tokens_per_entity = [(3.0, ), (2, ), (1, )]
     theil_t = compute_theil_index(tokens_per_entity, 6)
     assert round(theil_t, decimals) == 0.087
 
-    tokens_per_entity = [('a', 3), ('b', 2), ('c', 1), ('d', 1), ('e', 1), ('f', 1)]
+    tokens_per_entity = [(3, ), (2, ), (1, ), (1, ), (1, ), (1, )]
     theil_t = compute_theil_index(tokens_per_entity, 9)
     assert round(theil_t, decimals) == 0.115
 
-    tokens_per_entity = {('a', 432), ('b', 0), ('c', 0), ('d', 0)}
+    tokens_per_entity = [(432, ), (0, ), (0, ), (0, )]
     theil_t = compute_theil_index(tokens_per_entity, 432)
     assert round(theil_t, decimals) == 1.386
 
-    tokens_per_entity = {('a', 432)}
+    tokens_per_entity = [(432, )]
     theil_t = compute_theil_index(tokens_per_entity, 432)
     assert round(theil_t, decimals) == 0
 
-    tokens_per_entity = {}
+    tokens_per_entity = []
     theil_t = compute_theil_index(tokens_per_entity, 432)
     assert theil_t == 0
diff --git a/tokenomics_decentralization/db_helper.py b/tokenomics_decentralization/db_helper.py
@@ -154,7 +154,7 @@ def get_non_clustered_balance_entries(conn, snapshot, ledger, balance_threshold)
 
     start = time()
     query = f'''
-        SELECT addresses.name, balance
+        SELECT balance
         FROM balances
         LEFT JOIN addresses ON balances.address_id=addresses.id
         WHERE snapshot_id=?
@@ -187,15 +187,19 @@ def get_balance_entries(conn, snapshot, ledger, balance_threshold):
 
     start = time()
     query = f'''
-        SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
-        FROM balances
-        LEFT JOIN addresses ON balances.address_id=addresses.id
-        LEFT JOIN entities ON addresses.entity_id=entities.id
-        WHERE snapshot_id=?
-        {exclude_below_threshold_clause}
-        {exclude_contract_addresses_clause}
-        {special_addresses_clause}
-        GROUP BY entity
+        WITH entries AS (
+            SELECT IFNULL(entities.name, addresses.name) AS entity, SUM(CAST(balance AS REAL)) AS aggregate_balance
+            FROM balances
+            LEFT JOIN addresses ON balances.address_id=addresses.id
+            LEFT JOIN entities ON addresses.entity_id=entities.id
+            WHERE snapshot_id=?
+            {exclude_below_threshold_clause}
+            {exclude_contract_addresses_clause}
+            {special_addresses_clause}
+            GROUP BY entity
+        )
+        SELECT aggregate_balance
+        FROM entries
         ORDER BY aggregate_balance DESC
     '''
 
diff --git a/tokenomics_decentralization/helper.py b/tokenomics_decentralization/helper.py
@@ -292,7 +292,7 @@ def get_circulation_from_entries(entries):
     """
     circulation = 0
     for entry in entries:
-        circulation += int(entry[1])
+        circulation += int(entry[0])
     return circulation
 
 
diff --git a/tokenomics_decentralization/metrics.py b/tokenomics_decentralization/metrics.py
@@ -4,8 +4,7 @@
 def compute_tau(entries, circulation, threshold):
     """
     Calculates the tau index of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :param threshold: float, the parameter of the tau index, i.e. the threshold for the market share
     that is captured by the index
@@ -15,7 +14,7 @@ def compute_tau(entries, circulation, threshold):
     results = [0, 0]
 
     for entry in entries:
-        market_share = int(entry[1]) / circulation
+        market_share = int(entry[0]) / circulation
         if results[1] >= threshold:
             break
         results[0] += 1
@@ -27,8 +26,7 @@ def compute_tau(entries, circulation, threshold):
 def compute_gini(entries, circulation):
     """
     Calculates the Gini coefficient of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: float between 0 and 1 that represents the Gini coefficient of the given distribution
     """
@@ -37,7 +35,7 @@ def compute_gini(entries, circulation):
     gini = 1
     for entry in entries:
         richer_population_percentage = parsed_entries / population
-        market_share = int(entry[1]) / circulation
+        market_share = int(entry[0]) / circulation
         gini -= market_share * ((1 / population) + (2 * richer_population_percentage))
         parsed_entries += 1
 
@@ -47,14 +45,13 @@ def compute_gini(entries, circulation):
 def compute_hhi(entries, circulation):
     """
     Calculates the Herfindahl-Hirschman index (HHI) of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: float between 0 and 10,000 that represents the HHI of the given distribution
     """
     hhi = 0
     for entry in entries:
-        market_share = int(entry[1]) / circulation * 100
+        market_share = int(entry[0]) / circulation * 100
         hhi += market_share**2
 
     return hhi
@@ -63,14 +60,13 @@ def compute_hhi(entries, circulation):
 def compute_shannon_entropy(entries, circulation):
     """
     Calculates the Shannon entropy of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: float between 0 and 1 that represents the Shannon entropy of the given distribution
     """
     entropy = 0
     for entry in entries:
-        market_share = int(entry[1]) / circulation
+        market_share = int(entry[0]) / circulation
         if market_share > 0:
             entropy -= market_share * log(market_share, 2)
 
@@ -80,8 +76,7 @@ def compute_shannon_entropy(entries, circulation):
 def compute_total_entities(entries, circulation):
     """
     Calculates the total number of entities in a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: int that represents the total number of entities in the given distribution
     """
@@ -91,20 +86,18 @@ def compute_total_entities(entries, circulation):
 def compute_max_power_ratio(entries, circulation):
     """
     Calculates the maximum power ratio of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: float that represents the maximum power ratio among all token holders
     """
-    max_balance = entries[0][1]
+    max_balance = entries[0][0]
     return max_balance / circulation if circulation > 0 else 0
 
 
 def compute_theil_index(entries, circulation):
     """
     Calculates the Theil-T index of a distribution of balances
-    :param entries: list of tuples (address, balance), sorted by balance in descending order, where
-    address is a string and balance is a numeric type (int or float)
+    :param entries: list of tuples (balance, ), sorted by balance in descending order, where balance is a numeric type (int or float)
     :param circulation: int, the total amount of tokens in circulation
     :returns: float that represents the Thiel index of the given distribution
     """
@@ -114,7 +107,7 @@ def compute_theil_index(entries, circulation):
     mu = circulation / N
     theil = 0
     for entry in entries:
-        x = entry[1] / mu
+        x = entry[0] / mu
         if x > 0:
             theil += x * log(x)
     theil /= N