Skip to content

Commit 4484086

Browse files
Clustering flag (#151)
* Retrieve config data once in helper file
* Add config param for disabling clustering
* Print clustering flag in aggregate output file
* Remove empty lines from aggregate output
* Remove space from aggregate output csv header
* Handle list of metric params in config
* Use clustering instead of no_clustering config flag
* Rename vars in analyze
1 parent 81501d6 commit 4484086

File tree

6 files changed

+66
-44
lines changed

6 files changed

+66
-44
lines changed

config.yaml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,19 @@
11
metrics:
22
entropy:
3-
alpha: 1
3+
- 1
44
entropy_percentage:
5-
alpha: 1
5+
- 1
66
gini:
77
hhi:
88
nakamoto_coefficient:
99
theil_index:
1010
max_power_ratio:
1111
tau_index:
12-
threshold: 0.66
12+
- 0.33
13+
- 0.66
14+
15+
analyze_flags:
16+
clustering: true
1317

1418
default_timeframe:
1519
start_date: 2010-01-01

consensus_decentralization/analyze.py

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,14 @@ def analyze(projects, aggregated_data_filename, output_dir):
2121
"""
2222
logging.info('Calculating metrics on aggregated data..')
2323
metrics = hlp.get_metrics_config()
24-
metric_names = list(metrics.keys())
24+
metric_params = []
25+
for key, args in metrics.items():
26+
if args:
27+
for val in args:
28+
metric_params.append((f'{key}={val}', key, val))
29+
else:
30+
metric_params.append((key, key, None))
31+
metric_names = [name for name, _, _ in metric_params]
2532

2633
aggregate_output = {}
2734

@@ -43,34 +50,39 @@ def analyze(projects, aggregated_data_filename, output_dir):
4350
for tchunk, nblocks in block_values.items():
4451
if nblocks > 0:
4552
chunks_with_blocks.add(tchunk)
46-
for metric, args_dict in metrics.items():
53+
for metric_name, metric, param in metric_params:
54+
logging.info(f'Calculating {metric_name}')
55+
4756
for row_index, time_chunk in enumerate(time_chunks):
4857
time_chunk_blocks_per_entity = {}
4958
if column_index == 0:
50-
csv_contents[metric].append([time_chunk])
59+
csv_contents[metric_name].append([time_chunk])
5160
if time_chunk in chunks_with_blocks:
5261
for entity, block_values in blocks_per_entity.items():
5362
try:
5463
time_chunk_blocks_per_entity[entity] = block_values[time_chunk]
5564
except KeyError:
5665
time_chunk_blocks_per_entity[entity] = 0
5766
func = eval(f'compute_{metric}')
58-
result = func(time_chunk_blocks_per_entity, **args_dict) if args_dict else func(
59-
time_chunk_blocks_per_entity)
60-
csv_contents[metric][row_index + 1].append(result)
61-
aggregate_output[project][time_chunk][metric] = result
67+
if param:
68+
result = func(time_chunk_blocks_per_entity, param)
69+
else:
70+
result = func(time_chunk_blocks_per_entity)
71+
csv_contents[metric_name][row_index + 1].append(result)
72+
aggregate_output[project][time_chunk][metric_name] = result
6273

63-
for metric in metrics.keys():
74+
for metric in metric_names:
6475
with open(output_dir / f'{metric}.csv', 'w') as f:
6576
csv_writer = csv.writer(f)
6677
csv_writer.writerows(csv_contents[metric])
6778

68-
aggregate_csv_output = [['ledger', 'snapshot date'] + metric_names]
79+
clustering_flag = hlp.get_config_data()['analyze_flags']['clustering']
80+
aggregate_csv_output = [['ledger', 'snapshot_date', 'clustering'] + metric_names]
6981
for project, timeframes in aggregate_output.items():
7082
for time_chunk, results in timeframes.items():
71-
aggregate_csv_output.append([project, time_chunk])
72-
for metric in metric_names:
73-
aggregate_csv_output[-1].append(results[metric])
83+
metric_values = [results[metric] for metric in metric_names]
84+
if any(metric_values):
85+
aggregate_csv_output.append([project, time_chunk, clustering_flag] + metric_values)
7486
with open(output_dir / 'output.csv', 'w') as f:
7587
csv_writer = csv.writer(f)
7688
csv_writer.writerows(aggregate_csv_output)

consensus_decentralization/helper.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,9 @@
1717
OUTPUT_DIR = ROOT_DIR / 'output'
1818
MAPPING_INFO_DIR = ROOT_DIR / 'mapping_information'
1919

20+
with open(ROOT_DIR / "config.yaml") as f:
21+
config = safe_load(f)
22+
2023

2124
def valid_date(date_string):
2225
"""
@@ -231,8 +234,6 @@ def get_config_data():
231234
root directory of the project.
232235
:returns: a dictionary of configuration keys and values
233236
"""
234-
with open(ROOT_DIR / "config.yaml") as f:
235-
config = safe_load(f)
236237
return config
237238

238239

consensus_decentralization/mappings/default_mapping.py

Lines changed: 24 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -45,29 +45,34 @@ def perform_mapping(self):
4545
project.
4646
:returns: a list of dictionaries (mapped block data)
4747
"""
48+
clustering_flag = hlp.get_config_data()['analyze_flags']['clustering']
4849
for block in self.data_to_map:
49-
entity = self.map_from_known_identifiers(block)
50-
if entity:
51-
mapping_method = 'known_identifiers'
50+
if not clustering_flag:
51+
entity = self.fallback_mapping(block)
52+
mapping_method = 'fallback_mapping'
5253
else:
53-
entity = self.map_from_known_addresses(block)
54+
entity = self.map_from_known_identifiers(block)
5455
if entity:
55-
mapping_method = 'known_addresses'
56+
mapping_method = 'known_identifiers'
5657
else:
57-
entity = self.fallback_mapping(block)
58-
mapping_method = 'fallback_mapping'
59-
60-
cluster = self.map_from_known_clusters(block)
61-
if cluster:
62-
entity = cluster
63-
mapping_method = 'known_clusters'
64-
65-
# Finally, check legal links to map to the highest-level entity, if relevant
66-
day = hlp.get_date_from_block(block)
67-
legal_links = hlp.get_pool_legal_links(timeframe=day)
68-
if entity in legal_links.keys():
69-
entity = legal_links[entity]
70-
mapping_method = 'known_legal_links'
58+
entity = self.map_from_known_addresses(block)
59+
if entity:
60+
mapping_method = 'known_addresses'
61+
else:
62+
entity = self.fallback_mapping(block)
63+
mapping_method = 'fallback_mapping'
64+
65+
cluster = self.map_from_known_clusters(block)
66+
if cluster:
67+
entity = cluster
68+
mapping_method = 'known_clusters'
69+
70+
# Finally, check legal links to map to the highest-level entity, if relevant
71+
day = hlp.get_date_from_block(block)
72+
legal_links = hlp.get_pool_legal_links(timeframe=day)
73+
if entity in legal_links.keys():
74+
entity = legal_links[entity]
75+
mapping_method = 'known_legal_links'
7176

7277
self.mapped_data.append({
7378
"number": block['number'],

tests/test_analyze.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def test_analyze(setup_and_cleanup):
5252
output_dir=test_output_dir
5353
)
5454

55-
metrics = ['gini', 'nakamoto_coefficient', 'entropy']
55+
metrics = ['gini', 'nakamoto_coefficient', 'entropy=1']
5656
for metric in metrics:
5757
output_file = test_output_dir / f'{metric}.csv'
5858
assert output_file.is_file()
@@ -64,7 +64,7 @@ def test_analyze(setup_and_cleanup):
6464
assert lines[1] == '2018,0.25\n'
6565
elif metric == 'nakamoto_coefficient':
6666
assert lines[1] == '2018,2\n'
67-
elif metric == 'entropy':
67+
elif metric == 'entropy=1':
6868
assert lines[1] == '2018,1.836591668108979\n'
6969

7070
analyze(
@@ -73,7 +73,7 @@ def test_analyze(setup_and_cleanup):
7373
output_dir=test_output_dir
7474
)
7575

76-
metrics = ['gini', 'nakamoto_coefficient', 'entropy']
76+
metrics = ['gini', 'nakamoto_coefficient', 'entropy=1']
7777
for metric in metrics:
7878
output_file = test_output_dir / f'{metric}.csv'
7979
assert output_file.is_file()
@@ -87,7 +87,7 @@ def test_analyze(setup_and_cleanup):
8787
elif metric == 'nakamoto_coefficient':
8888
assert lines[1] == 'Feb-2018,1\n'
8989
assert lines[2] == 'Mar-2018,1\n'
90-
elif metric == 'entropy':
90+
elif metric == 'entropy=1':
9191
assert lines[1] == 'Feb-2018,1.5\n'
9292
assert lines[2] == 'Mar-2018,0.0\n'
9393

@@ -97,7 +97,7 @@ def test_analyze(setup_and_cleanup):
9797
output_dir=test_output_dir
9898
)
9999

100-
metrics = ['gini', 'nakamoto_coefficient', 'entropy']
100+
metrics = ['gini', 'nakamoto_coefficient', 'entropy=1']
101101
for metric in metrics:
102102
output_file = test_output_dir / f'{metric}.csv'
103103
assert output_file.is_file()

tests/test_end_to_end.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ def test_end_to_end(setup_and_cleanup):
8585
'timeframe,sample_bitcoin,sample_cardano\n',
8686
'2010,,\n'
8787
]
88-
with open(test_output_dir / 'entropy.csv') as f:
88+
with open(test_output_dir / 'entropy=1.csv') as f:
8989
lines = f.readlines()
9090
for idx, line in enumerate(lines):
9191
assert line == expected_entropy[idx]
@@ -123,7 +123,7 @@ def test_end_to_end(setup_and_cleanup):
123123
'Feb-2018,1.5,\n',
124124
'Mar-2018,0.0,\n',
125125
]
126-
with open(test_output_dir / 'entropy.csv') as f:
126+
with open(test_output_dir / 'entropy=1.csv') as f:
127127
lines = f.readlines()
128128
for idx, line in enumerate(lines):
129129
assert line == expected_entropy[idx]
@@ -161,7 +161,7 @@ def test_end_to_end(setup_and_cleanup):
161161
'timeframe,sample_bitcoin,sample_cardano\n',
162162
'Dec-2020,,1.9219280948873623\n'
163163
]
164-
with open(test_output_dir / 'entropy.csv') as f:
164+
with open(test_output_dir / 'entropy=1.csv') as f:
165165
lines = f.readlines()
166166
for idx, line in enumerate(lines):
167167
assert line == expected_entropy[idx]

0 commit comments

Comments (0)