Skip to content

Commit 95d757a

Browse files
Update execution parameter handling (#154)
* Remove cmd args and move all params to config file * Validate dates given in config * Update get_granularity return value when empty * Fix docstring --------- Co-authored-by: LadyChristina <[email protected]>
1 parent 4816892 commit 95d757a

File tree

7 files changed

+160
-131
lines changed

7 files changed

+160
-131
lines changed

config.yaml

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# The metrics for which an analysis should be performed.
12
metrics:
23
entropy:
34
- 1
@@ -12,14 +13,8 @@ metrics:
1213
- 0.33
1314
- 0.66
1415

15-
analyze_flags:
16-
clustering: true
17-
18-
default_timeframe:
19-
start_date: 2010-01-01
20-
end_date: 2023-12-31
21-
22-
default_ledgers:
16+
# The ledgers for which an analysis should be performed.
17+
ledgers:
2318
- bitcoin
2419
- bitcoin_cash
2520
- cardano
@@ -28,3 +23,36 @@ default_ledgers:
2823
- litecoin
2924
- tezos
3025
- zcash
26+
27+
# Execution flags
28+
execution_flags:
29+
force_map: false
30+
31+
# Analyze flags
32+
analyze_flags:
33+
clustering: true
34+
35+
# The timeframe for which an analysis should be performed.
36+
# Each date is a string of the form YYYY-MM-DD.
37+
# If granularity is also set, then the analysis will run on the timeframe of the two farthest snapshots.
38+
timeframe:
39+
start_date: 2010-01-01
40+
end_date: 2023-12-31
41+
42+
# The granularity for the analysis over the timeframe defined above (from start_date to end_date).
43+
# It can be one of: "day", "week", "month", "year", or empty. If empty, no aggregation is performed and the whole timeframe is analyzed as a single unit of time.
44+
granularity: "month"
45+
46+
input_directories: # Paths to directories that contain raw input data
47+
- ./input
48+
49+
# Paths to directories of snapshot db files; either absolute or relative from run.py.
50+
# The first path will be used to write newly created dbs and the output of runs
51+
output_directories:
52+
- ./output
53+
54+
55+
# Plot flags
56+
plot_parameters:
57+
plot: false
58+
animated: true

consensus_decentralization/collect_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def collect_data(ledgers, force_query):
5454
if __name__ == '__main__':
5555
logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)
5656

57-
default_ledgers = hlp.get_default_ledgers()
57+
default_ledgers = hlp.get_ledgers()
5858

5959
parser = argparse.ArgumentParser()
6060
parser.add_argument(

consensus_decentralization/helper.py

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,24 +265,27 @@ def get_metrics_config():
265265
return metrics
266266

267267

268-
def get_default_ledgers():
268+
def get_ledgers():
269269
"""
270-
Retrieves data regarding the default ledgers to use
270+
Retrieves data regarding the ledgers to use
271271
:returns: a list of strings that correspond to the ledgers that will be used (unless overridden by the relevant cmd
272272
arg)
273273
"""
274274
config = get_config_data()
275-
ledgers = config['default_ledgers']
275+
ledgers = config['ledgers']
276276
return ledgers
277277

278278

279-
def get_default_start_end_dates():
279+
def get_start_end_dates():
280280
"""
281281
Retrieves the start and end dates for which to analyze data
282282
:returns: a tuple of two strings, (<start date>, <end date>)
283283
"""
284284
config = get_config_data()
285-
return str(config['default_timeframe']['start_date']), str(config['default_timeframe']['end_date'])
285+
for date_type in ['start', 'end']:
286+
if not valid_date(str(config['timeframe'][f'{date_type}_date'])):
287+
raise ValueError(f'Invalid {date_type} date')
288+
return str(config['timeframe']['start_date']), str(config['timeframe']['end_date'])
286289

287290

288291
def read_mapped_project_data(project_dir):
@@ -361,3 +364,58 @@ def get_date_from_block(block, level='day'):
361364
elif level == 'day':
362365
return timestamp[:10]
363366
raise ValueError(f'Invalid level: {level}')
367+
368+
369+
def get_granularity():
370+
"""
371+
Retrieves the granularity to be used in the analysis
372+
:returns: string in ['day', 'week', 'month', 'year'] that represents the chosen granularity
373+
or 'all' if the relevant field is empty in the config file
374+
:raises ValueError: if the granularity field is missing from the config file or if
375+
the chosen value is not one of the allowed ones
376+
"""
377+
try:
378+
granularity = get_config_data()['granularity']
379+
if granularity:
380+
if granularity in ['day', 'week', 'month', 'year']:
381+
return granularity
382+
else:
383+
raise ValueError('Malformed "granularity" in config; should be one of: "day", "week", "month", "year", or empty')
384+
else:
385+
return 'all'
386+
except KeyError:
387+
raise ValueError('"granularity" not in config file')
388+
389+
390+
def get_plot_flag():
391+
"""
392+
Gets the flag that determines whether to generate plots for the output
393+
:returns: boolean
394+
:raises ValueError: if the flag is not set in the config file
395+
"""
396+
config = get_config_data()
397+
try:
398+
return config['plot_parameters']['plot']
399+
except KeyError:
400+
raise ValueError('Flag "plot" not in config file')
401+
402+
403+
def get_plot_config_data():
404+
"""
405+
Retrieves the plot-related config parameters
406+
:returns: dictionary
407+
"""
408+
return get_config_data()['plot_parameters']
409+
410+
411+
def get_force_map_flag():
412+
"""
413+
Gets the flag that determines whether to forcefully map the data
414+
:returns: boolean
415+
:raises ValueError: if the flag is not set in the config file
416+
"""
417+
config = get_config_data()
418+
try:
419+
return config['execution_flags']['force_map']
420+
except KeyError:
421+
raise ValueError('Flag "force_map" not in config file')

consensus_decentralization/plot.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,10 @@ def plot(ledgers, metrics, aggregated_data_filename, animated):
258258

259259

260260
if __name__ == '__main__':
261-
default_ledgers = hlp.get_default_ledgers()
261+
ledgers = hlp.get_ledgers()
262262
default_metrics = hlp.get_metrics_config().keys()
263263

264-
default_start_date, default_end_date = hlp.get_default_start_end_dates()
264+
default_start_date, default_end_date = hlp.get_start_end_dates()
265265
timeframe_start = hlp.get_timeframe_beginning(default_start_date)
266266
timeframe_end = hlp.get_timeframe_end(default_end_date)
267267

@@ -271,8 +271,8 @@ def plot(ledgers, metrics, aggregated_data_filename, animated):
271271
'--ledgers',
272272
nargs="*",
273273
type=str.lower,
274-
default=default_ledgers,
275-
choices=default_ledgers,
274+
default=ledgers,
275+
choices=ledgers,
276276
help='The ledgers whose data will be plotted.'
277277
)
278278
parser.add_argument(

run.py

Lines changed: 38 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import argparse
21
import logging
32
from consensus_decentralization.aggregate import aggregate
43
from consensus_decentralization.map import apply_mapping
@@ -10,19 +9,19 @@
109
logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)
1110

1211

13-
def process_data(force_map, project_dir, project, output_dir):
14-
mapped_data_file = project_dir / 'mapped_data.json'
12+
def process_data(force_map, ledger_dir, ledger, output_dir):
13+
mapped_data_file = ledger_dir / 'mapped_data.json'
1514
if force_map or not mapped_data_file.is_file():
16-
parsed_data = parse(project=project, input_dir=hlp.RAW_DATA_DIR)
17-
apply_mapping(project=project, parsed_data=parsed_data, output_dir=output_dir)
15+
parsed_data = parse(ledger, input_dir=hlp.RAW_DATA_DIR)
16+
apply_mapping(ledger, parsed_data=parsed_data, output_dir=output_dir)
1817

1918

20-
def main(projects, timeframe, aggregate_by, force_map, make_plots, make_animated_plots, output_dir=hlp.OUTPUT_DIR):
19+
def main(ledgers, timeframe, granularity, output_dir=hlp.OUTPUT_DIR):
2120
"""
2221
Executes the entire pipeline (parsing, mapping, analyzing) for some projects and timeframes.
23-
:param projects: list of strings that correspond to the ledgers whose data should be analyzed
22+
:param ledgers: list of strings that correspond to the ledgers whose data should be analyzed
2423
:param timeframe: tuple of (start_date, end_date) where each date is a datetime.date object.
25-
:param aggregate_by: string that corresponds to the granularity that will be used for the analysis. It can be one
24+
:param granularity: string that corresponds to the granularity that will be used for the analysis. It can be one
2625
of: day, week, month, year, all.
2726
:param force_map: bool. If True, then the parsing and mapping will be performed, regardless of whether
2827
mapped data for some or all of the projects already exist
@@ -31,104 +30,52 @@ def main(projects, timeframe, aggregate_by, force_map, make_plots, make_animated
3130
Warning: generating animated plots might take a long time
3231
:param output_dir: pathlib.PosixPath object of the directory where the output data will be saved
3332
"""
34-
logging.info(f"The ledgers that will be analyzed are: {','.join(projects)}")
35-
for project in projects:
36-
project_dir = output_dir / project
37-
project_dir.mkdir(parents=True, exist_ok=True) # create project output directory if it doesn't already exist
33+
logging.info(f"The ledgers that will be analyzed are: {','.join(ledgers)}")
3834

39-
process_data(force_map, project_dir, project, output_dir)
35+
force_map = hlp.get_force_map_flag()
36+
37+
for ledger in ledgers:
38+
ledger_dir = output_dir / ledger
39+
ledger_dir.mkdir(parents=True, exist_ok=True) # create ledger output directory if it doesn't already exist
40+
41+
process_data(force_map, ledger_dir, ledger, output_dir)
4042

4143
aggregate(
42-
project=project,
43-
output_dir=output_dir,
44-
timeframe=timeframe,
45-
aggregate_by=aggregate_by,
46-
force_aggregate=force_map
44+
ledger,
45+
output_dir,
46+
timeframe,
47+
granularity,
48+
force_map
4749
)
4850

4951
used_metrics = analyze(
50-
projects=projects,
51-
aggregated_data_filename=hlp.get_blocks_per_entity_filename(aggregate_by=aggregate_by, timeframe=timeframe),
52+
ledgers,
53+
aggregated_data_filename=hlp.get_blocks_per_entity_filename(granularity, timeframe),
5254
output_dir=output_dir
5355
)
5456

55-
if make_plots:
57+
if hlp.get_plot_flag():
5658
plot(
57-
ledgers=projects,
59+
ledgers,
5860
metrics=used_metrics,
59-
aggregated_data_filename=hlp.get_blocks_per_entity_filename(aggregate_by=aggregate_by, timeframe=timeframe),
60-
animated=make_animated_plots
61+
aggregated_data_filename=hlp.get_blocks_per_entity_filename(granularity, timeframe),
62+
animated=hlp.get_plot_config_data()['animated']
6163
)
6264

6365

6466
if __name__ == '__main__':
65-
default_ledgers = hlp.get_default_ledgers()
66-
start_date, end_date = hlp.get_default_start_end_dates()
67-
68-
parser = argparse.ArgumentParser()
69-
parser.add_argument(
70-
'--ledgers',
71-
nargs="*",
72-
type=str.lower,
73-
default=default_ledgers,
74-
choices=default_ledgers,
75-
help='The ledgers that will be analyzed.'
76-
)
77-
parser.add_argument(
78-
'--timeframe',
79-
nargs="*",
80-
type=hlp.valid_date,
81-
default=[start_date, end_date],
82-
help='The timeframe that will be analyzed. You can provide two values to mark the beginning and end of the '
83-
'time frame or a single value that encapsulates both.'
84-
)
85-
parser.add_argument(
86-
'--aggregate-by',
87-
nargs="?",
88-
type=str.lower,
89-
default='month',
90-
choices=['day', 'week', 'month', 'year', 'all'],
91-
help='The granularity that will be used for the analysis. It can be one of: "day", "week", "month", "year", '
92-
'"all" and by default it is month. Note that in the case of weekly aggregation, we consider a week to '
93-
'be 7 consecutive days, starting from the first day of the time period under consideration (so not '
94-
'necessarily Monday to Sunday). If "all" is chosen then no aggregation will be performed, meaning that '
95-
'the given timeframe will be treated as one unit of time in our analysis.'
96-
)
97-
parser.add_argument(
98-
'--force-map',
99-
action='store_true',
100-
help='Flag to specify whether to map the parsed data, regardless if the mapped data files exist.'
101-
)
102-
parser.add_argument(
103-
'--plot',
104-
action='store_true',
105-
help='Flag to specify whether to produce and save plots of the results.'
106-
)
107-
parser.add_argument(
108-
'--animated',
109-
action='store_true',
110-
help='Flag to specify whether to also generate animated plots.'
111-
)
112-
args = parser.parse_args()
113-
114-
aggregate_by = args.aggregate_by
115-
timeframe = args.timeframe
116-
if len(timeframe) > 2:
117-
parser.error('Too many values given for --timeframe argument. Please provide one date to get a snapshot or '
118-
'two dates to get a time series.')
119-
timeframe_start = hlp.get_timeframe_beginning(timeframe[0])
120-
timeframe_end = hlp.get_timeframe_end(timeframe[-1])
67+
ledgers = hlp.get_ledgers()
68+
69+
granularity = hlp.get_granularity()
70+
71+
start_date, end_date = hlp.get_start_end_dates()
72+
timeframe_start = hlp.get_timeframe_beginning(start_date)
73+
timeframe_end = hlp.get_timeframe_end(end_date)
12174
if timeframe_end < timeframe_start:
122-
parser.error('Invalid --timeframe values. Please note that if providing a second date, it must occur after '
123-
'the first date.')
124-
125-
main(
126-
projects=args.ledgers,
127-
timeframe=(timeframe_start, timeframe_end),
128-
aggregate_by=aggregate_by,
129-
force_map=args.force_map,
130-
make_plots=args.plot,
131-
make_animated_plots=args.animated
132-
)
75+
raise ValueError('Invalid --timeframe values. Please note that if providing a second date, it must occur after '
76+
'the first date.')
77+
timeframe = (timeframe_start, timeframe_end)
78+
79+
main(ledgers, timeframe, granularity)
13380

13481
logging.info('Done. Please check the output directory for results.')

0 commit comments

Comments
 (0)