1- import argparse
21import logging
32from consensus_decentralization .aggregate import aggregate
43from consensus_decentralization .map import apply_mapping
# Configure the root logger once at import time: timestamped messages at INFO level.
logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)
1110
1211
def process_data(force_map, ledger_dir, ledger, output_dir):
    """
    Parses and maps the raw data of a ledger, unless mapped data for it already exists.
    :param force_map: bool. If True, then the parsing and mapping will be performed, regardless of whether
        the mapped data file of the ledger already exists
    :param ledger_dir: path-like object (supporting the / operator) of the ledger's output directory, where the
        'mapped_data.json' file is looked up
    :param ledger: the ledger to process; it is passed as is to parse and apply_mapping
    :param output_dir: the output directory that is passed to apply_mapping
    :returns: None
    """
    mapped_data_file = ledger_dir / 'mapped_data.json'
    # Skip the parsing / mapping steps when the mapped data already exist and no re-mapping was requested
    if force_map or not mapped_data_file.is_file():
        parsed_data = parse(ledger, input_dir=hlp.RAW_DATA_DIR)
        apply_mapping(ledger, parsed_data=parsed_data, output_dir=output_dir)
1817
1918
def main(ledgers, timeframe, granularity, output_dir=hlp.OUTPUT_DIR):
    """
    Executes the entire pipeline (parsing, mapping, aggregating, analyzing and optionally plotting) for some
    ledgers and a timeframe.
    :param ledgers: list of strings that correspond to the ledgers whose data should be analyzed
    :param timeframe: tuple of (start_date, end_date) where each date is a datetime.date object.
    :param granularity: string that corresponds to the granularity that will be used for the analysis. It can be one
        of: day, week, month, year, all.
    :param output_dir: pathlib.PosixPath object of the directory where the output data will be saved

    The remaining options are not parameters; they are read through the helper module:
    - hlp.get_force_map_flag(): if True, then the parsing and mapping will be performed, regardless of whether
        mapped data for some or all of the ledgers already exist. The same flag is also passed to aggregate.
    - hlp.get_plot_flag(): if True, then plots are generated after the analysis.
    - hlp.get_plot_config_data()['animated']: passed to plot as its animated argument.
        Warning: generating animated plots might take a long time
    """
    logging.info(f"The ledgers that will be analyzed are: {','.join(ledgers)}")

    force_map = hlp.get_force_map_flag()

    for ledger in ledgers:
        ledger_dir = output_dir / ledger
        ledger_dir.mkdir(parents=True, exist_ok=True)  # create ledger output directory if it doesn't already exist

        process_data(force_map, ledger_dir, ledger, output_dir)

        aggregate(
            ledger,
            output_dir,
            timeframe,
            granularity,
            force_map
        )

    # The analysis (and the plotting) runs once over all ledgers, after each of them has been aggregated
    used_metrics = analyze(
        ledgers,
        aggregated_data_filename=hlp.get_blocks_per_entity_filename(granularity, timeframe),
        output_dir=output_dir
    )

    if hlp.get_plot_flag():
        plot(
            ledgers,
            metrics=used_metrics,
            aggregated_data_filename=hlp.get_blocks_per_entity_filename(granularity, timeframe),
            animated=hlp.get_plot_config_data()['animated']
        )
6264
6365
if __name__ == '__main__':
    # All run options (ledgers, granularity, dates) are obtained through the helper module
    ledgers = hlp.get_ledgers()

    granularity = hlp.get_granularity()

    start_date, end_date = hlp.get_start_end_dates()
    timeframe_start = hlp.get_timeframe_beginning(start_date)
    timeframe_end = hlp.get_timeframe_end(end_date)
    if timeframe_end < timeframe_start:
        # NOTE(review): the message still mentions a "--timeframe" command line flag, although the dates are
        # obtained from hlp.get_start_end_dates() here - confirm whether the wording should be updated.
        raise ValueError('Invalid --timeframe values. Please note that if providing a second date, it must occur after '
                         'the first date.')
    timeframe = (timeframe_start, timeframe_end)

    main(ledgers, timeframe, granularity)

    logging.info('Done. Please check the output directory for results.')
0 commit comments