1414
1515ROOT_DIR = pathlib .Path (__file__ ).resolve ().parent .parent
1616RAW_DATA_DIR = ROOT_DIR / 'raw_block_data'
17- OUTPUT_DIR = ROOT_DIR / 'output '
17+ INTERIM_DIR = ROOT_DIR / 'processed_data '
1818MAPPING_INFO_DIR = ROOT_DIR / 'mapping_information'
19+ RESULTS_DIR = ROOT_DIR / 'results'
1920
2021with open (ROOT_DIR / "config.yaml" ) as f :
2122 config = safe_load (f )
@@ -190,11 +191,13 @@ def write_blocks_per_entity_to_file(output_dir, blocks_per_entity, dates, filena
190191 csv_writer .writerow (entity_row )
191192
192193
193- def get_blocks_per_entity_from_file (filepath ):
194+ def get_blocks_per_entity_from_file (filepath , population_windows ):
194195 """
195196 Retrieves information about the number of blocks that each entity produced over some timeframe for some project.
196197 :param filepath: the path to the file with the relevant information. It can be either an absolute or a relative
197198 path in either a pathlib.PosixPath object or a string.
199+ :param population_windows: int representing the number of windows to look back and forward when determining if an
200+ entity is active during a certain time frame
198201 :returns: a tuple of length 2 where the first item is a list of time chunks (strings) and the second item is a
199202 dictionary with entities (keys) and a list of the number of blocks they produced during each time chunk (values)
200203 """
@@ -206,7 +209,17 @@ def get_blocks_per_entity_from_file(filepath):
206209 for row in csv_reader :
207210 entity = row [0 ]
208211 for idx , item in enumerate (row [1 :]):
209- if item != '0' :
212+ if item == '0' :
213+ if population_windows == 'all' :
214+ blocks_per_entity [entity ][dates [idx ]] = 0
215+ else :
216+ # If the entity hasn't produced any blocks in the current time chunk, we only consider it as
217+ # active if it has produced at least one block in population_windows time chunks before or after
218+ # (otherwise it's not considered part of the population for this time frame)
219+ for i in range (max (0 , idx - population_windows ), min (len (row ) - 1 , idx + population_windows + 1 )):
220+ if row [i + 1 ] != '0' :
221+ blocks_per_entity [entity ][dates [idx ]] = 0
222+ else :
210223 blocks_per_entity [entity ][dates [idx ]] = int (item )
211224 return dates , blocks_per_entity
212225
@@ -294,7 +307,7 @@ def read_mapped_project_data(project_dir):
294307 :param project_dir: pathlib.PosixPath object of the output directory corresponding to the project
295308 :returns: a dictionary with the mapped data
296309 """
297- with open (project_dir / 'mapped_data.json' ) as f :
310+ with open (project_dir / get_mapped_data_filename ( get_clustering_flag ()) ) as f :
298311 data = json .load (f )
299312 return data
300313
@@ -309,6 +322,15 @@ def get_representative_dates(time_chunks):
309322 return [str (chunk [0 ] + (chunk [1 ] - chunk [0 ]) // 2 ) for chunk in time_chunks ]
310323
311324
def get_aggregated_data_dir_name(clustering_flag):
    """
    Builds the name of the directory that holds the aggregated (blocks per entity) data
    :param clustering_flag: truthy if the data is clustered, falsy if it is not
    :returns: str, 'blocks_per_entity_clustered' or 'blocks_per_entity_non_clustered'
    """
    if clustering_flag:
        suffix = 'clustered'
    else:
        suffix = 'non_clustered'
    return f'blocks_per_entity_{suffix}'
332+
333+
312334def get_blocks_per_entity_filename (timeframe , estimation_window , frequency ):
313335 """
314336 Determines the filename of the csv file that contains the aggregated data
@@ -363,6 +385,21 @@ def get_estimation_window_and_frequency():
363385 raise ValueError ('"estimation_window" or "frequency" missing from config file' )
364386
365387
def get_population_windows():
    """
    Retrieves the number of windows to be used for estimating the population of block producers
    :returns: the value of the "population_windows" config field, i.e. the number of windows to look back and forward
        when determining if an entity is active during a certain time frame. Note that
        get_blocks_per_entity_from_file also accepts the string 'all' for this setting, so the value is not
        necessarily an int.
    :raises ValueError: if the population_windows field is missing from the config file
    """
    # Load the config outside the try block (as the other config getters do), so that a KeyError raised while
    # obtaining the config is not misreported as a missing "population_windows" field
    config = get_config_data()
    try:
        return config['population_windows']
    except KeyError as err:
        raise ValueError('"population_windows" missing from config file') from err
401+
402+
366403def get_plot_flag ():
367404 """
368405 Gets the flag that determines whether generate plots for the output
@@ -395,3 +432,35 @@ def get_force_map_flag():
395432 return config ['execution_flags' ]['force_map' ]
396433 except KeyError :
397434 raise ValueError ('Flag "force_map" missing from config file' )
435+
436+
def get_clustering_flag():
    """
    Reads the flag that controls whether clustering is performed
    :returns: the value stored under analyze_flags -> clustering in the config (expected to be a boolean)
    :raises ValueError: if the "analyze_flags" section or its "clustering" entry is missing from the config file
    """
    config_data = get_config_data()
    try:
        analyze_flags = config_data['analyze_flags']
        clustering = analyze_flags['clustering']
    except KeyError:
        raise ValueError('Flag "clustering" missing from config file')
    return clustering
448+
449+
def get_results_dir(estimation_window, frequency, population_windows):
    """
    Retrieves the path to the results directory for the specific config parameters
    :param estimation_window: the estimation window (in days) that is encoded in the directory name
    :param frequency: the sampling frequency (in days) that is encoded in the directory name
    :param population_windows: the population windows setting that is encoded in the directory name
    :returns: pathlib path object pointing to the sub-directory of RESULTS_DIR that corresponds to the given
        parameters, e.g. RESULTS_DIR / '30_day_window_with_1_population_windows_sampled_every_7_days'
    """
    # The name uses underscores as the only separator; no whitespace may follow the replacement fields, otherwise
    # it would end up in the directory name on disk
    results_dir_name = (f'{estimation_window}_day_window_with_{population_windows}_population_windows_sampled_every'
                        f'_{frequency}_days')
    return RESULTS_DIR / results_dir_name
458+
459+
def get_mapped_data_filename(clustering_flag):
    """
    Builds the filename of the json file that holds the mapped data
    :param clustering_flag: truthy if the data is clustered, falsy if it is not
    :returns: str, 'mapped_data_clustered.json' or 'mapped_data_non_clustered.json'
    """
    variant = 'clustered' if clustering_flag else 'non_clustered'
    return f'mapped_data_{variant}.json'
0 commit comments