1+ from typing import List
2+ import pandas as pd
3+
4+ from .snapshot import load_and_process_snapshot
5+
6+
def save_dataframe(df: pd.DataFrame, filepath: str):
    """
    Persist a DataFrame to disk in pickle format and report the destination.

    Parameters:
    df (pd.DataFrame): The DataFrame to write out.
    filepath (str): Destination path of the pickle file.
    """
    # Write first: if pickling fails, the confirmation below is never printed.
    df.to_pickle(filepath)
    message = f"DataFrame saved to {filepath}"
    print(message)
17+
18+
def load_dataframe(filepath: str) -> pd.DataFrame:
    """
    Read a previously pickled DataFrame back from disk and report the source.

    Parameters:
    filepath (str): Path of the pickle file to read.

    Returns:
    pd.DataFrame: The object stored in the pickle file.
    """
    # NOTE(review): unpickling can execute arbitrary code; only use this on
    # files produced by a trusted source (e.g. save_dataframe) - confirm callers.
    frame = pd.read_pickle(filepath)
    message = f"DataFrame loaded from {filepath}"
    print(message)
    return frame
32+
33+
def extract_timestamp_from_filepath(filepath: str) -> pd.Timestamp:
    """
    Extracts a timestamp from the filepath.

    Assumes the filepath is of the format: 'data/YYYY-MM-DD/orderbook_HH-MM.json'

    Parameters:
    filepath (str): The full path to the snapshot file.

    Returns:
    pd.Timestamp: The extracted timestamp, or pd.NaT when the path does not
    follow the expected layout or encodes an impossible date/time
    (e.g. month 13 or hour 99).
    """
    import re
    from datetime import datetime

    # The dot before 'json' is escaped so that only a literal '.json'
    # extension is accepted (an unescaped '.' would match any character).
    pattern = r'.*/(\d{4}-\d{2}-\d{2})/orderbook_(\d{2}-\d{2})\.json$'
    match = re.match(pattern, filepath)
    if match is None:
        # Path does not follow the expected layout.
        return pd.NaT

    date_str, time_str = match.groups()  # 'YYYY-MM-DD', 'HH-MM'
    try:
        timestamp = datetime.strptime(f"{date_str} {time_str}", '%Y-%m-%d %H-%M')
    except ValueError:
        # Digits had the right shape but are not a real date/time; report
        # "no timestamp" rather than raising, so callers can skip the file.
        return pd.NaT
    return pd.Timestamp(timestamp)
61+
62+
def load_snapshots_to_dataframe(filepaths: List[str]) -> pd.DataFrame:
    """
    Loads and processes snapshots from a list of filepaths to create a DataFrame.

    Filepaths from which no timestamp can be extracted are skipped. Each
    remaining file is passed to load_and_process_snapshot together with its
    timestamp, and the results become the rows of the DataFrame.

    Parameters:
    filepaths (List[str]): List of snapshot filepaths.

    Returns:
    pd.DataFrame: DataFrame containing computed analysis for each snapshot,
    indexed by 'timestamp' and sorted by that index. When no filepath yields
    a record, an empty DataFrame with an empty 'timestamp' index is returned.
    """
    records = []
    for filepath in filepaths:
        timestamp = extract_timestamp_from_filepath(filepath)
        if pd.isnull(timestamp):
            continue  # Skip files without a valid timestamp
        records.append(load_and_process_snapshot(filepath, timestamp))

    if not records:
        # pd.DataFrame([]) has no 'timestamp' column, so set_index would raise
        # KeyError; return an empty frame with the same index name instead.
        return pd.DataFrame(index=pd.DatetimeIndex([], name='timestamp'))

    # Assumes every record exposes a 'timestamp' field - TODO confirm against
    # load_and_process_snapshot.
    df_stats = pd.DataFrame(records)
    return df_stats.set_index('timestamp').sort_index()
0 commit comments