@@ -52,7 +52,8 @@ def __init__(
5252 self .symbols = data_cfg .symbols
5353 self .start_date = data_cfg .start_date
5454 self .end_date = data_cfg .end_date
55- self .timeframe = data_cfg .timeframe or "1d"
55+ self .timeframe = data_cfg .timeframe or "1d"
56+ self .secondary_timeframes = data_cfg .secondary_timeframes or []
5657 # allow both legacy 'cache_dir' and new 'cache_path' keys
5758 self .cache_dir = data_cfg .cache_path or data_cfg .cache_dir or "data/raw"
5859 self .cache_expiration_days = data_cfg .cache_expiration_days
@@ -72,37 +73,66 @@ def _is_cache_valid(self, cache_file: str) -> bool:
7273
def _fetch_single(self, symbol: str, refresh: bool) -> Optional[pd.DataFrame]:
    """Fetch data for a single symbol and handle caching.

    Loads the primary timeframe (from cache or the data source), then
    left-joins aggregated OHLCV columns for each configured secondary
    timeframe onto the primary index.

    Args:
        symbol: Ticker to load.
        refresh: When True, bypass any valid cache and re-fetch.

    Returns:
        DataFrame indexed by timestamp, or None when no data is available
        or an unexpected error occurs (errors are logged, not raised).
    """
    cache_file = os.path.join(
        self.cache_dir, f"{symbol}_{self.timeframe}_data.parquet"
    )
    try:
        df = self._load_timeframe_data(
            symbol=symbol,
            timeframe=self.timeframe,
            cache_file=cache_file,
            refresh=refresh,
        )

        if df is None or df.empty:
            logger.error("No data found for %s", symbol)
            return None

        # The joins below assume a monotonically increasing index.
        df = df.sort_index()

        for secondary_tf in self.secondary_timeframes:
            secondary_cache = os.path.join(
                self.cache_dir, f"{symbol}_{secondary_tf}_data.parquet"
            )
            secondary_df = self._load_timeframe_data(
                symbol=symbol,
                timeframe=secondary_tf,
                cache_file=secondary_cache,
                refresh=refresh,
            )

            if secondary_df is None or secondary_df.empty:
                logger.warning(
                    "No data found for %s at secondary timeframe %s",
                    symbol,
                    secondary_tf,
                )
                continue

            try:
                resampled = self._resample_secondary(
                    secondary_df, df.index, secondary_tf
                )
            except ValueError as exc:
                # Secondary data is best-effort: a malformed secondary
                # frame must not poison the primary series.
                logger.warning(
                    "Skipping secondary timeframe %s for %s: %s",
                    secondary_tf,
                    symbol,
                    exc,
                )
                continue

            df = df.join(resampled, how="left")

        missing_dates = self._check_missing_dates(df)
        if missing_dates:
            logger.warning(
                "Missing dates for %s: %d days", symbol, len(missing_dates)
            )

        logger.info("Successfully retrieved %d records for %s", len(df), symbol)
        return df
    except Exception:
        # Boundary handler: callers expect None on failure, never an
        # exception. logger.exception preserves the traceback that the
        # previous logger.error(f"...: {str(e)}") call discarded.
        logger.exception("Error fetching data for %s", symbol)
        return None
106136
107137 def fetch_data (
108138 self , symbols : Optional [List [str ]] = None , refresh : Optional [bool ] = None
@@ -139,11 +169,77 @@ def fetch_data(
139169
140170 return data_dict
141171
142- def _check_missing_dates (self , df : pd .DataFrame ) -> List [datetime ]:
143- """Check for missing trading days in the data."""
144- all_dates = pd .date_range (start = df .index .min (), end = df .index .max (), freq = "B" )
145- missing_dates = all_dates .difference (df .index )
146- return list (missing_dates )
172+ def _check_missing_dates (self , df : pd .DataFrame ) -> List [datetime ]:
173+ """Check for missing trading days in the data."""
174+ all_dates = pd .date_range (start = df .index .min (), end = df .index .max (), freq = "B" )
175+ missing_dates = all_dates .difference (df .index )
176+ return list (missing_dates )
177+
def _load_timeframe_data(
    self, symbol: str, timeframe: str, cache_file: str, refresh: bool
) -> Optional[pd.DataFrame]:
    """Load data for a given timeframe from cache or datasource.

    Args:
        symbol: Ticker to load.
        timeframe: Bar interval passed through to the data source.
        cache_file: Parquet path used for cache reads and writes.
        refresh: When True, skip the cache and always hit the data source.

    Returns:
        The loaded DataFrame, or None when the source returned nothing.
    """
    if self.use_cache and not refresh and self._is_cache_valid(cache_file):
        logger.info(
            "Loading cached data for %s (%s) from %s", symbol, timeframe, cache_file
        )
        return pd.read_parquet(cache_file)

    # Lazy %-formatting (was an eager f-string) for consistency with the
    # other log calls in this method; rendered message is unchanged.
    logger.info("Fetching data for %s (%s)", symbol, timeframe)
    df = self.data_source.fetch(symbol, self.start_date, self.end_date, timeframe)

    if df is None or df.empty:
        # Caller decides whether missing data is an error or a warning.
        return None

    if self.use_cache:
        os.makedirs(self.cache_dir, exist_ok=True)
        df.to_parquet(cache_file)
        logger.info("Cached data for %s (%s) at %s", symbol, timeframe, cache_file)

    return df
205+
206+ def _resample_secondary (
207+ self ,
208+ df : pd .DataFrame ,
209+ target_index : pd .Index ,
210+ source_timeframe : str ,
211+ ) -> pd .DataFrame :
212+ """Resample a secondary timeframe to the loader's primary timeframe."""
213+
214+ if not isinstance (df .index , pd .DatetimeIndex ):
215+ df = df .copy ()
216+ df .index = pd .to_datetime (df .index )
217+
218+ df = df .sort_index ()
219+
220+ required_columns = {
221+ "Open" : "first" ,
222+ "High" : "max" ,
223+ "Low" : "min" ,
224+ "Close" : "last" ,
225+ "Volume" : "sum" ,
226+ }
227+
228+ missing = [col for col in required_columns if col not in df .columns ]
229+ if missing :
230+ raise ValueError (
231+ f"Secondary timeframe data missing required columns: { missing } "
232+ )
233+
234+ resampled = df .resample (self .timeframe ).agg (required_columns )
235+
236+ renamed = {
237+ col : f"{ col .lower ()} _{ source_timeframe } _{ required_columns [col ]} "
238+ for col in required_columns
239+ }
240+ resampled = resampled .rename (columns = renamed )
241+ resampled = resampled .reindex (target_index )
242+ return resampled
147243
148244 def validate_data (self , data_dict : Dict [str , pd .DataFrame ]) -> bool :
149245 """
0 commit comments