@@ -24,27 +24,36 @@ def clean_column_names(df):
2424 return df
2525
2626
27- def fetch_stock_data (ticker , period , interval ):
28- data = yf .download (tickers = ticker , period = period , interval = interval )
29- data = data .reset_index ()
30- if isinstance (data .columns , pd .MultiIndex ):
31- data .columns = ['_' .join (filter (None , col )).strip () for col in data .columns ]
32- data = clean_column_names (data )
33- if 'Date' in data .columns :
34- data .rename (columns = {'Date' : 'Datetime' }, inplace = True )
35- elif 'Datetime' not in data .columns :
36- data .rename (columns = {data .columns [0 ]: 'Datetime' }, inplace = True )
37- data = data .set_index ('Datetime' ).asfreq ('h' )
38- for col in ['Open' , 'High' , 'Low' , 'Close' ]:
39- if col in data .columns :
40- data [col ] = data [col ].ffill ()
41- if 'Volume' in data .columns :
42- data ['Volume' ] = data ['Volume' ].fillna (0 )
43- data .reset_index (inplace = True )
44- return data
27+ # def fetch_stock_data(ticker, period, interval):
28+ # data = yf.download(tickers=ticker, period=period, interval=interval)
29+ # data = data.reset_index()
4530
31+ # if data.empty:
32+ # raise ValueError(f"No stock data fetched for ticker '{ticker}' with period '{period}' and interval '{interval}'")
4633
47- def fetch_options_data (symbol , days_to_fetch ):
34+ # if isinstance(data.columns, pd.MultiIndex):
35+ # data.columns = ['_'.join(filter(None, col)).strip() for col in data.columns]
36+ # data = clean_column_names(data)
37+ # if 'Date' in data.columns:
38+ # data.rename(columns={'Date': 'Datetime'}, inplace=True)
39+ # elif 'Datetime' not in data.columns:
40+ # data.rename(columns={data.columns[0]: 'Datetime'}, inplace=True)
41+ # data = data.set_index('Datetime').asfreq('h')
42+ # for col in ['Open', 'High', 'Low', 'Close']:
43+ # if col in data.columns:
44+ # data[col] = data[col].ffill()
45+ # if 'Volume' in data.columns:
46+ # data['Volume'] = data['Volume'].fillna(0)
47+ # data.reset_index(inplace=True)
48+ # return data
49+
50+
51+
52+
53+
54+
55+
56+ # def fetch_options_data(symbol, days_to_fetch):
4857 all_data = []
4958 for i in range (days_to_fetch ):
5059 date = datetime .now () - timedelta (days = i )
@@ -72,11 +81,24 @@ def fetch_options_data(symbol, days_to_fetch):
7281 pass
7382 time .sleep (2 )
7483 options_df = pd .DataFrame (all_data )
84+ if options_df .empty :
85+ raise ValueError (f"No options data fetched for symbol '{ symbol } ' in the last { days_to_fetch } days" )
7586 options_df ['date' ] = pd .to_datetime (options_df ['date' ], errors = 'coerce' )
7687 return options_df
7788
7889
79- def clean_and_merge (stock_df , options_df ):
90+
91+
92+
93+
94+ # options_df['date'] = pd.to_datetime(options_df['date'], errors='coerce')
95+ # return options_df
96+
97+
98+
99+
100+
101+ # def clean_and_merge(stock_df, options_df):
80102 if 'Datetime' not in stock_df .columns :
81103 possible = [col for col in stock_df .columns if 'datetime' in col .lower ()]
82104 if possible :
@@ -118,6 +140,141 @@ def clean_and_merge(stock_df, options_df):
118140 return merged
119141
120142
143+
144+
def fetch_stock_data(ticker, period, interval):
    """Download OHLCV bars for ``ticker`` and normalise them for the pipeline.

    The download is attempted at the requested ``interval`` first and, if that
    yields nothing, retried once at the daily ("1d") interval. If both attempts
    come back empty, a placeholder frame of zero-valued bars is returned so the
    caller does not crash; a warning is printed in that case.

    Args:
        ticker: Symbol passed to ``yf.download`` as ``tickers``.
        period: Look-back period string passed to ``yf.download`` (for example
            ``"1mo"``); also used to size the placeholder frame.
        interval: Bar interval string passed to ``yf.download``.

    Returns:
        A DataFrame with a ``Datetime`` column and, where available, numeric
        ``Open``/``High``/``Low``/``Close``/``Volume`` columns whose gaps are
        forward-filled and then zero-filled.
    """
    import yfinance as yf
    import pandas as pd

    price_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    download_kwargs = dict(tickers=ticker, period=period, auto_adjust=False, progress=False)

    data = yf.download(interval=interval, **download_kwargs)

    # If the requested interval produced nothing, retry once with daily bars.
    if data.empty and interval != "1d":
        data = yf.download(interval="1d", **download_kwargs)

    # Still nothing: hand back an all-zero placeholder instead of failing.
    # NOTE(review): these rows are synthetic, not market data; downstream
    # results built on them are meaningless.
    if data.empty:
        print(f"⚠️ No stock data fetched for {ticker}. Returning placeholder DataFrame.")
        # date_range defaults to daily frequency, so this is one row per day.
        dates = pd.date_range(end=pd.Timestamp.now(), periods=period_to_hours(period))
        return pd.DataFrame({
            'Datetime': dates,
            'Open': 0, 'High': 0, 'Low': 0, 'Close': 0, 'Volume': 0,
        })

    data = data.reset_index()
    if isinstance(data.columns, pd.MultiIndex):
        # Flatten ('Close', 'XYZ') -> 'Close_XYZ'; empty levels are dropped.
        data.columns = ['_'.join(filter(None, col)).strip() for col in data.columns]
        # Without this step the flattened names never match the plain OHLCV
        # names checked below, so the price columns would be skipped here and
        # later replaced by zeros in clean_and_merge.
        # NOTE(review): presumably clean_column_names maps the flattened names
        # back to 'Open', 'Close', ... as the earlier version of this function
        # relied on - confirm against its definition.
        data = clean_column_names(data)

    # Standardise the timestamp column name: daily data exposes 'Date',
    # otherwise fall back to whatever the first column is.
    if 'Date' in data.columns:
        data.rename(columns={'Date': 'Datetime'}, inplace=True)
    elif 'Datetime' not in data.columns:
        data.rename(columns={data.columns[0]: 'Datetime'}, inplace=True)
    data['Datetime'] = pd.to_datetime(data['Datetime'], errors='coerce')

    for col in price_columns:
        if col in data.columns:
            # Unparseable values become NaN, then take the previous bar's
            # value; leading gaps (nothing to carry forward) become 0.
            data[col] = pd.to_numeric(data[col], errors='coerce').ffill().fillna(0)

    return data
178+
def fetch_options_data(symbol, days_to_fetch):
    """Collect option-chain rows for ``symbol`` for the last ``days_to_fetch`` days.

    For every non-weekend day in the window the option chain is requested via
    ``nse_optionchain_scrapper(symbol)`` and one row per strike is recorded,
    labelled with that day's date. A failed request is reported and skipped;
    it never aborts the whole collection.

    NOTE(review): the scraper is called with the symbol only, so every day in
    the window receives whatever the scraper returns at call time, re-labelled
    with a past date. Confirm whether historical chains are actually intended.

    Args:
        symbol: Underlying symbol handed to ``nse_optionchain_scrapper``.
        days_to_fetch: Number of calendar days to walk back from today
            (weekends are skipped).

    Returns:
        A DataFrame with columns ``date`` (datetime), ``strikePrice`` and the
        ``CE_*`` / ``PE_*`` open-interest, change-in-OI, volume and last-price
        fields. When nothing could be fetched the frame is empty but has the
        same columns.
    """
    columns = [
        'date', 'strikePrice',
        'CE_openInterest', 'CE_changeinOpenInterest', 'CE_volume', 'CE_ltp',
        'PE_openInterest', 'PE_changeinOpenInterest', 'PE_volume', 'PE_ltp',
    ]
    all_data = []

    for offset in range(days_to_fetch):
        date = datetime.now() - timedelta(days=offset)
        if date.weekday() >= 5:  # Saturday/Sunday: no request is made
            continue
        try:
            opt_data = nse_optionchain_scrapper(symbol)
            for rec in opt_data['records']['data']:
                # A strike may carry only a call or only a put leg.
                ce = rec.get('CE', {})
                pe = rec.get('PE', {})
                all_data.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'strikePrice': rec['strikePrice'],
                    'CE_openInterest': ce.get('openInterest', 0),
                    'CE_changeinOpenInterest': ce.get('changeinOpenInterest', 0),
                    'CE_volume': ce.get('totalTradedVolume', 0),
                    'CE_ltp': ce.get('lastPrice', 0),
                    'PE_openInterest': pe.get('openInterest', 0),
                    'PE_changeinOpenInterest': pe.get('changeinOpenInterest', 0),
                    'PE_volume': pe.get('totalTradedVolume', 0),
                    'PE_ltp': pe.get('lastPrice', 0),
                })
        except Exception as exc:
            # Best effort: keep going, but say what went wrong instead of
            # hiding it.
            print(f"⚠️ Options fetch failed for {symbol} on {date:%Y-%m-%d}: {exc}")
        # Throttle after every request, including failed ones, so a failing
        # endpoint is not retried back-to-back.
        time.sleep(1)

    if not all_data:
        print(f"⚠️ No options data fetched for {symbol}. Returning placeholder DataFrame.")
        options_df = pd.DataFrame(columns=columns)
    else:
        options_df = pd.DataFrame(all_data)

    # Convert in both cases so 'date' has a datetime dtype even when empty.
    options_df['date'] = pd.to_datetime(options_df['date'], errors='coerce')
    return options_df
217+
def clean_and_merge(stock_df, options_df):
    """Build the feature frame by joining price bars with daily option totals.

    Price features (``Return``, ``MA_5``, ``MA_10``, ``Volatility``) are
    derived from ``Close``. Option rows are summed per ``date`` and joined to
    every bar that falls on the same calendar day; a put/call ratio (``PCR``)
    is computed from the summed open interest. When ``options_df`` is empty,
    the option columns and ``PCR`` are filled with 0 instead.

    Neither input frame is modified; the function works on copies.

    Args:
        stock_df: Frame with a ``Datetime`` column; missing
            ``Open``/``High``/``Low``/``Close``/``Volume`` columns are created
            as 0.
        options_df: Frame with a ``date`` column and ``CE_*`` / ``PE_*``
            open-interest, change-in-OI and volume columns; missing ones are
            created as 0. May be empty.

    Returns:
        A new DataFrame with the price columns, derived price features,
        option aggregates, ``PCR`` and the calendar fields ``hour``, ``day``
        and ``weekday``. In the non-empty case it also carries the merge-key
        columns ``date_x``, ``date_y`` and ``date_only``.
    """
    price_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    option_cols = [
        'CE_openInterest', 'PE_openInterest',
        'CE_changeinOpenInterest', 'PE_changeinOpenInterest',
        'CE_volume', 'PE_volume',
    ]

    # Work on copies so the caller's frames are left untouched.
    stock_df = stock_df.copy()
    options_df = options_df.copy()

    # Ensure OHLCV columns exist.
    for col in price_cols:
        if col not in stock_df.columns:
            stock_df[col] = 0
    # The .dt accessors below need a real datetime dtype.
    stock_df['Datetime'] = pd.to_datetime(stock_df['Datetime'], errors='coerce')

    close = stock_df['Close']
    stock_df['Return'] = close.pct_change().fillna(0)
    # Rolling means are undefined for the first window-1 rows: back-fill them
    # from the first complete window, and fall back to 0 when the frame is
    # shorter than the window (nothing to back-fill from).
    stock_df['MA_5'] = close.rolling(5).mean().bfill().fillna(0)
    stock_df['MA_10'] = close.rolling(10).mean().bfill().fillna(0)
    stock_df['Volatility'] = stock_df['Return'].rolling(5).std().fillna(0)

    # Clean options: every aggregate column must exist and be numeric.
    for col in option_cols:
        if col not in options_df.columns:
            options_df[col] = 0
        else:
            options_df[col] = pd.to_numeric(options_df[col], errors='coerce').fillna(0)

    if options_df.empty:
        merged = stock_df
        for col in option_cols + ['PCR']:
            merged[col] = 0
    else:
        options_df['date'] = pd.to_datetime(options_df['date'], errors='coerce')
        options_agg = options_df.groupby('date')[option_cols].sum().reset_index()
        # Small epsilon keeps the ratio finite when call open interest is 0.
        options_agg['PCR'] = options_agg['PE_openInterest'] / (options_agg['CE_openInterest'] + 1e-6)
        # Join on the calendar day so each intraday bar picks up that day's totals.
        stock_df['date'] = stock_df['Datetime'].dt.date
        options_agg['date_only'] = options_agg['date'].dt.date
        merged = pd.merge(
            stock_df, options_agg, left_on='date', right_on='date_only', how='left'
        ).fillna(0)

    merged['hour'] = merged['Datetime'].dt.hour
    merged['day'] = merged['Datetime'].dt.day
    merged['weekday'] = merged['Datetime'].dt.weekday

    return merged
261+
def period_to_hours(period_str):
    """Approximate how many daily rows a period string such as ``'3mo'`` spans.

    Despite the name, the result is a count of days, not hours: ``'d'`` counts
    as 1, ``'wk'`` as 7, ``'mo'`` as 30 and ``'y'`` as 365. The name is kept so
    existing callers keep working.

    Args:
        period_str: A count followed by a unit (``'5d'``, ``'1wk'``, ``'3mo'``,
            ``'2y'``), or one of the keywords ``'ytd'`` / ``'max'``.

    Returns:
        The approximate number of days as an int. A count with an
        unrecognised or missing unit is returned unchanged. ``'ytd'`` and
        ``'max'`` have no fixed length and are approximated as 365.

    Raises:
        ValueError: If the string does not start with a number and is not a
            recognised keyword.
    """
    days_per_unit = {'d': 1, 'wk': 7, 'mo': 30, 'y': 365}

    text = str(period_str).strip().lower()
    if text in ('ytd', 'max'):
        # Open-ended periods: one year is an arbitrary but workable size.
        return 365

    # Split "<count><unit>" by peeling the alphabetic unit off the end; the
    # unit can be more than one character ('mo', 'wk').
    count_text = text.rstrip('abcdefghijklmnopqrstuvwxyz')
    unit = text[len(count_text):]
    if not count_text.isdecimal():
        raise ValueError(f"Unrecognised period string: {period_str!r}")

    return int(count_text) * days_per_unit.get(unit, 1)
273+
274+
275+
276+
277+
121278def train_and_predict (merged_df , sequence_length = 10 , epochs = 30 , use_transfer_learning = True ):
122279 """
123280 Train LSTM model with optional transfer learning.
@@ -146,6 +303,11 @@ def train_and_predict(merged_df, sequence_length=10, epochs=30, use_transfer_lea
146303 X_scaled = scaler_X .fit_transform (X )
147304 y_scaled = scaler_y .fit_transform (y )
148305
306+ if len (X_scaled ) <= sequence_length :
307+ raise ValueError (
308+ f"Not enough data to create sequences. Got { len (X_scaled )} rows, sequence_length={ sequence_length } . "
309+ f"Try using a longer period or a lower interval (e.g., 1d instead of 1h)."
310+ )
149311 X_seq , y_seq = [], []
150312 for i in range (len (X_scaled ) - sequence_length ):
151313 X_seq .append (X_scaled [i :i + sequence_length ])
@@ -252,15 +414,41 @@ def summarize_predictions(pred_df):
252414 return summary_features
253415
254416
417+ # def main_pipeline(ticker, symbol, period, interval, days_to_fetch):
418+ # stock_df = fetch_stock_data(ticker=ticker, period=period, interval=interval)
419+ # options_df = fetch_options_data(symbol=symbol, days_to_fetch=days_to_fetch)
420+ # merged_df = clean_and_merge(stock_df, options_df)
421+ # pred_df = train_and_predict(merged_df)
422+ # summary_dict = summarize_predictions(pred_df)
423+ # return json.dumps(summary_dict, indent=4)
424+
425+
426+
def main_pipeline(ticker, symbol, period, interval, days_to_fetch):
    """Run the full fetch -> merge -> train -> summarise pipeline.

    Args:
        ticker: Symbol used for the ticker check and for the stock download.
        symbol: Symbol used for the options download.
        period: Look-back period forwarded to ``fetch_stock_data``.
        interval: Bar interval forwarded to ``fetch_stock_data``.
        days_to_fetch: Number of days forwarded to ``fetch_options_data``.

    Returns:
        The prediction summary serialised as an indented JSON string.

    Raises:
        ValueError: If the ticker metadata cannot be retrieved, or if it is
            retrieved but carries no ``regularMarketPrice`` entry.
    """
    # 1. Check that the ticker is valid. Only the lookup sits inside the try
    # block, so the "invalid ticker" error below is not caught and re-labelled
    # as a fetch failure.
    try:
        info = yf.Ticker(ticker).info
    except Exception as e:
        raise ValueError(f"Failed to fetch ticker info: {e}") from e
    if not info or 'regularMarketPrice' not in info:
        raise ValueError(f"Ticker {ticker} seems invalid or data unavailable")

    # 2. Fetch stock and options data.
    stock_df = fetch_stock_data(ticker=ticker, period=period, interval=interval)
    options_df = fetch_options_data(symbol=symbol, days_to_fetch=days_to_fetch)

    # 3. Merge and clean.
    merged_df = clean_and_merge(stock_df, options_df)

    # 4. Train the LSTM and predict.
    pred_df = train_and_predict(merged_df)

    # 5. Summarise predictions.
    summary_dict = summarize_predictions(pred_df)
    return json.dumps(summary_dict, indent=4)
262449
263450
451+
264452if __name__ == "__main__" :
265453 import sys
266454
0 commit comments