# Fixed locations of generated artifacts, all inside UPLOAD_FOLDER.
PLOT_PATH = os.path.join(UPLOAD_FOLDER, "plot.png")
# Forecast chart: saved by the prediction endpoint and embedded in the PDF
# report when the file exists.
FORECAST_PLOT_PATH = os.path.join(UPLOAD_FOLDER, "forecast_plot.png")
REPORT_PATH = os.path.join(UPLOAD_FOLDER, "summary_report.pdf")
# Copy of the most recently uploaded CSV; written by the upload endpoints and
# read back by the prediction endpoint.
CSV_CACHE = os.path.join(UPLOAD_FOLDER, "cached_upload.csv")

# Last summary text produced during upload; handed to the PDF report
# generator when a forecast is requested.
cached_summary = ""
3234
def cleanup_temp_dir():
    """Remove the temporary working directory held by ``TEMP_DIR``."""
    TEMP_DIR.cleanup()


# Run the cleanup automatically when the interpreter shuts down.
atexit.register(cleanup_temp_dir)
3638
# In-memory log buffer; request handlers reset it and return its contents
# in their JSON responses.
log_stream = io.StringIO()


def log_print(*args):
    """Append a message to the in-memory log buffer.

    Accepts the same positional arguments as ``print`` and formats them the
    same way (space-separated, newline-terminated), but writes to
    ``log_stream`` instead of standard output.
    """
    print(*args, file=log_stream)
@@ -68,34 +69,36 @@ def generate_pdf_report(summary, r2, mse, forecast_dict):
6869 text .textLine (f"{ k } : { v } " )
6970
7071 c .drawText (text )
71-
7272 if os .path .exists (FORECAST_PLOT_PATH ):
7373 c .drawImage (FORECAST_PLOT_PATH , 1 * inch , 1 * inch , width = 5.5 * inch , preserveAspectRatio = True )
74-
7574 c .save ()
7675
77- @app .route ("/" )
78- def index ():
79- return jsonify ({"message" : "AI DataScience Backend is running on Azure." })
@app.route("/get-columns", methods=["POST"])
def get_columns():
    """Return the column names of an uploaded CSV and cache the data.

    Expects a multipart upload under the ``file`` form field. The parsed
    frame is re-written to ``CSV_CACHE`` so later requests can reuse it
    without another upload.

    Returns:
        ``{"columns": [...]}`` on success, or ``({"error": ...}, 400)`` when
        no file was sent or the upload cannot be parsed as CSV.
    """
    file = request.files.get("file")
    if file is None:
        return jsonify({"error": "No file uploaded"}), 400

    # A bad upload is a client error: report it as 400 instead of letting the
    # parser exception surface as an unhandled 500.
    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        return jsonify({"error": "Uploaded file is not a valid CSV."}), 400

    df.to_csv(CSV_CACHE, index=False)
    return jsonify({"columns": df.columns.tolist()})
8084
8185@app .route ("/upload" , methods = ["POST" ])
8286def upload_file ():
8387 global cached_summary
84-
8588 log_stream .truncate (0 )
8689 log_stream .seek (0 )
8790
8891 file = request .files .get ("file" )
8992 x_col = request .form .get ("x_column" )
9093 y_col = request .form .get ("y_column" )
91- model_choice = request .form .get ("model" , "linear" )
9294
9395 if file is None or not x_col or not y_col :
9496 return jsonify ({"error" : "Missing file or column selection." }), 400
9597
9698 filepath = os .path .join (UPLOAD_FOLDER , file .filename )
9799 file .save (filepath )
98100 df = pd .read_csv (filepath )
101+ df .to_csv (CSV_CACHE , index = False )
99102
100103 if x_col not in df .columns or y_col not in df .columns :
101104 return jsonify ({"error" : f"'{ x_col } ' or '{ y_col } ' not in dataset." }), 400
@@ -114,15 +117,12 @@ def upload_file():
114117
115118 df ['X_date' ] = pd .to_datetime (df ['X' ], errors = 'coerce' )
116119 use_dates = df ['X_date' ].notna ().sum () >= len (df ) // 2
117- try :
118- if use_dates :
119- df = df .dropna (subset = ['X_date' ])
120- X = df ['X_date' ].map (pd .Timestamp .toordinal ).values .reshape (- 1 , 1 )
121- else :
122- X = df ['X' ].astype (float ).values .reshape (- 1 , 1 )
123- y = df ['Y' ].astype (float ).values
124- except :
125- return jsonify ({"error" : "Failed to parse X or Y as numeric or date." }), 400
120+ if use_dates :
121+ df = df .dropna (subset = ['X_date' ])
122+ X = df ['X_date' ].map (pd .Timestamp .toordinal ).values .reshape (- 1 , 1 )
123+ else :
124+ X = df ['X' ].astype (float ).values .reshape (- 1 , 1 )
125+ y = df ['Y' ].astype (float ).values
126126
127127 model = LinearRegression ()
128128 model .fit (X , y )
@@ -142,11 +142,9 @@ def upload_file():
142142 ]
143143 )
144144 summary = response .choices [0 ].message .content
145- log_print ("Summary generated by OpenAI." )
146- cached_summary = summary # cache for report
147- except Exception as e :
145+ cached_summary = summary
146+ except :
148147 summary = "OpenAI summarization failed."
149- log_print ("OpenAI error:" , str (e ))
150148 cached_summary = summary
151149
152150 return jsonify ({
@@ -164,80 +162,58 @@ def predict():
164162 if not future_x :
165163 return jsonify ({"forecast" : "No future values provided." }), 400
166164
167- try :
168- values = future_x .split ("," )
169- numeric_vals , date_vals = [], []
170- for x in values :
171- try :
172- date_vals .append (datetime .strptime (x .strip (), "%Y-%m-%d" ).toordinal ())
173- except :
174- numeric_vals .append (float (x .strip ()))
175- values_parsed = np .array (date_vals if date_vals else numeric_vals ).reshape (- 1 , 1 )
176- except Exception as e :
177- log_print ("Parsing future_x failed:" , str (e ))
178- return jsonify ({"forecast" : "Invalid format for future values." }), 400
165+ values = future_x .split ("," )
166+ numeric_vals , date_vals = [], []
167+ for x in values :
168+ try :
169+ date_vals .append (datetime .strptime (x .strip (), "%Y-%m-%d" ).toordinal ())
170+ except :
171+ numeric_vals .append (float (x .strip ()))
172+ values_parsed = np .array (date_vals if date_vals else numeric_vals ).reshape (- 1 , 1 )
179173
180- try :
181- files = os .listdir (UPLOAD_FOLDER )
182- csv_file = max (
183- [os .path .join (UPLOAD_FOLDER , f ) for f in files if f .endswith (".csv" )],
184- key = os .path .getctime
185- )
186- df = pd .read_csv (csv_file )
187- df = df .dropna ()
188- df .columns = ['X' , 'Y' ]
189- df ['X_date' ] = pd .to_datetime (df ['X' ], errors = 'coerce' )
190- use_dates = df ['X_date' ].notna ().sum () >= len (df ) // 2
191-
192- if use_dates :
193- df = df .dropna (subset = ['X_date' ])
194- X = df ['X_date' ].map (pd .Timestamp .toordinal ).values .reshape (- 1 , 1 )
195- else :
196- X = df ['X' ].astype (float ).values .reshape (- 1 , 1 )
197- y = df ['Y' ].astype (float ).values
198-
199- model = LinearRegression ()
200- model .fit (X , y )
201-
202- y_future = model .predict (values_parsed )
203- result = {
204- datetime .fromordinal (int (x )).strftime ("%Y-%m-%d" ) if use_dates else float (x ): round (p , 2 )
205- for x , p in zip (values_parsed .flatten (), y_future )
206- }
207-
208- X_all = np .concatenate ((X , values_parsed ))
209- x_min , x_max = X_all .min (), X_all .max ()
210- x_plot = np .linspace (x_min , x_max , 200 ).reshape (- 1 , 1 )
211- y_plot = model .predict (x_plot )
212-
213- plt .figure ()
214- plt .scatter (X , y , label = 'Training Data' , alpha = 0.6 )
215- plt .plot (x_plot , y_plot , color = 'blue' , label = 'Regression Line' )
216- plt .scatter (values_parsed , y_future , color = 'red' , label = 'Forecast' , marker = 'x' )
217- plt .legend ()
218- plt .xlabel ('X' )
219- plt .ylabel ('Y' )
220- plt .title ('Forecast with Regression Line' )
221- plt .savefig (FORECAST_PLOT_PATH )
222- plt .close ()
223-
224- # PDF report generation
225- generate_pdf_report (cached_summary , r2_score (y , model .predict (X )), mean_squared_error (y , model .predict (X )), result )
226-
227- return jsonify ({
228- "forecast" : result ,
229- "log" : log_stream .getvalue (),
230- "plot_url" : f"{ BACKEND_BASE_URL } /plot.png" ,
231- "forecast_plot_url" : f"{ BACKEND_BASE_URL } /forecast_plot.png"
232- })
233-
234- except Exception as e :
235- log_print ("Prediction failed:" , str (e ))
236- return jsonify ({
237- "forecast" : "Prediction failed." ,
238- "log" : log_stream .getvalue (),
239- "plot_url" : None
240- })
174+ df = pd .read_csv (CSV_CACHE )
175+ df .columns = ['X' , 'Y' ]
176+ df ['X_date' ] = pd .to_datetime (df ['X' ], errors = 'coerce' )
177+ use_dates = df ['X_date' ].notna ().sum () >= len (df ) // 2
178+ if use_dates :
179+ df = df .dropna (subset = ['X_date' ])
180+ X = df ['X_date' ].map (pd .Timestamp .toordinal ).values .reshape (- 1 , 1 )
181+ else :
182+ X = df ['X' ].astype (float ).values .reshape (- 1 , 1 )
183+ y = df ['Y' ].astype (float ).values
184+
185+ model = LinearRegression ()
186+ model .fit (X , y )
187+
188+ y_future = model .predict (values_parsed )
189+ result = {
190+ datetime .fromordinal (int (x )).strftime ("%Y-%m-%d" ) if use_dates else float (x ): round (p , 2 )
191+ for x , p in zip (values_parsed .flatten (), y_future )
192+ }
193+
194+ x_min , x_max = min (X .min (), values_parsed .min ()), max (X .max (), values_parsed .max ())
195+ x_plot = np .linspace (x_min , x_max , 200 ).reshape (- 1 , 1 )
196+ y_plot = model .predict (x_plot )
197+
198+ plt .figure ()
199+ plt .scatter (X , y , label = 'Training Data' , alpha = 0.6 )
200+ plt .plot (x_plot , y_plot , color = 'blue' , label = 'Regression Line' )
201+ plt .scatter (values_parsed , y_future , color = 'red' , label = 'Forecast' , marker = 'x' )
202+ plt .legend ()
203+ plt .xlabel ('X' )
204+ plt .ylabel ('Y' )
205+ plt .title ('Forecast with Regression Line' )
206+ plt .savefig (FORECAST_PLOT_PATH )
207+ plt .close ()
208+
209+ generate_pdf_report (cached_summary , r2_score (y , model .predict (X )), mean_squared_error (y , model .predict (X )), result )
210+
211+ return jsonify ({
212+ "forecast" : result ,
213+ "log" : log_stream .getvalue (),
214+ "plot_url" : f"{ BACKEND_BASE_URL } /plot.png" ,
215+ "forecast_plot_url" : f"{ BACKEND_BASE_URL } /forecast_plot.png"
216+ })
241217
242218@app .route ("/plot.png" )
243219def serve_plot ():
0 commit comments