@@ -419,6 +419,102 @@ async def extract_data_base64(
419419 )
420420
421421
def _is_calibration_number(value):
    """Return True when *value* is a finite int/float (booleans excluded)."""
    import math

    # bool is a subclass of int, but true/false is never a sensible coordinate.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # json.loads accepts the non-standard NaN/Infinity literals; reject them.
    return not (isinstance(value, float) and not math.isfinite(value))


def _parse_calibration(calibration_json):
    """Decode and sanity-check the user-supplied calibration payload.

    Returns the decoded dict, or None when no calibration string was given.
    Raises HTTPException(400) when the string is not valid JSON or does not
    follow the layout documented on the endpoint: a JSON object whose
    optional "x_axis" / "y_axis" entries are lists of at least two
    {"pixel": <number>, "value": <number>} objects.
    """
    import json

    if not calibration_json:
        return None

    try:
        calibration = json.loads(calibration_json)
    except json.JSONDecodeError as exc:
        raise HTTPException(status_code=400, detail="Invalid calibration JSON") from exc

    if not isinstance(calibration, dict):
        raise HTTPException(status_code=400, detail="Calibration must be a JSON object")

    for axis in ("x_axis", "y_axis"):
        if axis not in calibration:
            # An omitted axis is passed through as before; only axes that
            # were actually supplied are checked.
            continue

        points = calibration[axis]
        if not isinstance(points, list) or len(points) < 2:
            raise HTTPException(
                status_code=400,
                detail=f"Calibration '{axis}' must be a list of at least 2 points"
            )

        for point in points:
            well_formed = isinstance(point, dict) and all(
                _is_calibration_number(point.get(key)) for key in ("pixel", "value")
            )
            if not well_formed:
                raise HTTPException(
                    status_code=400,
                    detail=f"Calibration '{axis}' points need numeric 'pixel' and 'value'"
                )

    return calibration


@app.post("/extract/calibrated", response_model=ExtractionResult)
async def extract_calibrated(
    file: UploadFile = File(..., description="Chart image"),
    calibration_json: str = None,
    client_ip: str = Depends(get_client_ip)
):
    """
    Extract data using user-provided calibration points.

    **Use this for Dense charts where automatic extraction fails.**

    The user provides reference points mapping pixel positions to actual values.
    The API then extracts data points and applies the calibration transform.

    **calibration_json format:**
    ```json
    {
        "x_axis": [
            {"pixel": 100, "value": 0},
            {"pixel": 500, "value": 20}
        ],
        "y_axis": [
            {"pixel": 350, "value": 0},
            {"pixel": 50, "value": 30}
        ]
    }
    ```

    Provide at least 2 points per axis for linear interpolation.
    """
    import os

    if not rate_limiter.is_allowed(client_ip):
        raise HTTPException(status_code=429, detail="Rate limit exceeded")

    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="Invalid file type")

    # Monotonic clock: the elapsed time cannot go negative or jump if the
    # system wall clock is adjusted mid-request.
    start = time.perf_counter()

    try:
        # Validate the cheap text input first so a bad calibration payload is
        # rejected before the upload is read or anything is written to disk.
        calibration = _parse_calibration(calibration_json)

        image_bytes = await file.read()
        if not image_bytes:
            raise HTTPException(status_code=400, detail="Empty file")
        if len(image_bytes) > 10 * 1024 * 1024:
            raise HTTPException(status_code=400, detail="File too large")

        temp_path = image_to_temp_path(image_bytes)

        try:
            # Use CV pipeline with calibration points
            # NOTE(review): extract_chart is called synchronously inside an
            # async handler; if it is slow it will hold up the event loop --
            # confirm whether it should be moved to a worker thread.
            result = extract_chart(
                image_path=temp_path,
                calibration_points=calibration,
                use_mistral=True,
                generate_overlay_image=False
            )

            # Serialise the extracted points as a two-column CSV, then derive
            # the structured payload from that same text.
            csv_lines = ["x,y"]
            csv_lines.extend(f"{point[0]},{point[1]}" for point in result.data)
            csv_content = "\n".join(csv_lines)

            data = parse_csv_to_data(csv_content)
            warnings = [f"[{w.code.value}] {w.message}" for w in result.warnings]
            if calibration:
                # Surface first that user calibration was in effect.
                warnings.insert(0, "[CALIBRATED] Using user-provided calibration points")

            processing_time = int((time.perf_counter() - start) * 1000)

            return ExtractionResult(
                success=True,
                chart_type=result.chart_type.value,
                confidence=round(result.confidence.overall(), 3),
                data=data,
                csv=csv_content,
                warnings=warnings,
                processing_time_ms=processing_time
            )

        finally:
            # Always remove the temp image; a file that is already gone is
            # fine (avoids the exists()/unlink() race).
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass

    except HTTPException:
        # Deliberate client-facing errors pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Calibrated extraction failed: {str(e)}") from e
516+
517+
if __name__ == "__main__":
    # Script entry point: serve the application with uvicorn when this module
    # is executed directly rather than imported.
    import uvicorn

    # 0.0.0.0 listens on every network interface; the port is fixed at 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
0 commit comments