Skip to content

Commit 5e45549

Browse files
committed
feat: add /extract/calibrated endpoint for manual Dense chart calibration
1 parent abb48e2 commit 5e45549

File tree

2 files changed

+200
-0
lines changed

2 files changed

+200
-0
lines changed

api/main.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,102 @@ async def extract_data_base64(
419419
)
420420

421421

422+
# Needed at definition time by the form-field default below; harmless if the
# module already imports Form alongside File/UploadFile.
from fastapi import Form


def _validate_calibration(calibration):
    """Return a human-readable problem with *calibration*, or None if it is usable.

    Only axes that are present are checked: each must be a list of at least
    two ``{"pixel": number, "value": number}`` objects covering at least two
    distinct pixel positions, since a linear mapping cannot be derived from
    fewer.
    """
    if not isinstance(calibration, dict):
        return "Calibration must be a JSON object"

    for axis in ("x_axis", "y_axis"):
        if axis not in calibration:
            continue
        points = calibration[axis]
        if not isinstance(points, list) or len(points) < 2:
            return f"Calibration '{axis}' must contain at least 2 points"

        pixels = set()
        for point in points:
            if not isinstance(point, dict):
                return f"Calibration '{axis}' points must be objects with 'pixel' and 'value'"
            for key in ("pixel", "value"):
                number = point.get(key)
                # bool is a subclass of int; true/false are not coordinates.
                if isinstance(number, bool) or not isinstance(number, (int, float)):
                    return f"Calibration '{axis}' points need a numeric '{key}'"
            pixels.add(point["pixel"])

        if len(pixels) < 2:
            return f"Calibration '{axis}' needs at least 2 distinct pixel positions"

    return None


@app.post("/extract/calibrated", response_model=ExtractionResult)
async def extract_calibrated(
    file: UploadFile = File(..., description="Chart image"),
    # Declared as a form field: a bare `str = None` next to File() is read
    # from the query string, so the documented multipart usage was ignored.
    calibration_json: str = Form(
        None, description="JSON object with x_axis / y_axis reference points"
    ),
    client_ip: str = Depends(get_client_ip)
):
    """
    Extract data using user-provided calibration points.

    **Use this for Dense charts where automatic extraction fails.**

    The user provides reference points mapping pixel positions to actual values.
    The API then extracts data points and applies the calibration transform.

    Send `calibration_json` as a multipart form field alongside `file`.

    **calibration_json format:**
    ```json
    {
        "x_axis": [
            {"pixel": 100, "value": 0},
            {"pixel": 500, "value": 20}
        ],
        "y_axis": [
            {"pixel": 350, "value": 0},
            {"pixel": 50, "value": 30}
        ]
    }
    ```

    Provide at least 2 points per axis for linear interpolation.
    """
    import json
    import os

    if not rate_limiter.is_allowed(client_ip):
        raise HTTPException(status_code=429, detail="Rate limit exceeded")

    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="Invalid file type")

    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    start = time.perf_counter()

    try:
        image_bytes = await file.read()
        if len(image_bytes) > 10 * 1024 * 1024:
            raise HTTPException(status_code=400, detail="File too large")

        # Parse and validate the calibration before touching the filesystem,
        # so a bad request never creates a temp file.
        calibration = None
        if calibration_json:
            try:
                calibration = json.loads(calibration_json)
            except json.JSONDecodeError:
                raise HTTPException(
                    status_code=400, detail="Invalid calibration JSON"
                ) from None
            problem = _validate_calibration(calibration)
            if problem:
                raise HTTPException(status_code=400, detail=problem)

        temp_path = image_to_temp_path(image_bytes)

        try:
            # Use CV pipeline with calibration points.
            # NOTE(review): extract_chart is called synchronously inside an
            # async handler, so it occupies the event loop while it runs.
            result = extract_chart(
                image_path=temp_path,
                calibration_points=calibration,
                use_mistral=True,
                generate_overlay_image=False
            )

            csv_lines = ["x,y"]
            csv_lines.extend(f"{point[0]},{point[1]}" for point in result.data)
            csv_content = "\n".join(csv_lines)

            data = parse_csv_to_data(csv_content)
            warnings = [f"[{w.code.value}] {w.message}" for w in result.warnings]
            if calibration:
                warnings.insert(0, "[CALIBRATED] Using user-provided calibration points")

            processing_time = int((time.perf_counter() - start) * 1000)

            return ExtractionResult(
                success=True,
                chart_type=result.chart_type.value,
                confidence=round(result.confidence.overall(), 3),
                data=data,
                csv=csv_content,
                warnings=warnings,
                processing_time_ms=processing_time
            )

        finally:
            # Remove the temp image whether extraction succeeded or failed;
            # an already-missing file is fine.
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass

    except HTTPException:
        # Deliberate 4xx responses raised above pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Calibrated extraction failed: {str(e)}"
        ) from e
516+
517+
422518
# Script entry point: start the API with uvicorn when this file is run directly.
if __name__ == "__main__":
    # Imported here, so uvicorn is only required when running as a script.
    import uvicorn
    # Serve `app` on all network interfaces (0.0.0.0), port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)

wiki/Manual-Calibration.md

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Manual Calibration Guide
2+
3+
> **Use this when automatic extraction fails on Dense charts.**
4+
5+
---
6+
7+
## When to Use
8+
9+
- LLM extraction returns inaccurate data
10+
- Chart has 20+ closely-spaced data points
11+
- You need precise values for research
12+
13+
---
14+
15+
## How It Works
16+
17+
1. Open your chart image in an image editor
18+
2. Note the pixel coordinates of known reference points
19+
3. Provide these as calibration points
20+
4. The API uses your calibration to convert pixel positions into data values
21+
22+
---
23+
24+
## API Usage
25+
26+
### Endpoint
27+
28+
```
29+
POST /extract/calibrated
30+
```
31+
32+
### Parameters
33+
34+
| Parameter | Type | Description |
35+
|-----------|------|-------------|
36+
| `file` | file | Chart image |
37+
| `calibration_json` | string | JSON with reference points |
38+
39+
### Calibration Format
40+
41+
```json
42+
{
43+
"x_axis": [
44+
{"pixel": 100, "value": 0},
45+
{"pixel": 500, "value": 20}
46+
],
47+
"y_axis": [
48+
{"pixel": 350, "value": 0},
49+
{"pixel": 50, "value": 30}
50+
]
51+
}
52+
```
53+
54+
**Important:**
55+
- Provide at least 2 points per axis
56+
- For X-axis: pixel increases left→right, value increases accordingly
57+
- For Y-axis: pixel decreases bottom→top (images have Y=0 at top)
58+
59+
---
60+
61+
## Example
62+
63+
### curl
64+
65+
```bash
66+
curl -X POST "https://chart2csv.kikuai.dev/extract/calibrated" \
67+
-F "file=@dense_chart.png" \
68+
-F 'calibration_json={"x_axis":[{"pixel":50,"value":0},{"pixel":550,"value":20}],"y_axis":[{"pixel":350,"value":0},{"pixel":50,"value":30}]}'
69+
```
70+
71+
### Python
72+
73+
```python
74+
import requests
75+
import json
76+
77+
calibration = {
78+
"x_axis": [
79+
{"pixel": 50, "value": 0},
80+
{"pixel": 550, "value": 20}
81+
],
82+
"y_axis": [
83+
{"pixel": 350, "value": 0},
84+
{"pixel": 50, "value": 30}
85+
]
86+
}
87+
88+
with open("dense_chart.png", "rb") as f:
89+
response = requests.post(
90+
"https://chart2csv.kikuai.dev/extract/calibrated",
91+
files={"file": f},
92+
data={"calibration_json": json.dumps(calibration)}
93+
)
94+
95+
print(response.json()["csv"])
96+
```
97+
98+
---
99+
100+
## Tips
101+
102+
1. **Getting pixel coordinates**: Use any image editor (GIMP, Photoshop, Preview) to hover over axis tick marks
103+
2. **Accuracy**: More calibration points = more accurate results
104+
3. **Y-axis direction**: Remember that image Y coordinates are inverted (0 at top)

0 commit comments

Comments
 (0)