Skip to content

Commit b5f24b3

Browse files
authored
Merge pull request #2 from KikuAI-Lab/claude/review-code-quality-X326M
docs: add comprehensive code quality and documentation review
2 parents a973c67 + 570b2c2 commit b5f24b3

File tree

10 files changed

+1904
-232
lines changed

10 files changed

+1904
-232
lines changed

.env.example

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,133 @@
1-
# Chart2CSV API Environment
1+
# Chart2CSV Environment Configuration
2+
# Copy this file to .env and fill in your values
3+
# Never commit .env files to version control!
4+
5+
# ============================================
6+
# API Keys
7+
# ============================================
8+
9+
# Mistral AI API Key (required for LLM extraction and Mistral OCR)
10+
# Get your API key at: https://console.mistral.ai/
11+
# Used for: LLM-based chart extraction (default mode)
212
MISTRAL_API_KEY=your_mistral_api_key_here
13+
14+
15+
# ============================================
16+
# CORS Configuration (Production)
17+
# ============================================
18+
19+
# Comma-separated list of allowed origins for CORS
20+
# Example: ALLOWED_ORIGINS=https://example.com,https://app.example.com
21+
# Leave empty to use defaults:
22+
# - https://kiku-jw.github.io
23+
# - https://chart2csv.kikuai.dev
24+
# - http://localhost:3000
25+
ALLOWED_ORIGINS=
26+
27+
28+
# ============================================
29+
# Application Settings
30+
# ============================================
31+
32+
# Environment: development | staging | production
33+
# Affects logging level and error detail exposure
34+
ENV=production
35+
36+
# API Host and Port (for local development)
37+
# Production deployments typically use a reverse proxy
38+
API_HOST=0.0.0.0
39+
API_PORT=8000
40+
41+
# Log Level: DEBUG | INFO | WARNING | ERROR | CRITICAL
42+
LOG_LEVEL=INFO
43+
44+
45+
# ============================================
46+
# Rate Limiting
47+
# ============================================
48+
49+
# Maximum requests per minute per IP
50+
# Default: 20
51+
RATE_LIMIT_PER_MINUTE=20
52+
53+
54+
# ============================================
55+
# File Upload Limits
56+
# ============================================
57+
58+
# Maximum file size in MB
59+
# Default: 10
60+
MAX_FILE_SIZE_MB=10
61+
62+
# Maximum image dimensions (width or height in pixels)
63+
# Default: 10000
64+
MAX_IMAGE_DIMENSION=10000
65+
66+
67+
# ============================================
68+
# OCR Configuration
69+
# ============================================
70+
71+
# OCR Backend: tesseract | mistral | auto
72+
# - tesseract: Use Tesseract OCR (offline, faster)
73+
# - mistral: Use Mistral Vision API (online, more accurate)
74+
# - auto: Try Tesseract first, fall back to Mistral if available
75+
# Default: auto
76+
OCR_BACKEND=auto
77+
78+
# Tesseract executable path (optional, auto-detected if in PATH)
79+
# TESSERACT_CMD=/usr/bin/tesseract
80+
81+
82+
# ============================================
83+
# Extraction Defaults
84+
# ============================================
85+
86+
# Default extraction mode: llm | cv | auto
87+
# - llm: Use LLM vision (Pixtral) - fastest, most accurate
88+
# - cv: Use computer vision pipeline - works offline
89+
# - auto: Try LLM first, fall back to CV
90+
# Default: llm
91+
DEFAULT_EXTRACTION_MODE=llm
92+
93+
# Enable overlay image generation
94+
# Helps debug extraction quality
95+
# Default: false (for performance)
96+
GENERATE_OVERLAY=false
97+
98+
99+
# ============================================
100+
# Cache Configuration
101+
# ============================================
102+
103+
# Enable OCR result caching
104+
# Speeds up repeated extractions of the same image
105+
# Default: true
106+
ENABLE_OCR_CACHE=true
107+
108+
# Cache directory
109+
# Default: .cache/ocr
110+
CACHE_DIR=.cache/ocr
111+
112+
# Cache TTL in days (0 = no expiration)
113+
# Default: 7
114+
CACHE_TTL_DAYS=7
115+
116+
117+
# ============================================
118+
# Development / Debugging
119+
# ============================================
120+
121+
# Enable debug mode (verbose logging, detailed errors)
122+
# WARNING: Do not enable in production!
123+
# Default: false
124+
DEBUG=false
125+
126+
# Preserve temporary files for debugging
127+
# Temp images are normally deleted after processing
128+
# Default: false
129+
KEEP_TEMP_FILES=false
130+
131+
# Enable FastAPI auto-reload (development only)
132+
# Default: false
133+
RELOAD=false

.gitignore

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,30 @@ results/
5454
output/
5555
*.png.overlay.png
5656
bench_report.json
57+
temp_*.png
58+
temp_*.jpg
59+
temp_*.jpeg
60+
61+
# Environment variables and secrets
62+
.env
63+
.env.local
64+
.env.*.local
65+
*.key
66+
credentials.json
5767

5868
# Logs
5969
*.log
70+
logs/
71+
72+
# Coverage reports
73+
.coverage.*
74+
coverage.xml
75+
*.cover
76+
.hypothesis/
6077

6178
# Distribution
6279
MANIFEST
80+
81+
# Type checking
82+
.pytype/
83+
pyrightconfig.json

.pre-commit-config.yaml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Pre-commit hooks configuration for Chart2CSV
2+
# Install: pip install pre-commit
3+
# Setup: pre-commit install
4+
# Run manually: pre-commit run --all-files
5+
6+
repos:
7+
# Code formatting with Black
8+
- repo: https://github.com/psf/black
9+
rev: 23.12.1
10+
hooks:
11+
- id: black
12+
language_version: python3.8
13+
args: [--line-length=100]
14+
15+
# Import sorting with isort
16+
- repo: https://github.com/pycqa/isort
17+
rev: 5.13.2
18+
hooks:
19+
- id: isort
20+
args: [--profile=black, --line-length=100]
21+
22+
# Linting with Ruff (fast alternative to flake8)
23+
- repo: https://github.com/astral-sh/ruff-pre-commit
24+
rev: v0.1.9
25+
hooks:
26+
- id: ruff
27+
args: [--fix, --exit-non-zero-on-fix]
28+
29+
# Type checking with mypy
30+
- repo: https://github.com/pre-commit/mirrors-mypy
31+
rev: v1.8.0
32+
hooks:
33+
- id: mypy
34+
additional_dependencies:
35+
- types-Pillow
36+
- types-requests
37+
args: [--ignore-missing-imports, --no-strict-optional]
38+
exclude: ^(tests/|scripts/)
39+
40+
# General file checks
41+
- repo: https://github.com/pre-commit/pre-commit-hooks
42+
rev: v4.5.0
43+
hooks:
44+
- id: trailing-whitespace
45+
args: [--markdown-linebreak-ext=md]
46+
- id: end-of-file-fixer
47+
- id: check-yaml
48+
- id: check-json
49+
- id: check-toml
50+
- id: check-added-large-files
51+
args: [--maxkb=1000]
52+
- id: check-case-conflict
53+
- id: check-merge-conflict
54+
- id: detect-private-key
55+
- id: mixed-line-ending
56+
args: [--fix=lf]
57+
58+
# Security checks
59+
- repo: https://github.com/PyCQA/bandit
60+
rev: 1.7.6
61+
hooks:
62+
- id: bandit
63+
args: [-c, pyproject.toml]
64+
additional_dependencies: ["bandit[toml]"]
65+
exclude: ^(tests/|scripts/)
66+
67+
# pre-commit.ci settings: skip slow hooks when run by the hosted pre-commit.ci service
68+
ci:
69+
skip: [mypy, bandit]
70+
autofix_prs: true
71+
autoupdate_schedule: weekly

0 commit comments

Comments
 (0)