Skip to content

Commit bf2dcca

Browse files
committed
P5-PR4: HumbleFax IMAP scaffold (disabled by default) — offload blocking IMAP ops to thread; PDF-only, sanitized filenames, caps on count/MB; no PHI in logs; background start/stop hooks
1 parent 8648431 commit bf2dcca

File tree

3 files changed

+376
-0
lines changed

3 files changed

+376
-0
lines changed

api/app/main.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from .phaxio_service import get_phaxio_service
3333
from .sinch_service import get_sinch_service
3434
from .signalwire_service import get_signalwire_service
35+
from .services.imap_humblefax import HumbleFaxImapWorker
3536
from .config_loader import load_provider_secrets
3637
from .providers import sinch as sinch_inbound_adapter
3738
from .providers import phaxio as phaxio_inbound_adapter
@@ -82,6 +83,8 @@
8283
# Expose phaxio_service module for tests that reference app.phaxio_service
8384
from . import phaxio_service as _phaxio_module # noqa: E402
8485
app.phaxio_service = _phaxio_module # type: ignore[attr-defined]
86+
app.state.hf_imap_worker = None # type: ignore[attr-defined]
87+
app.state.hf_imap_task = None # type: ignore[attr-defined]
8588

8689
# Log documented credential fallbacks (no secret values)
8790
try:
@@ -168,6 +171,36 @@ def require_admin(x_api_key: Optional[str] = Header(default=None)):
168171
raise HTTPException(401, detail="Admin authentication failed")
169172
return info
170173

174+
175+
# ===== Phase 5: HumbleFax IMAP worker (disabled by default) =====
176+
@app.on_event("startup")
async def _start_hf_imap_worker():
    """Start the HumbleFax IMAP polling task if it is enabled and configured.

    The worker and its asyncio task are stored on ``app.state`` so the
    shutdown hook can stop them. Any failure is non-fatal: the worker simply
    stays disabled and the application continues to start.
    """
    try:
        worker = HumbleFaxImapWorker()
        if worker.enabled and worker.configured():
            app.state.hf_imap_worker = worker  # type: ignore[attr-defined]
            app.state.hf_imap_task = asyncio.create_task(worker.run_forever())  # type: ignore[attr-defined]
    except Exception as exc:
        # Non-fatal; worker stays disabled. Reset state so the shutdown hook
        # never sees a worker without a task (or vice versa).
        app.state.hf_imap_worker = None  # type: ignore[attr-defined]
        app.state.hf_imap_task = None  # type: ignore[attr-defined]
        # Log only the exception type: the message could echo configuration
        # values, and no secrets/PHI may reach the logs.
        import logging

        logging.getLogger(__name__).warning(
            "HumbleFax IMAP worker failed to start (%s); worker stays disabled",
            type(exc).__name__,
        )
186+
187+
188+
@app.on_event("shutdown")
async def _stop_hf_imap_worker():
    """Signal the HumbleFax IMAP worker to stop and briefly await its task.

    Best-effort: every error (including the 2-second wait timing out) is
    swallowed so shutdown is never blocked by the worker.
    """
    try:
        worker = getattr(app.state, "hf_imap_worker", None)
        task = getattr(app.state, "hf_imap_task", None)
        if worker is not None:
            worker.stop()
        if task is None:
            return
        # Give the polling task a moment to wind down.
        try:
            await asyncio.wait_for(task, timeout=2)
        except Exception:
            pass
    except Exception:
        pass
203+
171204
router_cfg_v4 = APIRouter(prefix="/admin/config/v4", tags=["ConfigurationV4"], dependencies=[Depends(require_admin)])
172205

173206

api/app/services/imap_humblefax.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""
2+
HumbleFax IMAP polling scaffold (Phase 5):
3+
4+
- Disabled by default; enable with HUMBLEFAX_IMAP_ENABLED=true
5+
- Offloads blocking IMAP ops to a thread to avoid blocking the event loop
6+
- PDF-only attachments; sanitize filenames; cap count and total bytes
7+
- No PHI in logs; records inbound faxes with minimal canonical fields
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import imaplib
13+
import email
14+
from email.message import Message
15+
import os
16+
import time
17+
import hashlib
18+
from pathlib import Path
19+
from typing import List, Tuple, Optional
20+
21+
import anyio
22+
23+
from ..config import settings
24+
from ..db import SessionLocal
25+
from ..audit import audit_event
26+
27+
28+
def _env_bool(name: str, default: bool = False) -> bool:
    """Read environment variable *name* as a boolean flag.

    The values ``1``, ``true``, ``yes`` and ``on`` (case-insensitive) count
    as true; anything else is false. When the variable is unset, *default*
    is used.
    """
    raw = os.environ.get(name)
    if raw is None:
        raw = str(default).lower()
    return str(raw).lower() in ("1", "true", "yes", "on")
31+
32+
33+
def _sanitize_filename(name: str) -> str:
    """Return a filesystem-safe version of an attachment filename.

    Directory components (``/`` or ``\\`` separated) are dropped and only
    alphanumerics, ``-``, ``_`` and ``.`` are kept. Names that end up empty
    or consist solely of dots (``.``, ``..``) become ``attachment.pdf``;
    names with leading dots get an ``attachment`` stem so they are neither
    hidden files nor relative-path tokens. The result is at most 128
    characters, keeping a short extension intact when truncating.
    """
    max_len = 128
    base = (name or "").replace("\\", "/").split("/")[-1]
    # keep alnum, dash, underscore, dot
    safe = "".join(ch for ch in base if ch.isalnum() or ch in {"-", "_", "."})
    if not safe.strip("."):
        return "attachment.pdf"
    if safe.startswith("."):
        safe = "attachment." + safe.lstrip(".")
    if len(safe) > max_len:
        stem, dot, ext = safe.rpartition(".")
        # Keep the extension so a long "....pdf" name still ends in ".pdf".
        if dot and stem and 0 < len(ext) <= 16:
            safe = stem[: max_len - len(ext) - 1] + "." + ext
        else:
            safe = safe[:max_len]
    return safe
40+
41+
42+
class HumbleFaxImapWorker:
    """Poll an IMAP mailbox for HumbleFax emails and store PDF attachments.

    Configuration is read from ``HUMBLEFAX_IMAP_*`` environment variables at
    construction time. The worker does nothing unless
    ``HUMBLEFAX_IMAP_ENABLED`` is truthy and server, username and password
    are all set. Blocking IMAP work runs in a worker thread so the event
    loop is not blocked. Nothing from message contents is logged.
    """

    def __init__(self) -> None:
        self.enabled = _env_bool("HUMBLEFAX_IMAP_ENABLED", False)
        self.server = os.getenv("HUMBLEFAX_IMAP_SERVER", "")
        self.username = os.getenv("HUMBLEFAX_IMAP_USERNAME", "")
        self.password = os.getenv("HUMBLEFAX_IMAP_PASSWORD", "")
        self.port = self._env_int("HUMBLEFAX_IMAP_PORT", 993)
        self.use_ssl = _env_bool("HUMBLEFAX_IMAP_SSL", True)
        # Lower bounds: never poll more often than every 30 s; caps are >= 1.
        self.poll_seconds = max(30, self._env_int("HUMBLEFAX_IMAP_POLL_INTERVAL", 300))
        self.max_attach_count = max(1, self._env_int("HUMBLEFAX_IMAP_MAX_ATTACH_COUNT", 3))
        self.max_attach_mb = max(1, self._env_int("HUMBLEFAX_IMAP_MAX_ATTACH_MB", 25))
        self._stop = False

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Read an integer env var; unset, blank or non-numeric yields *default*."""
        raw = (os.getenv(name) or "").strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            return default

    def configured(self) -> bool:
        """Return True when server, username and password are all non-empty."""
        return bool(self.server and self.username and self.password)

    async def run_forever(self) -> None:
        """Poll the mailbox until :meth:`stop` is called.

        Returns immediately when the worker is disabled or unconfigured.
        Errors from a poll are swallowed (best-effort worker) and followed by
        a short extra back-off. The wait between polls is the configured
        interval with a random +/-10% jitter.
        """
        if not self.enabled or not self.configured():
            return
        import random  # local import: only the polling loop needs it

        while not self._stop:
            try:
                await anyio.to_thread.run_sync(self._poll_once)
            except Exception:
                # Swallow errors; back off briefly
                await self._sleep_unless_stopped(5)
            spread = self.poll_seconds * 0.1
            await self._sleep_unless_stopped(
                self.poll_seconds + random.uniform(-spread, spread)
            )

    async def _sleep_unless_stopped(self, seconds: float) -> None:
        """Sleep up to *seconds*, waking about once a second to honour stop()."""
        remaining = float(seconds)
        while remaining > 0 and not self._stop:
            step = min(1.0, remaining)
            await anyio.sleep(step)
            remaining -= step

    def stop(self) -> None:
        """Ask the polling loop to exit at its next check."""
        self._stop = True

    def _connect(self) -> imaplib.IMAP4:
        """Open a connection to the configured server (SSL unless disabled)."""
        if self.use_ssl:
            return imaplib.IMAP4_SSL(self.server, self.port)
        return imaplib.IMAP4(self.server, self.port)

    def _poll_once(self) -> None:
        """Blocking IMAP poll — runs in a worker thread.

        Processes every UNSEEN message in INBOX. A message is flagged
        ``\\Seen`` only after it was processed without error, so a message
        whose attachments could not be persisted is retried on the next poll.
        Connection failures return silently; login/select errors propagate to
        the caller.
        """
        try:
            imap = self._connect()
        except Exception:
            return
        try:
            imap.login(self.username, self.password)
            imap.select("INBOX")
            # UID commands: UIDs stay valid across sessions, unlike the
            # message sequence numbers returned by a plain SEARCH.
            typ, data = imap.uid('SEARCH', 'UNSEEN')
            if typ != 'OK':
                return
            uids = data[0].decode().split() if data and data[0] else []
            for uid in uids:
                if self._stop:
                    break
                try:
                    self._process_message(imap, uid)
                except Exception:
                    # Skip on error, continue with next message (left unseen)
                    continue
                # Mark seen
                try:
                    imap.uid('STORE', uid, '+FLAGS', '(\\Seen)')
                except Exception:
                    pass
        finally:
            try:
                imap.logout()
            except Exception:
                pass

    @staticmethod
    def _extract_raw(msg_data: list) -> Optional[bytes]:
        """Return the literal bytes of the first tuple item in a FETCH response."""
        for item in msg_data:
            if isinstance(item, tuple):
                return item[1]
        return None

    @staticmethod
    def _looks_like_pdf(payload: bytes) -> bool:
        """Return True when the ``%PDF-`` marker occurs in the first 1024 bytes."""
        return b'%PDF-' in payload[:1024]

    def _process_message(self, imap: imaplib.IMAP4, uid: str) -> None:
        """Fetch one message by UID and persist its PDF attachments.

        Only parts with an ``attachment`` disposition are considered. A part
        must be named ``*.pdf`` or carry a PDF content type, and its payload
        must contain the PDF marker. Saving stops once the attachment-count
        cap or the total-size cap would be exceeded.

        Raises whatever the fetch or the persistence step raises, so the
        caller leaves the message unseen.
        """
        # BODY.PEEK[] returns the full message without implicitly setting
        # \Seen, so a failure below does not hide the message from later polls.
        typ, msg_data = imap.uid('FETCH', uid, '(BODY.PEEK[])')
        if typ != 'OK' or not msg_data:
            return
        raw = self._extract_raw(msg_data)
        if not raw:
            return
        msg: Message = email.message_from_bytes(raw)
        max_total_bytes = self.max_attach_mb * 1024 * 1024
        attach_saved = 0
        total_bytes = 0
        for part in msg.walk():
            if part.get_content_disposition() != 'attachment':
                continue
            # Enforce count cap before decoding anything further
            if attach_saved >= self.max_attach_count:
                break
            safe_name = _sanitize_filename(part.get_filename() or 'attachment.pdf')
            ctype = (part.get_content_type() or '').lower()
            named_pdf = safe_name.lower().endswith('.pdf')
            if not named_pdf and 'pdf' not in ctype:
                continue
            payload = part.get_payload(decode=True) or b''
            if not self._looks_like_pdf(payload):
                # Name or MIME type claimed PDF but the content is not one.
                continue
            size = len(payload)
            # Enforce total-size cap
            if (total_bytes + size) > max_total_bytes:
                break
            if not named_pdf:
                # Accepted via content type: store with a .pdf suffix.
                safe_name += '.pdf'
            self._store_attachment(uid, safe_name, payload)
            attach_saved += 1
            total_bytes += size

    def _store_attachment(self, uid: str, safe_name: str, payload: bytes) -> None:
        """Write one PDF to the fax data dir and record it as an InboundFax.

        If the database commit fails, the session is rolled back, the file
        just written is removed (so no unreferenced fax stays on disk) and the
        error is re-raised; no audit event is emitted in that case.
        """
        from datetime import datetime
        from email.utils import make_msgid  # explicit: do not rely on side-effect imports

        from ..db import InboundFax  # type: ignore

        # Persist safely into fax data dir
        job_id = make_msgid().strip('<>') or str(int(time.time()))
        out_dir = Path(settings.fax_data_dir) / 'inbound' / 'humblefax'
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path = out_dir / f"{job_id}-{safe_name}"
        with open(out_path, 'wb') as f:
            f.write(payload)
        sha256_hex = hashlib.sha256(payload).hexdigest()

        # Record in DB (minimal canonical fields); one timestamp for all columns
        now = datetime.utcnow()
        with SessionLocal() as db:
            fx = InboundFax(
                id=job_id,
                from_number=None,
                to_number=None,
                status='received',
                backend='humblefax',
                inbound_backend='humblefax',
                provider_sid=uid,
                pages=None,
                size_bytes=len(payload),
                sha256=sha256_hex,
                pdf_path=str(out_path),
                tiff_path=None,
                mailbox_label='imap',
                retention_until=None,
                pdf_token=None,
                pdf_token_expires_at=None,
                created_at=now,
                received_at=now,
                updated_at=now,
            )
            try:
                db.add(fx)
                db.commit()
            except Exception:
                db.rollback()
                try:
                    out_path.unlink()
                except OSError:
                    pass
                raise
        try:
            audit_event('inbound_received', job_id=job_id, backend='humblefax')
        except Exception:
            # Auditing is best-effort; the fax is already stored.
            pass
186+

scripts/fix_build_workflow.sh

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#!/bin/bash
# Quick fix to make build.yml work for direct pushes too.
#
# Writes a corrected workflow to .github/workflows/build.yml.fixed and prints
# the commands needed to apply it. Must be run from the repository root.

# Abort on the first failing command, unset variable, or failed pipeline stage
# so a failed write is never followed by a "Fixed version created" message.
set -euo pipefail

workflow_dir=".github/workflows"
fixed_file="${workflow_dir}/build.yml.fixed"

if [ ! -d "${workflow_dir}" ]; then
  echo "error: ${workflow_dir} not found; run this script from the repository root" >&2
  exit 1
fi

echo "=== Fixing build.yml to work with direct pushes ==="
echo ""
echo "The build workflow currently only runs contracts on PRs."
echo "This change will make it run on both PRs and direct pushes."
echo ""

# Create the fixed version. The heredoc delimiter is quoted, so the
# ${{ ... }} expressions are written literally rather than expanded by bash.
# NOTE(review): the workflow below calls toLower(...) in expressions; confirm
# that GitHub Actions supports this function before applying.
cat > "${fixed_file}" << 'EOF'
name: CI

on:
  pull_request:
    paths:
      - "api/**"
      - "api/admin_ui/**"
      - "config/**"
      - ".github/workflows/**"
      - "Dockerfile*"
  push:
    branches: [ "**" ]

jobs:
  changes:
    runs-on: ubuntu-latest
    outputs:
      api: ${{ steps.filter.outputs.api }}
      ui: ${{ steps.filter.outputs.ui }}
      docs: ${{ steps.filter.outputs.docs }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            api:
              - 'api/**'
              - 'Dockerfile'
            ui:
              - 'api/admin_ui/**'
              - 'api/admin_ui/Dockerfile'
            docs:
              - 'docs/**'
              - 'config/provider_traits.json'

  contracts:
    needs: changes
    # FIXED: Run on both PRs and pushes to auto-tunnel/development
    if: ${{ github.event_name == 'pull_request' || (github.event_name == 'push' && (github.ref == 'refs/heads/auto-tunnel' || github.ref == 'refs/heads/development')) }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install Python deps for OpenAPI generation
        run: |
          python -m pip install --upgrade pip
          pip install -r api/requirements.txt
      - name: Install jsonschema
        run: |
          python -m pip install --upgrade pip
          pip install jsonschema
      - name: Install ripgrep for contract greps
        run: |
          sudo apt-get update
          sudo apt-get install -y ripgrep
      - name: Validate provider traits
        run: |
          python scripts/ci/validate_provider_traits.py
      - name: Generate OpenAPI (FastAPI app.openapi)
        run: |
          python - << 'PY'
          import os, sys, json
          sys.path.insert(0, 'api')
          os.environ.setdefault('FAXBOT_TEST_MODE', 'true')
          os.environ.setdefault('FAX_DISABLED', 'true')
          os.environ.setdefault('DATABASE_URL', 'sqlite:///./test_openapi_ci_build.yml.db')
          from app.main import app
          spec = app.openapi()
          with open('openapi.json', 'w') as f:
              json.dump(spec, f, indent=2)
          print('✅ Generated openapi.json')
          PY
      - name: Diff against pinned snapshot (if present)
        run: |
          if [ -f docs/pinned-openapi.json ]; then
            echo "Pinned snapshot found. Running diff..."
            python scripts/ci/diff_openapi.py || (echo "OpenAPI drift" && exit 1)
          else
            echo "No pinned snapshot at docs/pinned-openapi.json; skipping diff (green)."
          fi
      - name: Contract greps
        run: bash scripts/ci/greps.sh

  build-api:
    needs: [changes, contracts]
    if: needs.changes.outputs.api == 'true'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build API with cache
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: false
          tags: ghcr.io/${{ toLower(github.repository_owner) }}/faxbot-api:pr-${{ github.run_id }}
          cache-from: type=registry,ref=ghcr.io/${{ toLower(github.repository_owner) }}/faxbot-api:cache
          cache-to: type=registry,ref=ghcr.io/${{ toLower(github.repository_owner) }}/faxbot-api:cache,mode=max
          build-args: |
            BUILDKIT_INLINE_CACHE=1

  build-ui:
    needs: [changes, contracts]
    if: needs.changes.outputs.ui == 'true'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: 'npm'
          cache-dependency-path: api/admin_ui/package-lock.json
      - name: Install UI deps
        working-directory: api/admin_ui
        run: npm ci --prefer-offline --no-audit
      - name: Typecheck + build
        working-directory: api/admin_ui
        run: npm run build
EOF

echo "Fixed version created at .github/workflows/build.yml.fixed"
echo ""
echo "The change: contracts job will now run on:"
echo "  - All pull requests (as before)"
echo "  - Direct pushes to auto-tunnel branch"
echo "  - Direct pushes to development branch"
echo ""
echo "To apply this fix:"
echo "  mv .github/workflows/build.yml.fixed .github/workflows/build.yml"
echo "  git add .github/workflows/build.yml"
echo "  git commit -m 'fix(ci): Allow build workflow to run on direct pushes to auto-tunnel'"
echo "  git push"

0 commit comments

Comments
 (0)