Skip to content

Commit 8b88ca0

Browse files
authored
Merge pull request #151 from serabi/fix/kosync-hash-mismatch
Fix KoSync hash mismatch and harden admin endpoints
2 parents 316c2d0 + a76cb04 commit 8b88ca0

File tree

3 files changed

+329
-34
lines changed

3 files changed

+329
-34
lines changed

src/api/kosync_server.py

Lines changed: 163 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,15 @@ def decorated_function(*args, **kwargs):
5050
expected_password = os.environ.get("KOSYNC_KEY")
5151

5252
if not expected_user or not expected_password:
53-
logger.error("❌ KOSync Integrated Server: Credentials not configured in settings")
53+
logger.error(f"❌ KOSync Integrated Server: Credentials not configured in settings (request from {request.remote_addr})")
5454
return jsonify({"error": "Server not configured"}), 500
5555

5656
expected_hash = hash_kosync_key(expected_password)
5757

5858
if user and expected_user and user.lower() == expected_user.lower() and (key == expected_password or key == expected_hash):
5959
return f(*args, **kwargs)
6060

61-
logger.warning(f"⚠️ KOSync Integrated Server: Unauthorized access attempt from '{request.remote_addr}' (user: '{user}'")
61+
logger.warning(f"⚠️ KOSync Integrated Server: Unauthorized access attempt from '{request.remote_addr}' (user: '{user}')")
6262
return jsonify({"error": "Unauthorized"}), 401
6363
return decorated_function
6464

@@ -129,42 +129,61 @@ def kosync_get_progress(doc_id):
129129
"""
130130
Fetch progress for a specific document.
131131
Returns 502 (not 404) if document not found, per kosync-dotnet spec.
132+
133+
Lookup order:
134+
1. Direct hash match in kosync_documents
135+
2. Book lookup by kosync_doc_id
136+
3. Sibling hash resolution (same book, different epub hash)
137+
4. Background auto-discovery for completely unknown hashes
132138
"""
133-
kosync_doc = _database_service.get_kosync_document(doc_id)
139+
logger.info(f"KOSync: GET progress for doc {doc_id[:8]}... from {request.remote_addr}")
134140

141+
# Step 1: Direct hash lookup
142+
kosync_doc = _database_service.get_kosync_document(doc_id)
135143
if kosync_doc:
136-
return jsonify({
137-
"device": kosync_doc.device or "",
138-
"device_id": kosync_doc.device_id or "",
139-
"document": kosync_doc.document_hash,
140-
"percentage": float(kosync_doc.percentage) if kosync_doc.percentage else 0,
141-
"progress": kosync_doc.progress or "",
142-
"timestamp": int(kosync_doc.timestamp.timestamp()) if kosync_doc.timestamp else 0
143-
}), 200
144+
# If linked to a book, always check siblings for freshest progress.
145+
# This prevents "shadow" docs (created by sync-bot PUTs) from returning
146+
# stale data when the real device hash has advanced further.
147+
if kosync_doc.linked_abs_id:
148+
book = _database_service.get_book(kosync_doc.linked_abs_id)
149+
if book:
150+
return _respond_from_book_states(doc_id, book)
144151

145-
# Fallback: Check mapped book with State data
152+
has_progress = kosync_doc.percentage and float(kosync_doc.percentage) > 0
153+
if has_progress:
154+
return jsonify({
155+
"device": kosync_doc.device or "",
156+
"device_id": kosync_doc.device_id or "",
157+
"document": kosync_doc.document_hash,
158+
"percentage": float(kosync_doc.percentage) if kosync_doc.percentage else 0,
159+
"progress": kosync_doc.progress or "",
160+
"timestamp": int(kosync_doc.timestamp.timestamp()) if kosync_doc.timestamp else 0
161+
}), 200
162+
# Document exists but has no progress and no linked book — fall through
163+
# to try sibling resolution for better data
164+
165+
# Step 2: Book lookup by kosync_doc_id
146166
book = _database_service.get_book_by_kosync_id(doc_id)
147167
if book:
148-
states = _database_service.get_states_for_book(book.abs_id)
149-
if not states:
150-
return jsonify({"message": "Document not found on server"}), 502
151-
152-
kosync_state = next((s for s in states if s.client_name.lower() == 'kosync'), None)
153-
if kosync_state:
154-
latest_state = kosync_state
155-
else:
156-
latest_state = max(states, key=lambda s: s.last_updated if s.last_updated else 0)
157-
158-
return jsonify({
159-
"device": "abs-kosync-bridge",
160-
"device_id": "abs-kosync-bridge",
161-
"document": doc_id,
162-
"percentage": float(latest_state.percentage) if latest_state.percentage else 0,
163-
"progress": (latest_state.xpath or latest_state.cfi) if hasattr(latest_state, 'xpath') else "",
164-
"timestamp": int(latest_state.last_updated) if latest_state.last_updated else 0
165-
}), 200
166-
167-
logger.debug(f"KOSync: Document not found: {doc_id[:8]}...")
168+
return _respond_from_book_states(doc_id, book)
169+
170+
# Step 3: Sibling hash resolution — find the book via other linked hashes
171+
resolved_book = _resolve_book_by_sibling_hash(doc_id, existing_doc=kosync_doc)
172+
if resolved_book:
173+
_register_hash_for_book(doc_id, resolved_book)
174+
return _respond_from_book_states(doc_id, resolved_book)
175+
176+
# Step 4: Unknown hash — register stub and start background discovery
177+
auto_create = os.environ.get('AUTO_CREATE_EBOOK_MAPPING', 'true').lower() == 'true'
178+
if auto_create and doc_id not in _active_scans:
179+
_active_scans.add(doc_id)
180+
from src.db.models import KosyncDocument as KD
181+
stub = KD(document_hash=doc_id)
182+
_database_service.save_kosync_document(stub)
183+
logger.info(f"🔍 KOSync: Created stub for unknown hash {doc_id[:8]}..., starting background discovery")
184+
threading.Thread(target=_run_get_auto_discovery, args=(doc_id,), daemon=True).start()
185+
186+
logger.warning(f"⚠️ KOSync: Document not found: {doc_id[:8]}... (GET from {request.remote_addr})")
168187
return jsonify({"message": "Document not found on server"}), 502
169188

170189

@@ -181,12 +200,16 @@ def kosync_put_progress():
181200

182201
data = request.json
183202
if not data:
203+
logger.warning(f"KOSync: PUT progress with no JSON data from {request.remote_addr}")
184204
return jsonify({"error": "No data"}), 400
185205

186206
doc_hash = data.get('document')
187207
if not doc_hash:
208+
logger.warning(f"KOSync: PUT progress with no document ID from {request.remote_addr}")
188209
return jsonify({"error": "Missing document ID"}), 400
189210

211+
logger.info(f"KOSync: PUT progress request for doc {doc_hash[:8]}... from {request.remote_addr} (device: {data.get('device', 'unknown')})")
212+
190213
percentage = data.get('percentage', 0)
191214
progress = data.get('progress', '')
192215
device = data.get('device', '')
@@ -557,6 +580,114 @@ def _try_find_epub_by_hash(doc_hash: str) -> Optional[str]:
557580
return None
558581

559582

583+
# ---------------- GET Fallback Helpers ----------------
584+
585+
def _respond_from_book_states(doc_id, book):
    """Build a GET progress response for ``doc_id`` from a book's stored data.

    KOSync documents linked to the book are consulted first: among those
    reporting a percentage above zero, the one with the highest percentage
    supplies the response. Otherwise the book's state rows are used,
    preferring the state whose client name is 'kosync' and falling back to
    the state with the greatest ``last_updated`` value.

    Args:
        doc_id: Document hash requested by the client. It is echoed back in
            the "document" field no matter which record supplied the data.
        book: Book record; its ``abs_id`` and ``abs_title`` are read.

    Returns:
        A ``(response, status_code)`` tuple: 200 with the progress payload,
        or 502 when the book has neither document progress nor states.
    """
    # Check sibling kosync_documents first for device-specific progress.
    sibling_docs = _database_service.get_kosync_documents_for_book(book.abs_id)
    docs_with_progress = [d for d in sibling_docs if d.percentage and float(d.percentage) > 0]
    if docs_with_progress:
        best_doc = max(docs_with_progress, key=lambda d: float(d.percentage))
        logger.info(f"KOSync: Resolved {doc_id[:8]}... to '{book.abs_title}' via sibling hash {best_doc.document_hash[:8]}... ({float(best_doc.percentage):.2%})")
        return jsonify({
            "device": best_doc.device or "abs-kosync-bridge",
            "device_id": best_doc.device_id or "abs-kosync-bridge",
            "document": doc_id,
            "percentage": float(best_doc.percentage),
            "progress": best_doc.progress or "",
            "timestamp": int(best_doc.timestamp.timestamp()) if best_doc.timestamp else 0
        }), 200

    # States are only needed on the fallback path, so query them lazily.
    states = _database_service.get_states_for_book(book.abs_id)
    if not states:
        return jsonify({"message": "Document not found on server"}), 502

    kosync_state = next((s for s in states if s.client_name.lower() == 'kosync'), None)
    latest_state = kosync_state or max(states, key=lambda s: s.last_updated if s.last_updated else 0)

    # Coerce a missing position to "" so the payload never carries a JSON
    # null, matching the sibling-document branch above.
    if hasattr(latest_state, 'xpath'):
        progress = latest_state.xpath or latest_state.cfi or ""
    else:
        progress = ""

    return jsonify({
        "device": "abs-kosync-bridge",
        "device_id": "abs-kosync-bridge",
        "document": doc_id,
        "percentage": float(latest_state.percentage) if latest_state.percentage else 0,
        "progress": progress,
        "timestamp": int(latest_state.last_updated) if latest_state.last_updated else 0
    }), 200
618+
619+
620+
def _resolve_book_by_sibling_hash(doc_id: str, existing_doc=None):
    """
    Resolve a hash to a known book using database lookups only.

    The document record for ``doc_id`` (``existing_doc`` when supplied and
    truthy, otherwise fetched from the database) must carry a filename.
    That filename is first matched against another KOSync document that is
    already linked to a book, then against the books' ebook filenames.

    Returns the matching book, or None when nothing can be resolved.
    """
    record = existing_doc or _database_service.get_kosync_document(doc_id)
    if not record or not record.filename:
        # Without a cached filename there is nothing to match on.
        return None

    # First choice: a different document with the same filename that is linked to a book.
    linked_sibling = _database_service.get_kosync_doc_by_filename(record.filename)
    if linked_sibling and linked_sibling.linked_abs_id and linked_sibling.document_hash != doc_id:
        sibling_book = _database_service.get_book(linked_sibling.linked_abs_id)
        if sibling_book:
            logger.info(f"🔗 KOSync: Resolved {doc_id[:8]}... to '{sibling_book.abs_title}' via filename sibling")
            return sibling_book

    # Second choice: a book whose ebook filename matches directly.
    direct_match = _database_service.get_book_by_ebook_filename(record.filename)
    if direct_match:
        logger.info(f"🔗 KOSync: Resolved {doc_id[:8]}... to '{direct_match.abs_title}' via ebook filename match")
        return direct_match

    return None
643+
644+
645+
def _register_hash_for_book(doc_id: str, book):
    """Ensure ``doc_id`` has a KOSync document record linked to ``book``.

    A missing record is created already linked. An existing record is linked
    only when it has no ``linked_abs_id`` yet; an existing link is left
    untouched.
    """
    from src.db.models import KosyncDocument as KD

    current = _database_service.get_kosync_document(doc_id)

    if not current:
        # Unknown hash: create the record with the link in place.
        _database_service.save_kosync_document(
            KD(document_hash=doc_id, linked_abs_id=book.abs_id)
        )
        logger.info(f"🔗 KOSync: Created and linked new document {doc_id[:8]}... to '{book.abs_title}'")
        return

    if current.linked_abs_id:
        # Already linked to a book; do not overwrite.
        return

    _database_service.link_kosync_document(doc_id, book.abs_id)
    logger.info(f"🔗 KOSync: Linked existing document {doc_id[:8]}... to '{book.abs_title}'")
658+
659+
660+
def _run_get_auto_discovery(doc_id: str):
    """Background auto-discovery triggered by GET for an unknown hash.

    Looks for an epub matching ``doc_id``, stores the discovered filename on
    the document record when it has none, and links the hash to a book whose
    ebook filename matches.

    Any error is logged with its traceback rather than propagated, and
    ``doc_id`` is always removed from ``_active_scans`` on exit so a later
    request can start a new scan.
    """
    try:
        logger.info(f"🔍 KOSync: Background discovery (GET) for {doc_id[:8]}...")
        epub_filename = _try_find_epub_by_hash(doc_id)

        if not epub_filename:
            logger.info(f"🔍 KOSync: GET-discovery found no epub for {doc_id[:8]}...")
            return

        # Update stub with filename
        doc = _database_service.get_kosync_document(doc_id)
        if doc and not doc.filename:
            doc.filename = epub_filename
            _database_service.save_kosync_document(doc)

        # Try to find an existing book that uses this epub
        book = _database_service.get_book_by_ebook_filename(epub_filename)
        if book:
            _database_service.link_kosync_document(doc_id, book.abs_id)
            logger.info(f"✅ KOSync: GET-discovery linked {doc_id[:8]}... to '{book.abs_title}'")
            return

        logger.info(f"🔍 KOSync: GET-discovery found epub '{epub_filename}' but no matching book")
    except Exception as e:
        # logger.exception records the traceback, which logger.error with
        # only str(e) would drop.
        logger.exception(f"❌ Error in GET auto-discovery: {e}")
    finally:
        _active_scans.discard(doc_id)
689+
690+
560691
# ---------------- KOSync Document Management API ----------------
561692

562693
@kosync_admin_bp.route('/api/kosync-documents', methods=['GET'])

src/db/database_service.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,8 @@ def save_book(self, book: Book) -> Book:
204204
if existing:
205205
# Update existing book
206206
for attr in ['abs_title', 'ebook_filename', 'original_ebook_filename', 'kosync_doc_id',
207-
'transcript_file', 'status', 'duration', 'sync_mode', 'storyteller_uuid']:
207+
'transcript_file', 'status', 'duration', 'sync_mode', 'storyteller_uuid',
208+
'abs_ebook_item_id']:
208209
if hasattr(book, attr):
209210
setattr(existing, attr, getattr(book, attr))
210211
session.flush()
@@ -564,6 +565,30 @@ def get_kosync_document_by_linked_book(self, abs_id: str) -> Optional[KosyncDocu
564565
session.expunge(doc)
565566
return doc
566567

568+
def get_kosync_documents_for_book(self, abs_id: str) -> List[KosyncDocument]:
    """Return every KOSync document whose ``linked_abs_id`` equals ``abs_id``.

    Each returned object is expunged from the session before the session
    context exits. The list is empty when no document is linked to the book.
    """
    with self.get_session() as session:
        linked_query = session.query(KosyncDocument).filter(
            KosyncDocument.linked_abs_id == abs_id
        )
        linked_docs = linked_query.all()
        for linked_doc in linked_docs:
            session.expunge(linked_doc)
        return linked_docs
577+
578+
def get_book_by_ebook_filename(self, filename: str) -> Optional['Book']:
    """Find a book by its ebook filename (current or original).

    Args:
        filename: Filename compared for equality against both
            ``Book.ebook_filename`` and ``Book.original_ebook_filename``.

    Returns:
        The first matching book, expunged from the session, or None when
        there is no match or ``filename`` is empty/None.
    """
    # An empty filename can never identify a book. Comparing against None
    # would be rendered as IS NULL and match any book that has no ebook.
    if not filename:
        return None

    from sqlalchemy import or_
    with self.get_session() as session:
        book = session.query(Book).filter(
            or_(
                Book.ebook_filename == filename,
                Book.original_ebook_filename == filename
            )
        ).first()
        if book:
            session.expunge(book)
        return book
591+
567592
def get_kosync_doc_by_filename(self, filename: str) -> Optional[KosyncDocument]:
568593
"""Find a KOSync document by its associated filename."""
569594
with self.get_session() as session:
@@ -912,4 +937,3 @@ def should_migrate(self) -> bool:
912937
return False
913938

914939

915-

0 commit comments

Comments
 (0)