richardr1126
diff --git a/‎README.md‎
Lines changed: 85 additions & 44 deletions b/‎README.md‎
Lines changed: 85 additions & 44 deletions
diff --git a/‎package.json‎
Lines changed: 2 additions & 0 deletions b/‎package.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pnpm-lock.yaml‎
Lines changed: 24 additions & 0 deletions b/‎pnpm-lock.yaml‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎src/app/api/documents/route.ts‎
Lines changed: 5 additions & 3 deletions b/‎src/app/api/documents/route.ts‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎src/app/api/tts/route.ts‎
Lines changed: 1 addition & 33 deletions b/‎src/app/api/tts/route.ts‎
Lines changed: 1 addition & 33 deletions
@@ -7,28 +7,63 @@
 
 [![Discussions](https://img.shields.io/badge/Discussions-Ask%20a%20Question-blue)](../../discussions)
 
-# OpenReader WebUI 📄🔊
+# 📄🔊 OpenReader WebUI
 
-OpenReader WebUI is a document reader with Text-to-Speech capabilities, offering a TTS read along experience with narration for EPUB, PDF, TXT, MD, and DOCX documents. It supports multiple TTS providers including OpenAI, Deepinfra, and custom OpenAI-compatible endpoints like [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI) and [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI)
+OpenReader WebUI is an open-source text-to-speech document reader web app built with Next.js, offering a TTS read-along experience with narration for EPUB, PDF, TXT, MD, and DOCX documents. It supports multiple TTS providers including OpenAI, Deepinfra, and custom OpenAI-compatible endpoints like [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI) and [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI).
 
-- 🎯 **Multi-Provider TTS Support**: 
-  - **OpenAI**: tts-1, tts-1-hd, gpt-4o-mini-tts models with voices (alloy, echo, fable, onyx, nova, shimmer)
+- 🧠 **(New) Smart Sentence-Aware Narration**: EPUB and PDF playback use shared NLP (compromise) and smart sentence continuation to merge sentences that span pages/chapters, giving smoother TTS and helping to prevent hard cuts at page breaks
+- 🎧 **(New) Reliable Audiobook Export**: Create and export audiobooks from PDF and EPUB files **(in m4b or mp3 format using ffmpeg)** with resumable, chapter/page-based export and per-chapter regeneration
+- 🎯 **(New) Multi-Provider TTS Support**: 
   - **Deepinfra**: Kokoro-82M, Orpheus-3B, Sesame-1B models with extensive voice libraries
-  - **Custom OpenAI-Compatible**: Any OpenAI-compatible endpoint with custom voice sets
-- 💾 **Local-First Architecture**: Uses IndexedDB browser storage for documents
-- 🛜 **Optional Server-side documents**: Manually upload documents to the next backend for all users to download
-- 📖 **Read Along Experience**: Follow along with highlighted text as the TTS narrates
-- 📄 **Document formats**: EPUB, PDF, TXT, MD, DOCX (with libreoffice installed)
-- 🎧 **Audiobook Creation**: Create and export audiobooks from PDF and ePub files **(in m4b format with ffmpeg and aac TTS output)**
+  - **OpenAI API ($$)**: tts-1, tts-1-hd, gpt-4o-mini-tts models
+  - **Kokoro-FastAPI**: Self-hosted OpenAI-compatible TTS API server supporting Kokoro-82M and multi-voice combinations (like `af_heart+bf_emma`)
+  - **Orpheus-FastAPI**: Self-hosted OpenAI-compatible TTS API server supporting Orpheus-3B
+  - Any other custom OpenAI-compatible server that exposes a `/v1/audio/voices` endpoint
+- 🚀 **(New) Optimized TTS Pipeline**: Next.js TTS backend with in-memory LRU audio cache, ETag-aware responses, and in-flight request de-duplication for faster repeat playback
+- 💾 **Local-First Architecture**: IndexedDB browser storage for documents and settings (now using Dexie.js)
+- 🛜 **Optional Server-side documents**: Manually upload documents to the Next.js backend (and Docker `docstore`) for all users to download
+- 📖 **Read Along Experience**: Follow along with highlighted text as the TTS narrates PDF files, with per-sentence navigation and skip controls
+- 📄 **Document formats**: EPUB, PDF, TXT, MD, DOCX (with libreoffice installed, plus hardened DOCX→PDF conversion for better reliability)
 - 🎨 **Customizable Experience**: 
   - 🔑 Select TTS provider (OpenAI, Deepinfra, or Custom OpenAI-compatible)
   - 🔐 Set TTS API base URL and optional API key
   - 🎨 Multiple app theme options
   - And more...
 
-### 🛠️ Work in progress
-- [ ] **Native .docx support** (currently requires libreoffice)
-- [ ] **Accessibility Improvements**
+<details>
+<summary>
+
+### 🆕 What's New in v1.0.0
+
+</summary>
+
+- 🧠 **Smart sentence continuation**  
+  - EPUB and PDF playback now use smarter sentence splitting and continuation metadata so sentences that cross page/chapter boundaries are merged before hitting the TTS API.  
+  - This yields more natural narration and fewer awkward pauses when a sentence spans multiple pages or EPUB spine items.
+- 🎧 **Chapter/page-based audiobook export with resume & regeneration**  
+  - Per-chapter/per-page generation to disk with persistent `bookId`  
+  - Resumable generation (can cancel and continue later)  
+  - Per-chapter regeneration & deletion  
+  - Final combined **M4B** or **MP3** download with embedded chapter metadata.  
+- 💾 **Dexie-backed local storage & sync**  
+  - All document types (PDF, EPUB, TXT/MD-as-HTML) and config are stored via a unified Dexie layer on top of IndexedDB.  
+  - Document lists use live Dexie queries (no manual refresh needed), and server sync now correctly includes text/markdown documents as part of the library backup.  
+- 🗣️ **Kokoro multi-voice selection & utilities**  
+  - Kokoro models now support multi-voice combinations, with provider-aware limits and helpers (not supported on OpenAI or Deepinfra)
+- ⚡ **Faster, more efficient TTS backend proxy**  
+  - In-memory **LRU caching** for audio responses with configurable size/TTL  
+  - **ETag** support (`304` on cache hits) + `X-Cache` headers (`HIT` / `MISS` / `INFLIGHT`)  
+- 📄 **More robust DOCX → PDF conversion**  
+  - DOCX conversion now uses isolated per-job LibreOffice profiles and temp directories, polls for a stable output file size, and aggressively cleans up temp files.  
+  - This reduces cross-job interference and flakiness when converting multiple DOCX files in parallel.
+- ♿ **Accessibility & layout improvements**  
+  - Dialogs and folder toggles expose proper roles and ARIA attributes.  
+  - PDF/EPUB/HTML readers use a full-height app shell with a sticky bottom TTS bar, improved scrollbars, and refined focus styles.
+- ✅ **End-to-end Playwright test suite with TTS mocks**  
+  - Deterministic TTS responses in tests via a reusable Playwright route mock.  
+  - Coverage for accessibility, upload, navigation, folder management, deletion flows, and playback across all document types.
+
+</details>
 
 ## 🐳 Docker Quick Start
 
@@ -78,12 +113,18 @@ docker pull ghcr.io/richardr1126/openreader-webui:latest
 
 ### 🗣️ Local Kokoro-FastAPI Quick-start (CPU or GPU)
 
-You can run the Kokoro TTS API server directly with Docker. **We are not responsible for issues with Kokoro-FastAPI.** For best performance, use an NVIDIA GPU (for GPU version) or Apple Silicon (for CPU version).
+You can run the Kokoro TTS API server directly with Docker. **We are not responsible for issues with [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI).** For best performance, use an NVIDIA GPU (for GPU version) or Apple Silicon (for CPU version).
 
 > **Note:** When using these, set the `API_BASE` env var to `http://host.docker.internal:8880/v1` or `http://kokoro-tts:8880/v1`.
 > You can also use the example `docker-compose.yml` in `examples/docker-compose.yml` if you prefer Docker Compose.
 
-**CPU Version:**
+<details>
+<summary>
+
+**Docker CPU**
+
+</summary>
+
 ```bash
 docker run -d \
   --name kokoro-tts \
@@ -99,7 +140,15 @@ docker run -d \
   ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.4
 ```
 
-**GPU Version:**
+</details>
+
+<details>
+<summary>
+
+**Docker GPU**
+
+</summary>
+
 ```bash
 docker run -d \
   --name kokoro-tts \
@@ -113,23 +162,31 @@ docker run -d \
   ghcr.io/remsky/kokoro-fastapi-gpu:v0.2.4
 ```
 
+</details>
+
 > **Note:**
 > - These commands are for running the Kokoro TTS API server only. For issues or support, see the [Kokoro-FastAPI repository](https://github.com/remsky/Kokoro-FastAPI).
 > - The GPU version requires NVIDIA Docker support and works best with NVIDIA GPUs. The CPU version works best on Apple Silicon or modern x86 CPUs.
 > - Adjust environment variables as needed for your hardware and use case.
 
-## Dev Installation
+## Local Development Installation
 
 ### Prerequisites
-- Node.js & npm or pnpm (recommended: use [nvm](https://github.com/nvm-sh/nvm) for Node.js)
+- Node.js (recommended: use [nvm](https://github.com/nvm-sh/nvm))
+- pnpm (recommended) or npm
+    ```bash
+    npm install -g pnpm
+    ```
+- A TTS API server (Kokoro-FastAPI, Orpheus-FastAPI, Deepinfra, OpenAI, etc.) running and accessible
+
 Optional, required only for specific features:
 - [FFmpeg](https://ffmpeg.org) (required only for audiobook m4b/mp3 export)
-  - On Linux: `sudo apt install ffmpeg`
-  - On MacOS: `brew install ffmpeg`
+    ```bash
+    brew install ffmpeg
+    ```
 - [libreoffice](https://www.libreoffice.org) (required for DOCX files)
-  - On Linux: `sudo apt install libreoffice`
-  - On MacOS: `brew install libreoffice`
-
+    ```bash
+    brew install libreoffice
+    ```
 ### Steps
 
 1. Clone the repository:
@@ -142,12 +199,7 @@ Optionally required for different features:
 
    With pnpm (recommended):
    ```bash
-   pnpm install
-   ```
-   
-   Or with npm:
-   ```bash
-   npm install
+   pnpm i # or npm i
    ```
 
 3. Configure the environment:
@@ -161,26 +213,15 @@ Optionally required for different features:
 
    With pnpm (recommended):
    ```bash
-   pnpm dev
-   ```
-   
-   Or with npm:
-   ```bash
-   npm run dev
+   pnpm dev # or npm run dev
    ```
 
    or build and run the production server:
 
    With pnpm:
    ```bash
-   pnpm build
-   pnpm start
-   ```
-   
-   Or with npm:
-   ```bash
-   npm run build
-   npm start
+   pnpm build # or npm run build
+   pnpm start # or npm start
    ```
 
    Visit [http://localhost:3003](http://localhost:3003) to use the app.
@@ -217,7 +258,7 @@ This project would not be possible without standing on the shoulders of these gi
 
 - **Framework:** Next.js (React)
 - **Containerization:** Docker
-- **Storage:** IndexedDB (in browser db store)
+- **Storage:** Dexie + IndexedDB (in-browser local database)
 - **PDF:** 
   - [react-pdf](https://github.com/wojtekmaj/react-pdf)
   - [pdf.js](https://mozilla.github.io/pdf.js/)
 
@@ -17,6 +17,8 @@
     "cmpstr": "^3.0.4",
     "compromise": "^14.14.4",
     "core-js": "^3.46.0",
+    "dexie": "^4.2.1",
+    "dexie-react-hooks": "^4.2.0",
     "epubjs": "^0.3.93",
     "howler": "^2.2.4",
     "lru-cache": "^11.2.2",
 
@@ -1,6 +1,7 @@
 import { writeFile, readFile, readdir, mkdir, unlink } from 'fs/promises';
 import { NextRequest, NextResponse } from 'next/server';
 import path from 'path';
+import type { BaseDocument, SyncedDocument } from '@/types/documents';
 
 const DOCS_DIR = path.join(process.cwd(), 'docstore');
 
@@ -17,9 +18,10 @@ export async function POST(req: NextRequest) {
   try {
     await ensureDocsDir();
     const data = await req.json();
+    const documents = data.documents as SyncedDocument[];
 
     // Save document metadata and content
-    for (const doc of data.documents) {
+    for (const doc of documents) {
       const docPath = path.join(DOCS_DIR, `${doc.id}.json`);
       const contentPath = path.join(DOCS_DIR, `${doc.id}.${doc.type}`);
 
@@ -49,7 +51,7 @@ export async function POST(req: NextRequest) {
 export async function GET() {
   try {
     await ensureDocsDir();
-    const documents = [];
+    const documents: SyncedDocument[] = [];
 
     const files = await readdir(DOCS_DIR);
     const jsonFiles = files.filter(file => file.endsWith('.json'));
@@ -58,7 +60,7 @@ export async function GET() {
       const docPath = path.join(DOCS_DIR, file);
 
       try {
-        const metadata = JSON.parse(await readFile(docPath, 'utf8'));
+        const metadata = JSON.parse(await readFile(docPath, 'utf8')) as BaseDocument;
         const contentPath = path.join(DOCS_DIR, `${metadata.id}.${metadata.type}`);
         const content = await readFile(contentPath);
 
 
@@ -23,36 +23,6 @@ const ttsAudioCache = new LRUCache<string, AudioBufferValue>({
   ttl: TTS_CACHE_TTL_MS,
 });
 
-// Concurrency controls and in-flight de-duplication
-const TTS_MAX_CONCURRENCY = Number(process.env.TTS_MAX_CONCURRENCY || 4);
-
-class Semaphore {
-  private permits: number;
-  private queue: Array<() => void> = [];
-  constructor(max: number) {
-    this.permits = Math.max(1, max);
-  }
-  async acquire(): Promise<() => void> {
-    if (this.permits > 0) {
-      this.permits -= 1;
-      return this.release.bind(this);
-    }
-    return new Promise<() => void>((resolve) => {
-      this.queue.push(() => {
-        this.permits -= 1;
-        resolve(this.release.bind(this));
-      });
-    });
-  }
-  private release() {
-    this.permits += 1;
-    const next = this.queue.shift();
-    if (next) next();
-  }
-}
-
-const ttsSemaphore = new Semaphore(TTS_MAX_CONCURRENCY);
-
 type InflightEntry = {
   promise: Promise<ArrayBuffer>;
   controller: AbortController;
@@ -211,7 +181,7 @@ export async function POST(req: NextRequest) {
       });
     }
 
-    // De-duplicate identical in-flight requests and bound upstream concurrency
+    // De-duplicate identical in-flight requests
     const existing = inflightRequests.get(cacheKey);
     if (existing) {
       console.log('TTS in-flight JOIN for key:', cacheKey.slice(0, 8));
@@ -247,14 +217,12 @@ export async function POST(req: NextRequest) {
       controller,
       consumers: 1,
       promise: (async () => {
-        const release = await ttsSemaphore.acquire();
         try {
           const buffer = await fetchTTSBufferWithRetry(openai, createParams, controller.signal);
           // Save to cache
           ttsAudioCache.set(cacheKey, buffer);
           return buffer;
         } finally {
-          release();
           inflightRequests.delete(cacheKey);
         }
       })()