diff --git a/examples/vue-vuetify/components.d.ts b/examples/vue-vuetify/components.d.ts index 3ea7477a..a4129bbc 100644 --- a/examples/vue-vuetify/components.d.ts +++ b/examples/vue-vuetify/components.d.ts @@ -12,7 +12,6 @@ declare module 'vue' { Drawer: typeof import('./src/components/drawer-system/Drawer.vue')['default'] DrawerProvider: typeof import('./src/components/drawer-system/DrawerProvider.vue')['default'] DrawerToggleButton: typeof import('./src/components/drawer-system/DrawerToggleButton.vue')['default'] - HelloWorld: typeof import('./src/components/HelloWorld.vue')['default'] PageControls: typeof import('./src/components/PageControls.vue')['default'] PrintDialog: typeof import('./src/components/PrintDialog.vue')['default'] Search: typeof import('./src/components/Search.vue')['default'] diff --git a/packages/plugin-selection/src/lib/actions.ts b/packages/plugin-selection/src/lib/actions.ts index 3bf2da8e..5eb41979 100644 --- a/packages/plugin-selection/src/lib/actions.ts +++ b/packages/plugin-selection/src/lib/actions.ts @@ -1,6 +1,7 @@ import { Action } from '@embedpdf/core'; import { PdfPageGeometry, Rect } from '@embedpdf/models'; import { SelectionRangeX } from './types'; +import { GlyphAccelerationModel } from './utils'; export const CACHE_PAGE_GEOMETRY = 'CACHE_PAGE_GEOMETRY'; export const SET_SELECTION = 'SET_SELECTION'; @@ -13,7 +14,7 @@ export const RESET = 'RESET'; export interface CachePageGeometryAction extends Action { type: typeof CACHE_PAGE_GEOMETRY; - payload: { page: number; geo: PdfPageGeometry }; + payload: { page: number; geo: GlyphAccelerationModel }; } export interface SetSelectionAction extends Action { type: typeof SET_SELECTION; @@ -56,7 +57,7 @@ export type SelectionAction = | SetSlicesAction | ResetAction; -export const cachePageGeometry = (page: number, geo: PdfPageGeometry): CachePageGeometryAction => ({ +export const cachePageGeometry = (page: number, geo: GlyphAccelerationModel): CachePageGeometryAction => ({ type: 
CACHE_PAGE_GEOMETRY, payload: { page, geo }, }); diff --git a/packages/plugin-selection/src/lib/selection-plugin.ts b/packages/plugin-selection/src/lib/selection-plugin.ts index 54a5150b..79075c50 100644 --- a/packages/plugin-selection/src/lib/selection-plugin.ts +++ b/packages/plugin-selection/src/lib/selection-plugin.ts @@ -43,7 +43,14 @@ import { RegisterSelectionOnPageOptions, SelectionRectsCallback, } from './types'; -import { sliceBounds, rectsWithinSlice, glyphAt } from './utils'; +import { + sliceBounds, + rectsWithinSlice, + glyphAt, + findNearestGlyphWithModel, + GlyphAccelerationModel, + buildGlyphAccelerationModel, +} from './utils'; export class SelectionPlugin extends BasePlugin< SelectionPluginConfig, @@ -59,6 +66,7 @@ export class SelectionPlugin extends BasePlugin< /* interactive state */ private selecting = false; private anchor?: { page: number; index: number }; + private mouseDown = false; /** Page callbacks for rect updates */ private pageCallbacks = new Map void>(); @@ -145,50 +153,75 @@ export class SelectionPlugin extends BasePlugin< rects: selector.selectRectsForPage(this.state, pageIndex), boundingRect: selector.selectBoundingRectForPage(this.state, pageIndex), }); + let mouseDownData: { + x: number; + y: number; + num: number; + time: number; + clientX: number; + clientY: number; + } | null = null; const handlers: PointerEventHandlersWithLifecycle = { - onPointerDown: (point: Position, _evt, modeId) => { + onPointerDown: (point: Position, evt, modeId) => { if (!this.enabledModes.has(modeId)) return; - // Clear the selection this.clearSelection(); - // Get geometry from cache (or load if needed) const cached = this.state.geometry[pageIndex]; - if (cached) { - const g = glyphAt(cached, point); - if (g !== -1) { - this.beginSelection(pageIndex, g); - } - } + if (!cached) return; + + const res = findNearestGlyphWithModel(cached, { x: point.x, y: point.y }); + const num = res?.globalIndex ?? 
-1; + + mouseDownData = { + x: point.x, + y: point.y, + num, + time: performance.now(), + clientX: evt.clientX, + clientY: evt.clientY, + }; + this.mouseDown = true; }, - onPointerMove: (point: Position, _evt, modeId) => { + onPointerMove: (point: Position, evt, modeId) => { if (!this.enabledModes.has(modeId)) return; // Get cached geometry (should be instant if already loaded) const cached = this.state.geometry[pageIndex]; - if (cached) { - const g = glyphAt(cached, point); - - // Update cursor - if (g !== -1) { - this.interactionManagerCapability?.setCursor('selection-text', 'text', 10); - } else { - this.interactionManagerCapability?.removeCursor('selection-text'); - } - - // Update selection if we're selecting - if (this.selecting && g !== -1) { - this.updateSelection(pageIndex, g); - } + if (!cached) return; + const res = findNearestGlyphWithModel(cached, point); + if (!res) { + this.interactionManagerCapability?.removeCursor('selection-text'); + return; + } + if (res.isExactMatch) { + this.interactionManagerCapability?.setCursor('selection-text', 'text', 10); + } else { + this.interactionManagerCapability?.removeCursor('selection-text'); + } + + const g = res.globalIndex; + const isMouseDown = this.mouseDown; + if (isMouseDown && mouseDownData && !this.selecting) { + const deltaX = evt.clientX - mouseDownData.clientX; + const deltaY = evt.clientY - mouseDownData.clientY; + const distance = deltaX * deltaX + deltaY * deltaY; + if (distance > 25) this.beginSelection(pageIndex, g); + } else if (this.selecting) { + this.updateSelection(pageIndex, g); } }, onPointerUp: (_point: Position, _evt, modeId) => { if (!this.enabledModes.has(modeId)) return; + this.mouseDown = false; + mouseDownData = null; this.endSelection(); }, onHandlerActiveEnd: (modeId) => { if (!this.enabledModes.has(modeId)) return; + this.mouseDown = false; + mouseDownData = null; this.clearSelection(); }, }; @@ -228,19 +261,34 @@ export class SelectionPlugin extends BasePlugin< }); } - private 
getNewPageGeometryAndCache(pageIdx: number): PdfTask { + private getNewPageGeometryAndCache(pageIdx: number): PdfTask { if (!this.coreState.core.document) return PdfTaskHelper.reject({ code: PdfErrorCode.NotFound, message: 'Doc Not Found' }); const page = this.coreState.core.document.pages.find((p) => p.index === pageIdx)!; + const resTask: PdfTask = new Task(); + const task = this.engine.getPageGeometry(this.coreState.core.document, page); task.wait((geo) => { - this.dispatch(cachePageGeometry(pageIdx, geo)); + const model = buildGlyphAccelerationModel(geo); + this.dispatch(cachePageGeometry(pageIdx, model)); + resTask.resolve(model); }, ignore); - return task; + + // listen task abort to abort webworker task + resTask.wait( + () => {}, + (err) => { + if (err.type === 'abort') { + task.abort(err.reason); + } + }, + ); + + return resTask; } /* ── geometry cache ───────────────────────────────────── */ - private getOrLoadGeometry(pageIdx: number): PdfTask { + private getOrLoadGeometry(pageIdx: number): PdfTask { const cached = this.state.geometry[pageIdx]; if (cached) return PdfTaskHelper.resolve(cached); @@ -264,6 +312,7 @@ export class SelectionPlugin extends BasePlugin< private clearSelection() { this.selecting = false; + this.mouseDown = false; this.anchor = undefined; this.dispatch(clearSelection()); this.selChange$.emit(null); @@ -296,10 +345,10 @@ export class SelectionPlugin extends BasePlugin< for (let p = range.start.page; p <= range.end.page; p++) { const geo = this.state.geometry[p]; - const sb = sliceBounds(range, geo, p); + const sb = sliceBounds(range, geo.geo, p); if (!sb) continue; - allRects[p] = rectsWithinSlice(geo!, sb.from, sb.to); + allRects[p] = rectsWithinSlice(geo.geo!, sb.from, sb.to); allSlices[p] = { start: sb.from, count: sb.to - sb.from + 1 }; } diff --git a/packages/plugin-selection/src/lib/types.ts b/packages/plugin-selection/src/lib/types.ts index 91ec242b..9481bf04 100644 --- a/packages/plugin-selection/src/lib/types.ts +++ 
// ── Glyph acceleration model ────────────────────────────────────────────────
// Helpers for mapping a pointer position to the nearest glyph on a page.
// `PdfGlyphSlim`, `PdfPageGeometry` and `Position` are the types this module
// imports from '@embedpdf/models'.

/** Minimum vertical tolerance used when grouping glyph centres into one line. */
const MIN_LINE_THRESHOLD = 3;
/** Fraction of the average glyph height used as the line-grouping tolerance. */
const LINE_THRESHOLD_HEIGHT_RATIO = 0.5;
/** Average glyph height assumed when the page has no glyphs at all. */
const FALLBACK_GLYPH_HEIGHT = 10;
/** How many of the most recently created line groups a glyph is compared against. */
const LINE_LOOKBACK = 5;
/** Multiplier applied to the squared point-to-line Y offset when ranking candidates. */
const LINE_OFFSET_WEIGHT = 4;

/**
 * Squared Euclidean distance between two points.
 * Cheaper than the real distance and sufficient for comparisons.
 */
function distanceSquared(p1: Position, p2: Position): number {
  const dx = p1.x - p2.x;
  const dy = p1.y - p2.y;
  return dx * dx + dy * dy;
}

/**
 * Computes the centre of a glyph's bounding box.
 *
 * @param glyph - Glyph whose `x`, `y`, `width` and `height` describe its box
 * @returns The centre point of that box
 */
function getGlyphCenter(glyph: PdfGlyphSlim): Position {
  return {
    x: glyph.x + glyph.width / 2,
    y: glyph.y + glyph.height / 2,
  };
}

/**
 * Tests whether a point lies inside a glyph's bounding box (edges inclusive).
 *
 * @param pt - Point to test
 * @param glyphBounds - Box to test against
 * @returns `true` when the point is inside or on the edge of the box
 */
function isPointInGlyphBounds(
  pt: Position,
  glyphBounds: { x: number; y: number; width: number; height: number },
): boolean {
  const right = glyphBounds.x + glyphBounds.width;
  const bottom = glyphBounds.y + glyphBounds.height;
  return pt.x >= glyphBounds.x && pt.x <= right && pt.y >= glyphBounds.y && pt.y <= bottom;
}

/**
 * Compact per-glyph index record.
 * Bounds are not duplicated here; they are read back from the source geometry
 * through `runIndex` / `glyphIndexInRun` when needed.
 */
interface GlyphIndexEntry {
  /** `run.charStart` plus the glyph's position inside its run */
  globalIndex: number;
  /** Centre of the glyph's bounding box, used for distance calculations */
  center: Position;
  /** Index of the owning run in `geo.runs` */
  runIndex: number;
  /** Index of the glyph inside `geo.runs[runIndex].glyphs` */
  glyphIndexInRun: number;
}

/**
 * Reads a glyph's bounding box back from the geometry held by the model.
 */
function getGlyphBounds(model: GlyphAccelerationModel, entry: GlyphIndexEntry) {
  const { x, y, width, height } = model.geo.runs[entry.runIndex].glyphs[entry.glyphIndexInRun];
  return { x, y, width, height };
}

/**
 * One horizontal line of text: glyphs whose centres were grouped together
 * because their Y coordinates are within the line threshold of each other.
 */
interface TextLineIndex {
  /** Smallest glyph `y` on this line */
  yMin: number;
  /** Largest glyph `y + height` on this line */
  yMax: number;
  /** Mean of the glyph centre Y coordinates on this line */
  centerY: number;
  /** Glyphs of this line, sorted by ascending centre X */
  glyphs: GlyphIndexEntry[];
}

/**
 * Acceleration structure used for pointer-to-glyph lookups.
 */
export interface GlyphAccelerationModel {
  /** Text lines of the page, sorted by ascending `centerY` */
  lines: TextLineIndex[];
  /** The raw page geometry the model was built from */
  geo: PdfPageGeometry;
}

/**
 * Binary-searches `items` (sorted ascending by `key`) for the visited element
 * whose key is closest to `target`.
 *
 * @returns The index of the closest element, or -1 when `items` is empty or
 *          no comparison succeeded (for example a NaN target)
 */
function closestSortedIndex<T>(
  items: readonly T[],
  target: number,
  key: (item: T) => number,
): number {
  let lo = 0;
  let hi = items.length - 1;
  let best = -1;
  let bestGap = Infinity;

  while (lo <= hi) {
    const mid = Math.floor((lo + hi) / 2);
    const value = key(items[mid]);
    const gap = Math.abs(target - value);

    if (gap < bestGap) {
      bestGap = gap;
      best = mid;
    }

    if (target < value) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }

  return best;
}

/**
 * Builds the line index used by {@link findNearestGlyphWithModel}.
 *
 * Every glyph of every run is indexed by its centre, glyphs are grouped into
 * lines by centre Y, each line is sorted left-to-right, and the lines are
 * sorted top-to-bottom by `centerY`.
 *
 * No spatial grid is built: the model only exposes `lines` and `geo`, and the
 * lookup uses lines only. Deriving grid dimensions from the glyph extents is
 * also unsafe for degenerate pages (all glyphs with zero width or zero height
 * produce NaN / Infinity cell counts).
 *
 * @param geo - Raw page geometry containing the text runs
 * @returns A model holding the line index together with `geo` itself
 */
export function buildGlyphAccelerationModel(geo: PdfPageGeometry): GlyphAccelerationModel {
  const allGlyphs: GlyphIndexEntry[] = [];
  let totalHeight = 0;

  // Single pass: index every glyph and accumulate heights for the average.
  for (let runIndex = 0; runIndex < geo.runs.length; runIndex++) {
    const run = geo.runs[runIndex];
    for (let glyphIndexInRun = 0; glyphIndexInRun < run.glyphs.length; glyphIndexInRun++) {
      const glyph = run.glyphs[glyphIndexInRun];
      totalHeight += glyph.height;
      allGlyphs.push({
        globalIndex: run.charStart + glyphIndexInRun,
        center: getGlyphCenter(glyph),
        runIndex,
        glyphIndexInRun,
      });
    }
  }

  const avgGlyphHeight =
    allGlyphs.length > 0 ? totalHeight / allGlyphs.length : FALLBACK_GLYPH_HEIGHT;
  const lineThreshold = Math.max(MIN_LINE_THRESHOLD, avgGlyphHeight * LINE_THRESHOLD_HEIGHT_RATIO);

  // Visiting glyphs in ascending Y means a glyph can only belong to one of the
  // most recently opened groups, so only the last few are compared.
  allGlyphs.sort((a, b) => a.center.y - b.center.y);

  const lineGroups: GlyphIndexEntry[][] = [];
  const lineAverages: number[] = []; // running mean centre Y per group

  for (const entry of allGlyphs) {
    const firstCandidate = lineGroups.length - Math.min(LINE_LOOKBACK, lineGroups.length);
    let placed = false;

    for (let i = firstCandidate; i < lineGroups.length; i++) {
      if (Math.abs(entry.center.y - lineAverages[i]) <= lineThreshold) {
        const group = lineGroups[i];
        group.push(entry);
        // Incremental update of the running mean.
        lineAverages[i] = (lineAverages[i] * (group.length - 1) + entry.center.y) / group.length;
        placed = true;
        break;
      }
    }

    if (!placed) {
      lineGroups.push([entry]);
      lineAverages.push(entry.center.y);
    }
  }

  const lines: TextLineIndex[] = lineGroups.map((lineGlyphs) => {
    lineGlyphs.sort((a, b) => a.center.x - b.center.x);

    let yMin = Infinity;
    let yMax = -Infinity;
    let centerYSum = 0;

    for (const entry of lineGlyphs) {
      const glyph = geo.runs[entry.runIndex].glyphs[entry.glyphIndexInRun];
      yMin = Math.min(yMin, glyph.y);
      yMax = Math.max(yMax, glyph.y + glyph.height);
      centerYSum += entry.center.y;
    }

    return { yMin, yMax, centerY: centerYSum / lineGlyphs.length, glyphs: lineGlyphs };
  });

  // Final line means can differ slightly from creation order, so sort explicitly.
  lines.sort((a, b) => a.centerY - b.centerY);

  return { geo, lines };
}

/**
 * Result of a nearest-glyph lookup.
 */
export interface NearestGlyphResult {
  /** `run.charStart` plus the glyph's position inside its run */
  globalIndex: number;
  /** Index of the owning run in `PdfPageGeometry.runs` */
  runIndex: number;
  /** Index of the glyph inside its run (0-based) */
  glyphIndexInRun: number;
  /** `true` when the query point lies inside the glyph's bounding box */
  isExactMatch: boolean;
  /** Euclidean distance from the query point to the glyph centre */
  distance: number;
}

/**
 * Finds the glyph nearest to a point using the line index of the model.
 *
 * Stage 1 binary-searches the lines for the one whose `centerY` is closest to
 * the point and widens that to a small window of neighbouring lines.
 * Stage 2 binary-searches each candidate line by centre X and ranks the best
 * hit plus its two neighbours. Ranking uses the squared centre distance plus
 * a penalty of 4x the squared offset between the point and the line's
 * `centerY`, which favours glyphs on the line the pointer is vertically
 * closest to. The reported `distance` is the plain Euclidean distance and does
 * not include that penalty.
 *
 * @param model - Model produced by {@link buildGlyphAccelerationModel}
 * @param pt - Query point, in the same coordinate space as the glyph boxes
 * @returns The nearest glyph, or `null` when the page has no runs or no glyphs
 */
export function findNearestGlyphWithModel(
  model: GlyphAccelerationModel,
  pt: Position,
): NearestGlyphResult | null {
  const { lines } = model;
  if (model.geo.runs.length === 0 || lines.length === 0) return null;

  // Stage 1: closest line by centre Y, then a window of neighbours around it.
  const closestLineIndex = closestSortedIndex(lines, pt.y, (line) => line.centerY);
  if (closestLineIndex === -1) return null;

  const searchRadius = Math.min(3, Math.max(1, Math.ceil(lines.length / 20)));
  const startLine = Math.max(0, closestLineIndex - searchRadius);
  const endLine = Math.min(lines.length - 1, closestLineIndex + searchRadius);

  // Stage 2: best glyph across the candidate lines.
  let bestScore = Infinity;
  let bestDistSq = Infinity;
  let nearest: GlyphIndexEntry | null = null;

  for (let lineIndex = startLine; lineIndex <= endLine; lineIndex++) {
    const line = lines[lineIndex];
    const bestIndex = closestSortedIndex(line.glyphs, pt.x, (entry) => entry.center.x);
    if (bestIndex === -1) continue;

    const lineOffset = Math.abs(pt.y - line.centerY);
    const linePenalty = lineOffset * lineOffset * LINE_OFFSET_WEIGHT;

    // The binary search hit plus its immediate neighbours.
    const candidates = [bestIndex];
    if (bestIndex > 0) candidates.push(bestIndex - 1);
    if (bestIndex < line.glyphs.length - 1) candidates.push(bestIndex + 1);

    for (const index of candidates) {
      const entry = line.glyphs[index];
      const distSq = distanceSquared(pt, entry.center);
      const score = distSq + linePenalty;

      if (score < bestScore) {
        bestScore = score;
        bestDistSq = distSq;
        nearest = entry;
      }
    }
  }

  if (!nearest) return null;

  return {
    globalIndex: nearest.globalIndex,
    runIndex: nearest.runIndex,
    glyphIndexInRun: nearest.glyphIndexInRun,
    isExactMatch: isPointInGlyphBounds(pt, getGlyphBounds(model, nearest)),
    // sqrt is taken once, on the unweighted squared distance of the winner.
    distance: Math.sqrt(bestDistSq),
  };
}