Skip to content

Commit eb88262

Browse files
committed
Add PDF page virtualization + virtualized scaling + make highlight better
1 parent 86c263c commit eb88262

File tree

5 files changed

+180
-69
lines changed

5 files changed

+180
-69
lines changed

package-lock.json

Lines changed: 52 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"dependencies": {
1212
"@headlessui/react": "^2.2.0",
1313
"@types/howler": "^2.2.12",
14+
"@types/react-window": "^1.8.8",
1415
"@types/string-similarity": "^4.0.2",
1516
"@types/uuid": "^10.0.0",
1617
"compromise": "^14.14.4",
@@ -23,6 +24,7 @@
2324
"react-dom": "^19.0.0",
2425
"react-dropzone": "^14.3.5",
2526
"react-pdf": "^9.2.1",
27+
"react-window": "^1.8.11",
2628
"string-similarity": "^4.0.4",
2729
"uuid": "^11.0.5"
2830
},

src/app/globals.css

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,7 @@ body {
3535
-webkit-font-smoothing: antialiased;
3636
-moz-osx-font-smoothing: grayscale;
3737
}
38+
39+
.pdf-list > div { /* direct child div of the element carrying class "pdf-list" (the className given to FixedSizeList in PDFViewer.tsx) */
40+
padding: 0 !important; /* force zero padding on that child; !important beats competing declarations */
41+
}

src/components/PDFViewer.tsx

Lines changed: 109 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,62 @@
11
'use client';
22

3-
import { RefObject } from 'react';
3+
import { RefObject, useCallback, useState, useEffect, useRef } from 'react';
44
import { Document, Page } from 'react-pdf';
5+
import { FixedSizeList } from 'react-window';
56
import 'react-pdf/dist/Page/AnnotationLayer.css';
67
import 'react-pdf/dist/Page/TextLayer.css';
7-
import { useState, useEffect, useRef } from 'react';
88
import { PDFSkeleton } from './PDFSkeleton';
99
import { useTTS } from '@/contexts/TTSContext';
1010
import { usePDF } from '@/contexts/PDFContext';
11+
import { pdfjs } from 'react-pdf';
1112

1213
interface PDFViewerProps {
1314
pdfData: Blob | undefined;
1415
zoomLevel: number;
1516
}
1617

18+
interface PageItemData { // shape of the itemData object PDFViewer hands to FixedSizeList and that PageComponent receives as `data`
19+
numPages: number; // total page count; shown as the right-hand side of the "n / total" label
20+
scale: number; // forwarded unchanged to the react-pdf <Page scale> prop
21+
containerWidth: number; // supplied by PDFViewer; PageComponent does not read it
22+
pageWidth: number; // supplied by PDFViewer (pageSize.width); PageComponent does not read it
23+
pageHeight: number; // row height applied to the wrapper div; same value PDFViewer passes as itemSize
24+
}
25+
26+
// Row renderer passed as the child of FixedSizeList in PDFViewer.
// For the zero-based row `index` it renders an "index + 1 / numPages" label
// followed by the react-pdf <Page> for page number index + 1 at the given scale.
//   index - zero-based row index supplied by the list
//   style - style object supplied by the list for this row (presumably its positioning styles; typed `any` here)
//   data  - the PageItemData object passed to the list as itemData
// NOTE(review): both branches of the `index==0` ternary below render identical markup,
// so the condition currently has no visible effect; it also uses loose `==`.
const PageComponent = ({ index, style, data }: { index: number; style: any; data: PageItemData }) => {
27+
const { numPages, scale, pageHeight } = data; // containerWidth and pageWidth are present on data but unused here
28+
29+
return (
30+
<div style={{
31+
...style, // start from the style supplied for this row
32+
height: pageHeight, // listed after the spread, so it replaces any height carried in `style`
33+
display: 'flex',
34+
flexDirection: 'column',
35+
alignItems: 'center'
36+
}}>
37+
{index==0 ? (<div className="bg-offbase m-4 px-2 py-0.5 rounded-full w-fit">
38+
<p className="text-xs">
39+
{index + 1} / {numPages}
40+
</p>
41+
</div>) :
42+
(<div className="bg-offbase m-4 px-2 py-0.5 rounded-full w-fit">
43+
<p className="text-xs">
44+
{index + 1} / {numPages}
45+
</p>
46+
</div>)}
47+
<div className="flex justify-center">
48+
<Page
49+
pageNumber={index + 1}
50+
renderAnnotationLayer={true}
51+
renderTextLayer={true}
52+
className="shadow-lg"
53+
scale={scale}
54+
/>
55+
</div>
56+
</div>
57+
);
58+
};
59+
1760
export function PDFViewer({ pdfData, zoomLevel }: PDFViewerProps) {
1861
const [numPages, setNumPages] = useState<number>();
1962
const [containerWidth, setContainerWidth] = useState<number>(0);
@@ -23,6 +66,8 @@ export function PDFViewer({ pdfData, zoomLevel }: PDFViewerProps) {
2366
const [loadingError, setLoadingError] = useState<string>();
2467
const containerRef = useRef<HTMLDivElement>(null);
2568
const { extractTextFromPDF, highlightPattern, clearHighlights, handleTextClick } = usePDF();
69+
const [pageHeight, setPageHeight] = useState(800); // Default height
70+
const [pageSize, setPageSize] = useState({ width: 595, height: 842 }); // A4 default
2671

2772
// Add static styles once during component initialization
2873
const styleElement = document.createElement('style');
@@ -146,12 +191,18 @@ export function PDFViewer({ pdfData, zoomLevel }: PDFViewerProps) {
146191
}, [pdfText, currentSentence, highlightPattern, clearHighlights]);
147192

148193
// Add scale calculation function
149-
const calculateScale = (pageWidth: number = 595) => { // 595 is default PDF width in points
150-
const margin = 24; // 24px padding on each side
151-
const targetWidth = containerWidth - margin;
152-
const baseScale = targetWidth / pageWidth;
194+
const calculateScale = useCallback(() => {
195+
//const margin = 48; // 24px padding on each side
196+
//const targetWidth = containerWidth - margin;
197+
const baseScale = containerWidth / pageSize.width;
153198
return baseScale * (zoomLevel / 100);
154-
};
199+
}, [containerWidth, zoomLevel, pageSize.width]);
200+
201+
// Computes the row height used for the virtualized list: the given viewport's
// height multiplied by the current calculateScale() result, plus a fixed allowance.
// Memoized on calculateScale, so its identity changes whenever calculateScale's does.
const calculatePageHeight = useCallback((viewport: { width: number; height: number }) => {
202+
const scale = calculateScale();
203+
const scaledHeight = viewport.height * scale; // viewport.width is accepted but not used in this calculation
204+
return scaledHeight + 55; // fixed 55px added on top of the scaled page height. NOTE(review): presumably room for the page-number label rendered above each page — confirm the value
205+
}, [calculateScale]);
155206

156207
// Add resize observer effect
157208
useEffect(() => {
@@ -168,9 +219,37 @@ export function PDFViewer({ pdfData, zoomLevel }: PDFViewerProps) {
168219
return () => observer.disconnect();
169220
}, []);
170221

171-
function onDocumentLoadSuccess({ numPages }: { numPages: number }): void {
222+
const handleLoadSuccess = useCallback(async ({ numPages }: { numPages: number }) => {
172223
setNumPages(numPages);
173-
}
224+
225+
if (pdfData) {
226+
try {
227+
// Convert Blob to ArrayBuffer
228+
const arrayBuffer = await pdfData.arrayBuffer();
229+
const pdf = await pdfjs.getDocument(arrayBuffer).promise;
230+
const page = await pdf.getPage(1);
231+
const viewport = page.getViewport({ scale: 1 });
232+
233+
setPageSize({
234+
width: viewport.width,
235+
height: viewport.height
236+
});
237+
238+
const newPageHeight = calculatePageHeight(viewport);
239+
setPageHeight(newPageHeight);
240+
} catch (error) {
241+
console.error('Error measuring page:', error);
242+
}
243+
}
244+
}, [pdfData, calculatePageHeight]);
245+
246+
// Recompute the list row height whenever zoomLevel, pageSize, or calculatePageHeight changes (calculatePageHeight follows calculateScale, which also depends on containerWidth)
247+
useEffect(() => {
248+
if (pageSize.width && pageSize.height) { // skip the update while either dimension is 0 or otherwise falsy
249+
const newPageHeight = calculatePageHeight(pageSize); // pageSize holds the stored page width/height (A4 default until a page has been measured)
250+
setPageHeight(newPageHeight);
251+
}
252+
}, [zoomLevel, pageSize, calculatePageHeight]);
174253

175254
return (
176255
<div
@@ -185,30 +264,28 @@ export function PDFViewer({ pdfData, zoomLevel }: PDFViewerProps) {
185264
loading={<PDFSkeleton />}
186265
noData={<PDFSkeleton />}
187266
file={pdfDataUrl}
188-
onLoadSuccess={onDocumentLoadSuccess}
189-
className="flex flex-col items-center m-0"
267+
onLoadSuccess={handleLoadSuccess}
268+
className="flex flex-col items-center"
190269
>
191-
{Array.from(
192-
new Array(numPages),
193-
(el, index) => (
194-
<div key={`page_${index + 1}`}>
195-
<div className="bg-offbase my-4 px-2 py-0.5 rounded-full w-fit">
196-
<p className="text-xs">
197-
{index + 1} / {numPages}
198-
</p>
199-
</div>
200-
<div className="flex justify-center">
201-
<Page
202-
pageNumber={index + 1}
203-
renderAnnotationLayer={true}
204-
renderTextLayer={true}
205-
className="shadow-lg"
206-
scale={calculateScale()}
207-
/>
208-
</div>
209-
</div>
210-
),
211-
)}
270+
{numPages && containerWidth ? (
271+
<FixedSizeList
272+
height={Math.min(window.innerHeight - 100, numPages * pageHeight)}
273+
width={containerWidth}
274+
itemCount={numPages}
275+
itemSize={pageHeight}
276+
itemData={{
277+
numPages,
278+
scale: calculateScale(),
279+
containerWidth,
280+
pageWidth: pageSize.width,
281+
pageHeight: pageHeight,
282+
}}
283+
className="pdf-list"
284+
overscanCount={2} // Add overscanning for smoother scrolling
285+
>
286+
{PageComponent}
287+
</FixedSizeList>
288+
) : null}
212289
</Document>
213290
</div>
214291
);

src/contexts/PDFContext.tsx

Lines changed: 13 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -276,48 +276,26 @@ export function PDFProvider({ children }: { children: ReactNode }) {
276276
text: (node.textContent || '').trim(),
277277
})).filter((node) => node.text.length > 0);
278278

279-
// Calculate the visible area of the container
280-
const containerRect = container.getBoundingClientRect();
281-
const visibleTop = container.scrollTop;
282-
const visibleBottom = visibleTop + containerRect.height;
283-
284-
// Find nodes within the visible area and a buffer zone
285-
const bufferSize = containerRect.height; // One screen height buffer
286-
const visibleNodes = allText.filter(({ element }) => {
287-
const rect = element.getBoundingClientRect();
288-
const elementTop = rect.top - containerRect.top + container.scrollTop;
289-
return elementTop >= (visibleTop - bufferSize) && elementTop <= (visibleBottom + bufferSize);
290-
});
291-
292-
// Search for the best match within the visible area first
293-
let bestMatch = findBestTextMatch(visibleNodes, cleanPattern, cleanPattern.length * 2);
294-
295-
// If no good match found in visible area, search the entire document
296-
if (bestMatch.rating < 0.3) {
297-
bestMatch = findBestTextMatch(allText, cleanPattern, cleanPattern.length * 2);
298-
}
299-
279+
// Search for the best match in the entire document
280+
const bestMatch = findBestTextMatch(allText, cleanPattern, cleanPattern.length * 2);
300281
const similarityThreshold = bestMatch.lengthDiff < cleanPattern.length * 0.3 ? 0.3 : 0.5;
301282

302-
if (bestMatch.rating >= similarityThreshold) {
283+
if (bestMatch.rating >= similarityThreshold && bestMatch.elements.length > 0) {
284+
// Highlight all matching elements
303285
bestMatch.elements.forEach((element) => {
304286
element.style.backgroundColor = 'grey';
305287
element.style.opacity = '0.4';
306288
});
307289

308-
if (bestMatch.elements.length > 0) {
309-
const element = bestMatch.elements[0];
310-
const elementRect = element.getBoundingClientRect();
311-
const elementTop = elementRect.top - containerRect.top + container.scrollTop;
312-
313-
// Only scroll if the element is outside the visible area
314-
if (elementTop < visibleTop || elementTop > visibleBottom) {
315-
container.scrollTo({
316-
top: elementTop - containerRect.height / 3, // Position the highlight in the top third
317-
behavior: 'smooth',
318-
});
319-
}
320-
}
290+
// Get the first element to scroll to
291+
const firstElement = bestMatch.elements[0];
292+
293+
// Scroll the element into view with smooth behavior
294+
firstElement.scrollIntoView({
295+
behavior: 'smooth',
296+
block: 'center',
297+
inline: 'nearest'
298+
});
321299
}
322300
}, [clearHighlights, findBestTextMatch]);
323301

0 commit comments

Comments
 (0)