Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
5e1902c
chore: add pdfjs-dist typings
fukudasjp Oct 15, 2021
824c2d7
fix: control PDF rendering tasks properly
fukudasjp Nov 9, 2021
c5e09cb
feat: add pdf text layer support
fukudasjp Oct 20, 2021
9f0dcd5
fix: apply review comments
fukudasjp Nov 16, 2021
cc06446
refactor: extract text rendering hook
fukudasjp Nov 18, 2021
de5731f
refactor: extract async func call hook
fukudasjp Nov 19, 2021
f64fa07
fix: revise how to import css from pdfjs
fukudasjp Nov 24, 2021
85e3538
Merge remote-tracking branch 'upstream/master' into feat/redner-pdf-text
fukudasjp Nov 24, 2021
3c08df9
fix: install @types/pdfjs-dist to yarn2
fukudasjp Nov 24, 2021
c722221
feat: add script to update style
fukudasjp Nov 24, 2021
69ff042
refactor: revise script for importing css
fukudasjp Nov 24, 2021
c0771fc
feat: add types and common utilities
fukudasjp Nov 10, 2021
66c7c56
feat: add option for bbox text to processDoc
fukudasjp Oct 15, 2021
47855f4
feat: add text layer classes
fukudasjp Nov 10, 2021
66b3e43
feat: add highlighting logic and README
fukudasjp Nov 10, 2021
e43318a
feat: add PDF highlight component
fukudasjp Nov 10, 2021
1d46e11
fix: fix readme
fukudasjp Nov 10, 2021
9b33674
fix: revise readme
fukudasjp Nov 10, 2021
73c2ff8
fix: fix readme
fukudasjp Nov 17, 2021
050db2d
refactor: extract logic of iterating range rects
fukudasjp Nov 17, 2021
f62c419
fix: apply review comments
fukudasjp Nov 17, 2021
3e06fda
fix: remove unnecessary comments
fukudasjp Nov 17, 2021
b1defdd
fix: highlighting on header and footer
fukudasjp Nov 17, 2021
7a6ef58
fix: fix boxUtil test failure
fukudasjp Nov 18, 2021
3d02caf
refactor: use one bbox intersection logic
fukudasjp Nov 18, 2021
3b810af
feat: add Japanese PDF sample
fukudasjp Nov 18, 2021
488b160
refactor: move cell trim method
fukudasjp Nov 19, 2021
dd9d5a0
fix: pdfjs typings version
fukudasjp Nov 24, 2021
5811f4f
fix: adapt to latest PdfViewer
fukudasjp Nov 24, 2021
17feb71
fix: use postcss to manipulate pdfjs-web css
fukudasjp Dec 2, 2021
158b8a2
fix: add comment to the style update script
fukudasjp Dec 2, 2021
0705d2e
refactor: move useAsyncFunctionCall to utils
fukudasjp Dec 2, 2021
9e9cad5
fix: name of package script
fukudasjp Dec 2, 2021
6ba0703
Merge remote-tracking branch 'upstream/master' into feat/redner-pdf-text
fukudasjp Dec 2, 2021
052336a
fix: apply CI comment
fukudasjp Dec 2, 2021
e081c92
fix: fix broken logic
fukudasjp Dec 2, 2021
68d895d
fix: remove unused code
fukudasjp Dec 2, 2021
8c82fae
fix: apply review comments around pdfjs css
fukudasjp Dec 3, 2021
2c28bd9
fix: pdfjs typings version
fukudasjp Dec 3, 2021
032842c
fix: apply review comments
fukudasjp Dec 3, 2021
58ec1c5
Merge branch 'feat/redner-pdf-text' into feat/highligh-on-pdf
fukudasjp Dec 3, 2021
0ccaf6c
Merge remote-tracking branch 'upstream/master' into feat/highligh-on-pdf
fukudasjp Dec 6, 2021
3eedcf8
refactor: refactor common utils
fukudasjp Dec 6, 2021
c13942b
refactor: refactor getTextBoxMapping
fukudasjp Dec 6, 2021
a9dd38e
fix: fix yarn error
fukudasjp Dec 6, 2021
f6fbcd2
fix: fix test error
fukudasjp Dec 6, 2021
dc09472
refactor: move utility methods
fukudasjp Dec 6, 2021
0967b05
fix: apply review comments
fukudasjp Dec 6, 2021
bb448d5
fix: remove unnecessary change
fukudasjp Dec 6, 2021
d651ea8
chore: add comment
fukudasjp Dec 6, 2021
fbac5ee
Merge remote-tracking branch 'upstream/master' into feat/highligh-on-pdf
fukudasjp Dec 7, 2021
e1fe864
chore: update yarn lock file
fukudasjp Dec 7, 2021
0911538
fix: apply review comments
fukudasjp Dec 7, 2021
457f2ec
refactor: extract common props
fukudasjp Dec 7, 2021
89e4f75
feat: export PdfViewerWithHighlight via DocPreview
fukudasjp Dec 7, 2021
21799dc
fix: fix app build
fukudasjp Dec 8, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/discovery-search-app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"carbon-components": "^10.6.0",
"carbon-components-react": "^7.7.0",
"classnames": "^2.2.6",
"core-js": "^2.6.12",
"cors": "^2.8.5",
"dotenv": "^8.1.0",
"express": "^4.17.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import SimpleDocument from './components/SimpleDocument/SimpleDocument';
import withErrorBoundary, { WithErrorBoundaryProps } from 'utils/hoc/withErrorBoundary';
import { defaultMessages, Messages } from './messages';
import HtmlView from './components/HtmlView/HtmlView';
import PdfViewerWithHighlight from './components/PdfViewerHighlight/PdfViewerWithHighlight';
import { isCsvFile, isJsonFile } from './utils/documentData';

const { ZOOM_IN, ZOOM_OUT } = PreviewToolbar;
Expand Down Expand Up @@ -154,6 +155,7 @@ function PreviewDocument({
const ErrorBoundDocumentPreview: any = withErrorBoundary(DocumentPreview);
ErrorBoundDocumentPreview.PreviewToolbar = PreviewToolbar;
ErrorBoundDocumentPreview.PreviewDocument = PreviewDocument;
ErrorBoundDocumentPreview.PdfViewerWithHighlight = PdfViewerWithHighlight;

export default ErrorBoundDocumentPreview;
export { ErrorBoundDocumentPreview as DocumentPreview };

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"document_id": "feab8705259090b89fbcbb15942cb10d",
"result_metadata": {
"collection_id": "b6cdf1cd-902c-8ea3-0000-017d32224d8f"
},
"enriched_text": [
{
"entities": [
{
"model_name": "natural_language_understanding",
"mentions": [
{
"confidence": 0.9950965,
"location": {
"end": 2,
"begin": 0
},
"text": "最初"
}
],
"text": "最初",
"type": "Ordinal"
}
]
}
],
"metadata": {
"parent_document_id": "feab8705259090b89fbcbb15942cb10d",
"customer_id": "IBMid-270001M55T"
},
"extracted_metadata": {
"sha1": "4FF2B41ED7A77975ABB21D9E4025DF31335E6451",
"numPages": "1",
"filename": "DiscoComponents-ja-updated.pdf",
"file_type": "pdf",
"text_mappings": "{\"text_mappings\":[{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,87.82411193847656,400.4930725097656,194.260009765625]},\"field\":{\"name\":\"title\",\"index\":0,\"span\":[0,20]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,411.83612060546875,262.9510192871094,425.62003993988037]},\"field\":{\"name\":\"subtitle\",\"index\":0,\"span\":[0,19]}},{\"page\":{\"page_number\":1,\"bbox\":[268.46466064453125,416.1183776855469,325.5726318359375,425.375319480896]},\"field\":{\"name\":\"subtitle\",\"index\":1,\"span\":[0,3]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,644.3582763671875,313.07745361328125,653.6152181625366]},\"field\":{\"name\":\"subtitle\",\"index\":2,\"span\":[0,15]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,456.12786865234375,95.6172866821289,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[0,4]}},{\"page\":{\"page_number\":1,\"bbox\":[100.0745620727539,452.9471435546875,257.0570983886719,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[4,27]}},{\"page\":{\"page_number\":1,\"bbox\":[261.5120849609375,452.9471435546875,408.1592712402344,463.0600233078003]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[27,49]}},{\"page\":{\"page_number\":1,\"bbox\":[412.5315856933594,456.12786865234375,464.3571472167969,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[49,54]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,452.9471435546875,534.0211791992188,596.2600049972534]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[54,234]}},{\"page\":{\"page_number\":1,\"bbox\":[54.519996643066406,679.4979858398438,535.1033325195312,723.2200269699097]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[234,353]}}],\"pages\":[{\"page_number\":0,\"height\":842.0,\"width\":595.0,\"origin\":\"TopLeft\"}]}",
"title": "Discovery Component README Japanese",
"publicationdate": "2021-11-18"
},
"subtitle": ["Discovery Component", "の使用", "サンプルアプリケーションの実行"],
"html": "<html><head><meta charset=\"UTF-8\"/><meta name=\"publicationdate\" content=\"2021-11-18\"/><meta name=\"numPages\" content=\"1\"/><title>Discovery Component README Japanese</title><style>.css_1902558513 { font: bold 18.96pt '/MS-PGothic-Bold'; } .css_904416330 { font: 11.04pt '/SymbolMT'; } .css_1548729052 { font: bold 18.96pt '/Tahoma-Bold-Bold'; } .css_2121319508 { font: bold 54.96pt '/Tahoma-Bold-Bold'; } .css_1950597664 { font: 13.92pt '/Tahoma'; } .css_1579914921 { font: 13.92pt '/MS-PGothic'; }</style></head><body><section id=\"1\" data-level=\"1\"><p text-alignment=\"left\"><span class=\"title css_2121319508\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"87.82411193847656\" height=\"106.43589782714844\" width=\"345.97319412231445\">Discovery Components</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1548729052\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"411.83612060546875\" height=\"13.783919334411621\" width=\"208.4311408996582\">Discovery Component</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1902558513\"><bbox page=\"1\" x=\"268.46466064453125\" y=\"416.1183776855469\" height=\"9.256941795349121\" width=\"57.10797119140625\">の使用</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1579914921\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"456.12786865234375\" height=\"6.932159900665283\" width=\"41.097408294677734\">最初に</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"100.0745620727539\" y=\"452.9471435546875\" height=\"10.112884998321533\" width=\"156.98253631591797\">IBM Watson Discovery の</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"261.5120849609375\" y=\"452.9471435546875\" height=\"10.112879753112793\" width=\"146.64718627929688\">Improve and Customize</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1579914921\"><bbox page=\"1\" 
x=\"412.5315856933594\" y=\"456.12786865234375\" height=\"6.932159900665283\" width=\"51.8255615234375\">ページで</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"452.9471435546875\" height=\"143.31286144256592\" width=\"479.5013008117676\">Document retrieval プロジェクトをカスタマイズする必要があります。たとえばファセットや検索 バーや検索結果を設定できます。その後 Discovery component を使ったアプリケ ーションを作成します。アプリケーションは指定したプロジェクトの設定をロードしま す。 必要なソフトウェア: git, nvm, yarn または npm</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1902558513\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"644.3582763671875\" height=\"9.256941795349121\" width=\"258.5575752258301\">サンプルアプリケーションの実行</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_904416330\"><bbox page=\"1\" x=\"54.519996643066406\" y=\"679.4979858398438\" height=\"43.72204113006592\" width=\"480.58333587646484\">• サンプルアプリケーションはこのライブラリーが提供するコアコンポーネントのカタログです。実際のデ ータを使ってコンポーネントがどのように動くかを簡単に見ることができます。コードを変更して、カスタ マイズする方法を確認することもできます。</bbox></span></p></section></body></html>",
"text": [
"最初に IBM Watson Discovery の Improve and Customize ページで Document retrieval プロジェクトをカスタマイズする必要があります。たとえばファセットや検索 バーや検索結果を設定できます。その後 Discovery component を使ったアプリケ ーションを作成します。アプリケーションは指定したプロジェクトの設定をロードしま す。 必要なソフトウェア: git, nvm, yarn または npm • サンプルアプリケーションはこのライブラリーが提供するコアコンポーネントのカタログです。実際のデ ータを使ってコンポーネントがどのように動くかを簡単に見ることができます。コードを変更して、カスタ マイズする方法を確認することもできます。"
],
"title": "Discovery Components",
"document_passages": [
{
"passage_text": "Discovery Components",
"start_offset": 0,
"end_offset": 20,
"field": "title"
}
],
"table_results_references": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import React, { FC, useMemo, useEffect } from 'react';
import cx from 'classnames';
import { settings } from 'carbon-components';
import { QueryResult } from 'ibm-watson/discovery/v2';
import { ProcessedDoc } from 'utils/document';
import { TextMappings } from '../../types';
import { PdfDisplayProps } from '../PdfViewer/types';
import { PdfRenderedText } from '../PdfViewer/PdfViewerTextLayer';
import { ExtractedDocumentInfo } from './utils/common/documentUtils';
import { Highlighter } from './utils/Highlighter';
import { HighlightProps } from './types';

/**
 * Props accepted by PdfViewerHighlight: the shared PDF display props and the
 * highlight props, plus the highlight-layer specific fields declared below.
 */
type Props = PdfDisplayProps &
HighlightProps & {
/**
* Class name to style highlight layer (applied to the layer's root element)
*/
className?: string;

/**
* Parsed document information. Its `textMappings` (and, optionally, its
* `processedDoc`) are used to build the highlighter; when this is null or has
* no text mappings, no highlight boxes are rendered.
*/
parsedDocument: ExtractedDocumentInfo | null;

/**
* PDF text content information in a page from parsed PDF. Its `textDivs` are
* handed to the highlighter once one is available.
*/
pdfRenderedText: PdfRenderedText | null;
};

/**
 * Text highlight layer for PdfViewer.
 *
 * Builds a highlighter for the current `page` from the parsed document (text
 * mappings, optional HTML bbox info, optional PDF text items), asks it for the
 * boxes of every entry in `highlights`, and renders one `<div>` per box.
 * Box coordinates are multiplied by `scale` and applied as inline
 * `left`/`top`/`width`/`height` styles in pixels.
 *
 * - `_useHtmlBbox` (default `true`): pass the processed HTML document to the
 *   highlighter.
 * - `_usePdfTextItem` (default `true`): pass the rendered PDF text to the
 *   highlighter.
 *
 * When no highlighter can be created (no text mappings), nothing but the empty
 * layer container is rendered.
 */
const PdfViewerHighlight: FC<Props> = ({
  className,
  highlightClassName,
  document,
  parsedDocument,
  page,
  highlights,
  pdfRenderedText,
  scale,
  _useHtmlBbox = true,
  _usePdfTextItem = true
}) => {
  const highlighter = useHighlighter({
    document,
    textMappings: parsedDocument?.textMappings,
    processedDoc: _useHtmlBbox ? parsedDocument?.processedDoc : undefined,
    pdfRenderedText: (_usePdfTextItem && pdfRenderedText) || undefined,
    pageNum: page
  });

  // Keep the highlighter's view of the rendered text-layer divs up to date.
  // NOTE(review): `highlightBoxes` below is memoized during render, i.e. before
  // this effect runs, and does not depend on `textDivs` -- confirm that
  // `Highlighter.getHighlight` does not need the divs at call time.
  const { textDivs } = pdfRenderedText || {};
  useEffect(() => {
    if (highlighter) {
      highlighter.setTextContentDivs(textDivs);
    }
  }, [highlighter, textDivs]);

  // One entry per requested highlight; `undefined` while there is no highlighter.
  const highlightBoxes = useMemo(() => {
    return highlights.map(highlight => {
      return highlighter?.getHighlight(highlight);
    });
  }, [highlighter, highlights]);

  // Extra space (in unscaled units) added around every highlight box.
  // The box origin moves up/left by `padding`, so the size must grow by
  // `2 * padding` for the padding to be applied on all four sides.
  const padding = 0;

  return (
    <div className={cx(`${settings.prefix}--document-preview-pdf-viewer-highlight`, className)}>
      {highlightBoxes.map((hl, hlIndex) => {
        return (
          <React.Fragment key={`k-${hlIndex}`}>
            {hl?.boxes.map((item, index) => {
              const [left, top, right, bottom] = item.bbox;
              return (
                <div
                  key={`${left}${top}${right}${bottom}_${index}`}
                  className={cx(
                    `${settings.prefix}--document-preview-pdf-viewer-highlight--item`,
                    highlightClassName,
                    hl.className
                  )}
                  style={{
                    left: `${(left - padding) * scale}px`,
                    top: `${(top - padding) * scale}px`,
                    width: `${(right - left + 2 * padding) * scale}px`,
                    height: `${(bottom - top + 2 * padding) * scale}px`
                  }}
                  data-testid="highlight"
                />
              );
            })}
          </React.Fragment>
        );
      })}
    </div>
  );
};

/**
 * Hook returning a memoized `Highlighter` for a single page.
 *
 * Yields `null` when `textMappings` is not provided. Otherwise a new
 * `Highlighter` is constructed whenever any of the inputs changes. The HTML
 * bbox info is only supplied when `processedDoc` is given, and the PDF text
 * content info only when `pdfRenderedText` carries both `textContent` and
 * `viewport`.
 */
const useHighlighter = ({
  document,
  textMappings,
  processedDoc,
  pdfRenderedText,
  pageNum
}: {
  document: QueryResult;
  textMappings?: TextMappings;
  processedDoc?: ProcessedDoc;
  pdfRenderedText?: PdfRenderedText;
  pageNum: number;
}) =>
  useMemo(() => {
    // Without text mappings there is nothing to highlight against.
    if (!textMappings) {
      return null;
    }

    const htmlBboxInfo = processedDoc && {
      bboxes: processedDoc.bboxes,
      styles: processedDoc.styles
    };

    // Only forward the rendered text once both its content and viewport exist.
    const isPdfTextReady = pdfRenderedText?.textContent && pdfRenderedText?.viewport;
    const pdfTextContentInfo = isPdfTextReady ? pdfRenderedText : undefined;

    return new Highlighter({
      document,
      textMappings,
      pageNum,
      htmlBboxInfo,
      pdfTextContentInfo
    });
  }, [document, pageNum, pdfRenderedText, processedDoc, textMappings]);

export default PdfViewerHighlight;
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Base highlight color: the Carbon highlight color for the white theme.
// Reference: https://www.carbondesignsystem.com/guidelines/color/usage/
$highlight: #d0e2ff;

// Flex container holding the panes.
.withTextSelection {
  display: flex;
}

// Right-hand pane: takes a 30% flex basis and scrolls vertically on its own.
.withTextSelection .rightPane {
  flex: 1 1 30%;
  height: 100vh;
  overflow-y: scroll;
}

.withTextSelection .rightPane p {
  margin-bottom: 0.5rem;
}

// Monospaced text that keeps its whitespace but may wrap inside long words.
.withTextSelection .text {
  overflow-wrap: break-word;
  white-space: pre-wrap;
  font-size: 10pt;
  font-family: 'Courier New', Courier, monospace;
}

// Translucent highlight, using a darkened variant of the base highlight color.
.withTextSelection .highlight {
  opacity: 0.3;
  background: darken($highlight, 30%);
}
Loading