55\usepackage {hyperref } % hyperlinks
66\usepackage {url } % simple URL typesetting
77\usepackage {booktabs } % professional-quality tables
8+ \usepackage {array } % extended column defs
9+ \usepackage {multirow } % multirow cells
10+ \usepackage {makecell } % line breaks in cells
811\usepackage {amsfonts } % blackboard math symbols
912\usepackage {nicefrac } % compact symbols for 1/2, etc.
1013\usepackage {microtype } % microtypography
@@ -128,12 +131,14 @@ \section{4 Performance}
128131
129132\begin {table }[h]
130133\caption {Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads.}
131- \begin {tabular }{|l|l|l|l|l|l|l|l|}
132- \hline
133- CPU & Thread budget & native backend & native backend & native backend & pypdfium backend & pypdfium backend & pypdfium backend \\ \hline
134- & & TTS & Pages/s & Mem & TTS & Pages/s & Mem \\ \hline
135- Apple M3 Max & 4 & 177 s 167 s & 1.27 1.34 & 6.20 GB & 103 s 92 s & 2.18 2.45 & 2.56 GB \\ \hline
136- (16 cores) Intel(R) Xeon E5-2690 & 16 4 16 & 375 s 244 s & 0.60 0.92 & 6.16 GB & 239 s 143 s & 0.94 1.57 & 2.42 GB \\ \hline
134+ \begin {tabular }{llllllll}
135+ \toprule
136+ CPU & Thread budget & \multicolumn {3}{l}{native backend} & \multicolumn {3}{l}{pypdfium backend} \\
137+ \midrule
138+ & & TTS & Pages/s & Mem & TTS & Pages/s & Mem \\
139+ Apple M3 Max & 4 & 177 s 167 s & 1.27 1.34 & 6.20 GB & 103 s 92 s & 2.18 2.45 & 2.56 GB \\
140+ (16 cores) Intel(R) Xeon E5-2690 & 16 4 16 & 375 s 244 s & 0.60 0.92 & 6.16 GB & 239 s 143 s & 0.94 1.57 & 2.42 GB \\
141+ \bottomrule
137142\end {tabular }
138143\end {table }
139144
@@ -276,10 +281,12 @@ \section{ACMReference Format:}
276281Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.
277282
278283\begin {table }[h]
279- \begin {tabular }{|l|l|l|l|l|}
280- \hline
281- & human & MRCNN R50 R101 & FRCNN R101 & YOLO v5x6 \\ \hline
282- Caption Footnote Formula List-item Page-footer Page-header Picture Section-header Table Text Title All & 84-89 83-91 83-85 87-88 93-94 85-89 69-71 83-84 77-81 84-86 & 68.4 71.5 70.9 60.1 63.4 81.2 80.8 61.6 59.3 71.9 70.0 71.7 72.7 67.6 69.3 82.2 82.9 85.8 76.7 80.4 72.4 73.5 & 70.1 73.7 63.5 81.0 58.9 72.0 72.0 68.4 82.2 85.4 79.9 73.4 & 77.7 77.2 66.2 86.2 61.1 67.9 74.6 86.3 88.1 82.7 76.8 \\ \hline
284+ \begin {tabular }{lllll}
285+ \toprule
286+ & human & MRCNN R50 R101 & FRCNN R101 & YOLO v5x6 \\
287+ \midrule
288+ Caption Footnote Formula List-item Page-footer Page-header Picture Section-header Table Text Title All & 84-89 83-91 83-85 87-88 93-94 85-89 69-71 83-84 77-81 84-86 & 68.4 71.5 70.9 60.1 63.4 81.2 80.8 61.6 59.3 71.9 70.0 71.7 72.7 67.6 69.3 82.2 82.9 85.8 76.7 80.4 72.4 73.5 & 70.1 73.7 63.5 81.0 58.9 72.0 72.0 68.4 82.2 85.4 79.9 73.4 & 77.7 77.2 66.2 86.2 61.1 67.9 74.6 86.3 88.1 82.7 76.8 \\
289+ \bottomrule
283290\end {tabular }
284291\end {table }
285292
@@ -356,22 +363,24 @@ \section{Baselines for Object Detection}
356363\end {figure }
357364
358365\begin {table }[h]
359- \begin {tabular }{|l|l|l|l|l|l|l|l|l|l|l|l|}
360- \hline
361- class label & Count & \% of Total & \% of Total & \% of Total & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) & triple inter-annotator mAP @ 0.5-0.95 (\% ) \\ \hline
362- class label & Count & Train & Test & Val & All & Fin & Man & Sci & Law & Pat & Ten \\ \hline
363- Caption & 22524 & 2.04 & 1.77 & 2.32 & 84-89 & 40-61 & 86-92 & 94-99 & 95-99 & 69-78 & n/a \\ \hline
364- Footnote & 6318 & 0.60 & 0.31 & 0.58 & 83-91 & n/a & 100 & 62-88 & 85-94 & n/a & 82-97 \\ \hline
365- Formula & 25027 & 2.25 & 1.90 & 2.96 & 83-85 & n/a & n/a & 84-87 & 86-96 & n/a & n/a \\ \hline
366- List-item & 185660 & 17.19 & 13.34 & 15.82 & 87-88 & 74-83 & 90-92 & 97-97 & 81-85 & 75-88 & 93-95 \\ \hline
367- Page-footer & 70878 & 6.51 & 5.58 & 6.00 & 93-94 & 88-90 & 95-96 & 100 & 92-97 & 100 & 96-98 \\ \hline
368- Page-header & 58022 & 5.10 & 6.70 & 5.06 & 85-89 & 66-76 & 90-94 & 98-100 & 91-92 & 97-99 & 81-86 \\ \hline
369- Picture & 45976 & 4.21 & 2.78 & 5.31 & 69-71 & 56-59 & 82-86 & 69-82 & 80-95 & 66-71 & 59-76 \\ \hline
370- Section-header & 142884 & 12.60 & 15.77 & 12.85 & 83-84 & 76-81 & 90-92 & 94-95 & 87-94 & 69-73 & 78-86 \\ \hline
371- Table & 34733 & 3.20 & 2.27 & 3.60 & 77-81 & 75-80 & 83-86 & 98-99 & 58-80 & 79-84 & 70-85 \\ \hline
372- Text & 510377 & 45.82 & 49.28 & 45.00 & 84-86 & 81-86 & 88-93 & 89-93 & 87-92 & 71-79 & 87-95 \\ \hline
373- Title & 5071 & 0.47 & 0.30 & 0.50 & 60-72 & 24-63 & 50-63 & 94-100 & 82-96 & 68-79 & 24-56 \\ \hline
374- Total & 1107470 & 941123 & 99816 & 66531 & 82-83 & 71-74 & 79-81 & 89-94 & 86-91 & 71-76 & 68-85 \\ \hline
366+ \begin {tabular }{llllllllllll}
367+ \toprule
368+ \multirow {2}{*}{class label} & \multirow {2}{*}{Count} & \multicolumn {3}{l}{\% of Total} & \multicolumn {7}{l}{triple inter-annotator mAP @ 0.5-0.95 (\% )} \\
369+ \midrule
370+ & & Train & Test & Val & All & Fin & Man & Sci & Law & Pat & Ten \\
371+ Caption & 22524 & 2.04 & 1.77 & 2.32 & 84-89 & 40-61 & 86-92 & 94-99 & 95-99 & 69-78 & n/a \\
372+ Footnote & 6318 & 0.60 & 0.31 & 0.58 & 83-91 & n/a & 100 & 62-88 & 85-94 & n/a & 82-97 \\
373+ Formula & 25027 & 2.25 & 1.90 & 2.96 & 83-85 & n/a & n/a & 84-87 & 86-96 & n/a & n/a \\
374+ List-item & 185660 & 17.19 & 13.34 & 15.82 & 87-88 & 74-83 & 90-92 & 97-97 & 81-85 & 75-88 & 93-95 \\
375+ Page-footer & 70878 & 6.51 & 5.58 & 6.00 & 93-94 & 88-90 & 95-96 & 100 & 92-97 & 100 & 96-98 \\
376+ Page-header & 58022 & 5.10 & 6.70 & 5.06 & 85-89 & 66-76 & 90-94 & 98-100 & 91-92 & 97-99 & 81-86 \\
377+ Picture & 45976 & 4.21 & 2.78 & 5.31 & 69-71 & 56-59 & 82-86 & 69-82 & 80-95 & 66-71 & 59-76 \\
378+ Section-header & 142884 & 12.60 & 15.77 & 12.85 & 83-84 & 76-81 & 90-92 & 94-95 & 87-94 & 69-73 & 78-86 \\
379+ Table & 34733 & 3.20 & 2.27 & 3.60 & 77-81 & 75-80 & 83-86 & 98-99 & 58-80 & 79-84 & 70-85 \\
380+ Text & 510377 & 45.82 & 49.28 & 45.00 & 84-86 & 81-86 & 88-93 & 89-93 & 87-92 & 71-79 & 87-95 \\
381+ Title & 5071 & 0.47 & 0.30 & 0.50 & 60-72 & 24-63 & 50-63 & 94-100 & 82-96 & 68-79 & 24-56 \\
382+ Total & 1107470 & 941123 & 99816 & 66531 & 82-83 & 71-74 & 79-81 & 89-94 & 86-91 & 71-76 & 68-85 \\
383+ \bottomrule
375384\end {tabular }
376385\end {table }
377386
@@ -392,22 +401,24 @@ \section{Baselines for Object Detection}
392401\end {figure }
393402
394403\begin {table }[h]
395- \begin {tabular }{|l|l|l|l|l|l|l|l|l|l|l|l|}
396- \hline
397- & & \% of Total & \% of Total & \% of Total & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) & triple inter- annotator mAP @ 0.5-0.95 (\% ) \\ \hline
398- class label & Count & Train & Test & Val & All & Fin & Man & Sci & Law & Pat & Ten \\ \hline
399- Caption & 22524 & 2.04 & 1.77 & 2.32 & 84-89 & 40-61 & 86-92 & 94-99 & 95-99 & 69-78 & n/a \\ \hline
400- Footnote & 6318 & 0.60 & 0.31 & 0.58 & 83-91 & n/a & 100 & 62-88 & 85-94 & n/a & 82-97 \\ \hline
401- Formula & 25027 & 2.25 & 1.90 & 2.96 & 83-85 & n/a & n/a & 84-87 & 86-96 & n/a & n/a \\ \hline
402- List-item & 185660 & 17.19 & 13.34 & 15.82 & 87-88 & 74-83 & 90-92 & 97-97 & 81-85 & 75-88 & 93-95 \\ \hline
403- Page- footer & 70878 & 6.51 & 5.58 & 6.00 & 93-94 & 88-90 & 95-96 & 100 & 92-97 & 100 & 96-98 \\ \hline
404- Page- header & 58022 & 5.10 & 6.70 & 5.06 & 85-89 & 66-76 & 90-94 & 98-100 & 91-92 & 97-99 & 81-86 \\ \hline
405- Picture & 45976 & 4.21 & 2.78 & 5.31 & 69-71 & 56-59 & 82-86 & 69-82 & 80-95 & 66-71 & 59-76 \\ \hline
406- Section- header & 142884 & 12.60 & 15.77 & 12.85 & 83-84 & 76-81 & 90-92 & 94-95 & 87-94 & 69-73 & 78-86 \\ \hline
407- Table & 34733 & 3.20 & 2.27 & 3.60 & 77-81 & 75-80 & 83-86 & 98-99 & 58-80 & 79-84 & 70-85 \\ \hline
408- Text & 510377 & 45.82 & 49.28 & 45.00 & 84-86 & 81-86 & 88-93 & 89-93 & 87-92 & 71-79 & 87-95 \\ \hline
409- Title & 5071 & 0.47 & 0.30 & 0.50 & 60-72 & 24-63 & 50-63 & 94-100 & 82-96 & 68-79 & 24-56 \\ \hline
410- Total & 1107470 & 941123 & 99816 & 66531 & 82-83 & 71-74 & 79-81 & 89-94 & 86-91 & 71-76 & 68-85 \\ \hline
404+ \begin {tabular }{llllllllllll}
405+ \toprule
406+ & & \multicolumn {3}{l}{\% of Total} & \multicolumn {7}{l}{triple inter-annotator mAP @ 0.5-0.95 (\% )} \\
407+ \midrule
408+ class label & Count & Train & Test & Val & All & Fin & Man & Sci & Law & Pat & Ten \\
409+ Caption & 22524 & 2.04 & 1.77 & 2.32 & 84-89 & 40-61 & 86-92 & 94-99 & 95-99 & 69-78 & n/a \\
410+ Footnote & 6318 & 0.60 & 0.31 & 0.58 & 83-91 & n/a & 100 & 62-88 & 85-94 & n/a & 82-97 \\
411+ Formula & 25027 & 2.25 & 1.90 & 2.96 & 83-85 & n/a & n/a & 84-87 & 86-96 & n/a & n/a \\
412+ List-item & 185660 & 17.19 & 13.34 & 15.82 & 87-88 & 74-83 & 90-92 & 97-97 & 81-85 & 75-88 & 93-95 \\
413+ Page- footer & 70878 & 6.51 & 5.58 & 6.00 & 93-94 & 88-90 & 95-96 & 100 & 92-97 & 100 & 96-98 \\
414+ Page- header & 58022 & 5.10 & 6.70 & 5.06 & 85-89 & 66-76 & 90-94 & 98-100 & 91-92 & 97-99 & 81-86 \\
415+ Picture & 45976 & 4.21 & 2.78 & 5.31 & 69-71 & 56-59 & 82-86 & 69-82 & 80-95 & 66-71 & 59-76 \\
416+ Section- header & 142884 & 12.60 & 15.77 & 12.85 & 83-84 & 76-81 & 90-92 & 94-95 & 87-94 & 69-73 & 78-86 \\
417+ Table & 34733 & 3.20 & 2.27 & 3.60 & 77-81 & 75-80 & 83-86 & 98-99 & 58-80 & 79-84 & 70-85 \\
418+ Text & 510377 & 45.82 & 49.28 & 45.00 & 84-86 & 81-86 & 88-93 & 89-93 & 87-92 & 71-79 & 87-95 \\
419+ Title & 5071 & 0.47 & 0.30 & 0.50 & 60-72 & 24-63 & 50-63 & 94-100 & 82-96 & 68-79 & 24-56 \\
420+ Total & 1107470 & 941123 & 99816 & 66531 & 82-83 & 71-74 & 79-81 & 89-94 & 86-91 & 71-76 & 68-85 \\
421+ \bottomrule
411422\end {tabular }
412423\end {table }
413424
0 commit comments