Skip to content

Commit 7fec51c

Browse files
committed
fix(layout,table): orientation-aware layout and table detection
Signed-off-by: Clément Doumouro <[email protected]>
1 parent d585691 commit 7fec51c

27 files changed

+37346
-2386
lines changed

tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,10 @@
213213
"prov": [
214214
{
215215
"bbox": [
216-
139.66741943359375,
216+
139.66746520996094,
217217
322.5054626464844,
218-
475.00927734375,
219-
454.45458984375
218+
475.0093078613281,
219+
454.4546203613281
220220
],
221221
"page": 1,
222222
"span": [

tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,7 +2705,7 @@
27052705
"b": 102.78223000000003,
27062706
"coord_origin": "TOPLEFT"
27072707
},
2708-
"confidence": 0.9373534917831421,
2708+
"confidence": 0.9373531937599182,
27092709
"cells": [
27102710
{
27112711
"index": 0,
@@ -2745,7 +2745,7 @@
27452745
"b": 102.78223000000003,
27462746
"coord_origin": "TOPLEFT"
27472747
},
2748-
"confidence": 0.8858680725097656,
2748+
"confidence": 0.8858677744865417,
27492749
"cells": [
27502750
{
27512751
"index": 1,
@@ -2785,7 +2785,7 @@
27852785
"b": 152.90697999999998,
27862786
"coord_origin": "TOPLEFT"
27872787
},
2788-
"confidence": 0.9806433916091919,
2788+
"confidence": 0.9806435108184814,
27892789
"cells": [
27902790
{
27912791
"index": 2,
@@ -3155,7 +3155,7 @@
31553155
"b": 327.98218,
31563156
"coord_origin": "TOPLEFT"
31573157
},
3158-
"confidence": 0.9591909050941467,
3158+
"confidence": 0.9591910243034363,
31593159
"cells": [
31603160
{
31613161
"index": 15,
@@ -3339,9 +3339,9 @@
33393339
"id": 0,
33403340
"label": "table",
33413341
"bbox": {
3342-
"l": 139.66741943359375,
3343-
"t": 337.54541015625,
3344-
"r": 475.00927734375,
3342+
"l": 139.66746520996094,
3343+
"t": 337.5453796386719,
3344+
"r": 475.0093078613281,
33453345
"b": 469.4945373535156,
33463346
"coord_origin": "TOPLEFT"
33473347
},
@@ -7846,7 +7846,7 @@
78467846
"b": 518.17419,
78477847
"coord_origin": "TOPLEFT"
78487848
},
7849-
"confidence": 0.9589294195175171,
7849+
"confidence": 0.9589295387268066,
78507850
"cells": [
78517851
{
78527852
"index": 91,
@@ -8243,9 +8243,9 @@
82438243
"id": 0,
82448244
"label": "table",
82458245
"bbox": {
8246-
"l": 139.66741943359375,
8247-
"t": 337.54541015625,
8248-
"r": 475.00927734375,
8246+
"l": 139.66746520996094,
8247+
"t": 337.5453796386719,
8248+
"r": 475.0093078613281,
82498249
"b": 469.4945373535156,
82508250
"coord_origin": "TOPLEFT"
82518251
},
@@ -13641,7 +13641,7 @@
1364113641
"b": 102.78223000000003,
1364213642
"coord_origin": "TOPLEFT"
1364313643
},
13644-
"confidence": 0.9373534917831421,
13644+
"confidence": 0.9373531937599182,
1364513645
"cells": [
1364613646
{
1364713647
"index": 0,
@@ -13687,7 +13687,7 @@
1368713687
"b": 102.78223000000003,
1368813688
"coord_origin": "TOPLEFT"
1368913689
},
13690-
"confidence": 0.8858680725097656,
13690+
"confidence": 0.8858677744865417,
1369113691
"cells": [
1369213692
{
1369313693
"index": 1,
@@ -13733,7 +13733,7 @@
1373313733
"b": 152.90697999999998,
1373413734
"coord_origin": "TOPLEFT"
1373513735
},
13736-
"confidence": 0.9806433916091919,
13736+
"confidence": 0.9806435108184814,
1373713737
"cells": [
1373813738
{
1373913739
"index": 2,
@@ -14121,7 +14121,7 @@
1412114121
"b": 327.98218,
1412214122
"coord_origin": "TOPLEFT"
1412314123
},
14124-
"confidence": 0.9591909050941467,
14124+
"confidence": 0.9591910243034363,
1412514125
"cells": [
1412614126
{
1412714127
"index": 15,
@@ -14311,9 +14311,9 @@
1431114311
"id": 0,
1431214312
"label": "table",
1431314313
"bbox": {
14314-
"l": 139.66741943359375,
14315-
"t": 337.54541015625,
14316-
"r": 475.00927734375,
14314+
"l": 139.66746520996094,
14315+
"t": 337.5453796386719,
14316+
"r": 475.0093078613281,
1431714317
"b": 469.4945373535156,
1431814318
"coord_origin": "TOPLEFT"
1431914319
},
@@ -19701,7 +19701,7 @@
1970119701
"b": 518.17419,
1970219702
"coord_origin": "TOPLEFT"
1970319703
},
19704-
"confidence": 0.9589294195175171,
19704+
"confidence": 0.9589295387268066,
1970519705
"cells": [
1970619706
{
1970719707
"index": 91,
@@ -20116,7 +20116,7 @@
2011620116
"b": 152.90697999999998,
2011720117
"coord_origin": "TOPLEFT"
2011820118
},
20119-
"confidence": 0.9806433916091919,
20119+
"confidence": 0.9806435108184814,
2012020120
"cells": [
2012120121
{
2012220122
"index": 2,
@@ -20504,7 +20504,7 @@
2050420504
"b": 327.98218,
2050520505
"coord_origin": "TOPLEFT"
2050620506
},
20507-
"confidence": 0.9591909050941467,
20507+
"confidence": 0.9591910243034363,
2050820508
"cells": [
2050920509
{
2051020510
"index": 15,
@@ -20694,9 +20694,9 @@
2069420694
"id": 0,
2069520695
"label": "table",
2069620696
"bbox": {
20697-
"l": 139.66741943359375,
20698-
"t": 337.54541015625,
20699-
"r": 475.00927734375,
20697+
"l": 139.66746520996094,
20698+
"t": 337.5453796386719,
20699+
"r": 475.0093078613281,
2070020700
"b": 469.4945373535156,
2070120701
"coord_origin": "TOPLEFT"
2070220702
},
@@ -26084,7 +26084,7 @@
2608426084
"b": 518.17419,
2608526085
"coord_origin": "TOPLEFT"
2608626086
},
26087-
"confidence": 0.9589294195175171,
26087+
"confidence": 0.9589295387268066,
2608826088
"cells": [
2608926089
{
2609026090
"index": 91,
@@ -26499,7 +26499,7 @@
2649926499
"b": 102.78223000000003,
2650026500
"coord_origin": "TOPLEFT"
2650126501
},
26502-
"confidence": 0.9373534917831421,
26502+
"confidence": 0.9373531937599182,
2650326503
"cells": [
2650426504
{
2650526505
"index": 0,
@@ -26545,7 +26545,7 @@
2654526545
"b": 102.78223000000003,
2654626546
"coord_origin": "TOPLEFT"
2654726547
},
26548-
"confidence": 0.8858680725097656,
26548+
"confidence": 0.8858677744865417,
2654926549
"cells": [
2655026550
{
2655126551
"index": 1,

tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,9 @@
336336
{
337337
"page_no": 1,
338338
"bbox": {
339-
"l": 139.66741943359375,
340-
"t": 454.45458984375,
341-
"r": 475.00927734375,
339+
"l": 139.66746520996094,
340+
"t": 454.4546203613281,
341+
"r": 475.0093078613281,
342342
"b": 322.5054626464844,
343343
"coord_origin": "BOTTOMLEFT"
344344
},

0 commit comments

Comments
 (0)