Skip to content

Commit c14d7df

Browse files
author
Maksym Lysak
committed
Updated test cases
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
1 parent 6116d6a commit c14d7df

39 files changed

+5924
-5886
lines changed

tests/data/groundtruth/docling_v2/example_01.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_01",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_01_images.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.7.0",
3+
"version": "1.9.0",
44
"name": "example_01",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_02.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_02",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_03.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_03",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_04.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_04",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_05.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_05",
55
"origin": {
66
"mimetype": "text/html",

tests/data/groundtruth/docling_v2/example_06.html.itxt

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
item-0 at level 0: unspecified: group _root_
2-
item-1 at level 1: text: This is a div with text.
3-
item-2 at level 1: text: This is another div with text.
4-
item-3 at level 1: text: This is a regular paragraph.
5-
item-4 at level 1: text: This is a third div
6-
item-5 at level 1: text: with a new line.
7-
item-6 at level 1: section: group details
8-
item-7 at level 2: text: Heading for the details element
9-
item-8 at level 2: text: Description of the details element.
10-
item-9 at level 1: inline: group group
11-
item-10 at level 2: text: This is a fourth div with a
12-
item-11 at level 2: text: bold
13-
item-12 at level 2: text: paragraph.
2+
item-1 at level 1: text: This is a div with text. This is another div with text.
3+
item-2 at level 1: text: This is a regular paragraph.
4+
item-3 at level 1: text: This is a third div with a new line.
5+
item-4 at level 1: section: group details
6+
item-5 at level 2: text: Heading for the details element
7+
item-6 at level 2: text: Description of the details element.
8+
item-7 at level 1: inline: group group
9+
item-8 at level 2: text: This is a fourth div with a
10+
item-9 at level 2: text: bold
11+
item-10 at level 2: text: paragraph.

tests/data/groundtruth/docling_v2/example_06.html.json

Lines changed: 16 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_06",
55
"origin": {
66
"mimetype": "text/html",
@@ -29,12 +29,6 @@
2929
{
3030
"$ref": "#/texts/3"
3131
},
32-
{
33-
"$ref": "#/texts/4"
34-
},
35-
{
36-
"$ref": "#/texts/5"
37-
},
3832
{
3933
"$ref": "#/groups/0"
4034
},
@@ -54,10 +48,10 @@
5448
},
5549
"children": [
5650
{
57-
"$ref": "#/texts/6"
51+
"$ref": "#/texts/4"
5852
},
5953
{
60-
"$ref": "#/texts/7"
54+
"$ref": "#/texts/5"
6155
}
6256
],
6357
"content_layer": "body",
@@ -71,13 +65,13 @@
7165
},
7266
"children": [
7367
{
74-
"$ref": "#/texts/8"
68+
"$ref": "#/texts/6"
7569
},
7670
{
77-
"$ref": "#/texts/9"
71+
"$ref": "#/texts/7"
7872
},
7973
{
80-
"$ref": "#/texts/10"
74+
"$ref": "#/texts/8"
8175
}
8276
],
8377
"content_layer": "body",
@@ -107,8 +101,8 @@
107101
"content_layer": "body",
108102
"label": "text",
109103
"prov": [],
110-
"orig": "This is a div with text.",
111-
"text": "This is a div with text."
104+
"orig": "This is a div with text. This is another div with text.",
105+
"text": "This is a div with text. This is another div with text."
112106
},
113107
{
114108
"self_ref": "#/texts/2",
@@ -119,47 +113,23 @@
119113
"content_layer": "body",
120114
"label": "text",
121115
"prov": [],
122-
"orig": "This is another div with text.",
123-
"text": "This is another div with text."
124-
},
125-
{
126-
"self_ref": "#/texts/3",
127-
"parent": {
128-
"$ref": "#/body"
129-
},
130-
"children": [],
131-
"content_layer": "body",
132-
"label": "text",
133-
"prov": [],
134116
"orig": "This is a regular paragraph.",
135117
"text": "This is a regular paragraph."
136118
},
137119
{
138-
"self_ref": "#/texts/4",
139-
"parent": {
140-
"$ref": "#/body"
141-
},
142-
"children": [],
143-
"content_layer": "body",
144-
"label": "text",
145-
"prov": [],
146-
"orig": "This is a third div",
147-
"text": "This is a third div"
148-
},
149-
{
150-
"self_ref": "#/texts/5",
120+
"self_ref": "#/texts/3",
151121
"parent": {
152122
"$ref": "#/body"
153123
},
154124
"children": [],
155125
"content_layer": "body",
156126
"label": "text",
157127
"prov": [],
158-
"orig": "with a new line.",
159-
"text": "with a new line."
128+
"orig": "This is a third div with a new line.",
129+
"text": "This is a third div with a new line."
160130
},
161131
{
162-
"self_ref": "#/texts/6",
132+
"self_ref": "#/texts/4",
163133
"parent": {
164134
"$ref": "#/groups/0"
165135
},
@@ -171,7 +141,7 @@
171141
"text": "Heading for the details element"
172142
},
173143
{
174-
"self_ref": "#/texts/7",
144+
"self_ref": "#/texts/5",
175145
"parent": {
176146
"$ref": "#/groups/0"
177147
},
@@ -183,7 +153,7 @@
183153
"text": "Description of the details element."
184154
},
185155
{
186-
"self_ref": "#/texts/8",
156+
"self_ref": "#/texts/6",
187157
"parent": {
188158
"$ref": "#/groups/1"
189159
},
@@ -195,7 +165,7 @@
195165
"text": "This is a fourth div with a"
196166
},
197167
{
198-
"self_ref": "#/texts/9",
168+
"self_ref": "#/texts/7",
199169
"parent": {
200170
"$ref": "#/groups/1"
201171
},
@@ -214,7 +184,7 @@
214184
}
215185
},
216186
{
217-
"self_ref": "#/texts/10",
187+
"self_ref": "#/texts/8",
218188
"parent": {
219189
"$ref": "#/groups/1"
220190
},

tests/data/groundtruth/docling_v2/example_06.html.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1-
This is a div with text.
2-
3-
This is another div with text.
1+
This is a div with text. This is another div with text.
42

53
This is a regular paragraph.
64

7-
This is a third div
8-
9-
with a new line.
5+
This is a third div with a new line.
106

117
Heading for the details element
128

tests/data/groundtruth/docling_v2/example_07.html.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"schema_name": "DoclingDocument",
3-
"version": "1.8.0",
3+
"version": "1.9.0",
44
"name": "example_07",
55
"origin": {
66
"mimetype": "text/html",

0 commit comments

Comments (0)