Skip to content

Commit d820620

Browse files
committed
Docx reader: properly calculate table column widths.
Previously we assumed that every table took up the full text width. Now we read the text width from the document's sectPr. Closes #9837. Closes #11147.
1 parent bbd7b60 commit d820620

16 files changed

+1438
-553
lines changed

src/Text/Pandoc/Readers/Docx.hs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -833,8 +833,7 @@ bodyPartToBlocks (Tbl mbsty cap grid look parts) = do
833833
alignments = case rows of
834834
[] -> replicate width Pandoc.AlignDefault
835835
Docx.Row _ cs : _ -> concatMap getAlignment cs
836-
totalWidth = sum grid
837-
widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
836+
widths = map ColWidth grid
838837

839838
extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions)
840839
let attr = case mbsty of

src/Text/Pandoc/Readers/Docx/Parse.hs

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
110110
, envParStyles :: ParStyleMap
111111
, envLocation :: DocumentLocation
112112
, envDocXmlPath :: FilePath
113+
, envTextWidth :: Int
113114
}
114115
deriving Show
115116

@@ -272,7 +273,7 @@ data BodyPart = Paragraph ParagraphStyle [ParPart]
272273
| HRule
273274
deriving Show
274275

275-
type TblGrid = [Integer]
276+
type TblGrid = [Double]
276277

277278
newtype TblLook = TblLook {firstRowFormatting::Bool}
278279
deriving Show
@@ -403,6 +404,7 @@ archiveToDocxWithWarnings archive = do
403404
rels = archiveToRelationships archive docXmlPath
404405
media = filteredFilesFromArchive archive filePathIsMedia
405406
(styles, parstyles) = archiveToStyles archive
407+
textWidth = archiveToTextWidth archive
406408
rEnv = ReaderEnv { envNotes = notes
407409
, envComments = comments
408410
, envNumbering = numbering
@@ -413,6 +415,7 @@ archiveToDocxWithWarnings archive = do
413415
, envParStyles = parstyles
414416
, envLocation = InDocument
415417
, envDocXmlPath = docXmlPath
418+
, envTextWidth = fromMaybe 9638 textWidth
416419
}
417420
rState = ReaderState { stateWarnings = []
418421
, stateFldCharState = []
@@ -636,6 +639,20 @@ archiveToNumbering :: Archive -> Numbering
636639
archiveToNumbering archive =
637640
fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
638641

642+
archiveToTextWidth :: Archive -> Maybe Int
643+
archiveToTextWidth zf = do
644+
entry <- findEntryByPath "word/document.xml" zf
645+
docElem <- parseXMLFromEntry entry
646+
let ns = elemToNameSpaces docElem
647+
sectElem <- findChildByName ns "w" "body" docElem >>= findChildByName ns "w" "sectPr"
648+
pgWidth <- findChildByName ns "w" "pgSz" sectElem
649+
>>= findAttrByName ns "w" "w" >>= safeRead
650+
pgMar <- findChildByName ns "w" "pgMar" sectElem
651+
leftMargin <- findAttrByName ns "w" "left" pgMar >>= safeRead
652+
rightMargin <- findAttrByName ns "w" "right" pgMar >>= safeRead
653+
gutter <- findAttrByName ns "w" "gutter" pgMar >>= safeRead
654+
return $ pgWidth - (leftMargin + rightMargin + gutter)
655+
639656
elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
640657
elemToNotes ns notetype element
641658
| isElem ns "w" (notetype <> "s") element =
@@ -664,11 +681,20 @@ elemToComments _ _ = M.empty
664681
---------------------------------------------
665682

666683
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
667-
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
684+
elemToTblGrid ns element | isElem ns "w" "tblGrid" element = do
668685
let cols = findChildrenByName ns "w" "gridCol" element
669-
in
670-
mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
671-
cols
686+
textWidth <- asks envTextWidth
687+
-- space between cols is 10 twips, so we subtract this:
688+
let totalWidth = textWidth - (10 * (length cols - 1))
689+
let toFraction :: Int -> Double
690+
toFraction x = fromIntegral x / fromIntegral totalWidth
691+
let normalizeFractions xs =
692+
case sum xs of
693+
tot | tot > 1.0 -> map (/ tot) xs
694+
_ -> xs
695+
normalizeFractions <$>
696+
mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>=
697+
fmap toFraction . safeRead)) cols
672698
elemToTblGrid _ _ = throwError WrongElem
673699

674700
elemToTblLook :: NameSpaces -> Element -> D TblLook

test/command/9002.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
```
22
% pandoc command/9002.docx -t html
33
^D
4-
<table>
4+
<table style="width:40%;">
55
<caption><p>This is my table!</p></caption>
66
<colgroup>
7-
<col style="width: 50%" />
8-
<col style="width: 50%" />
7+
<col style="width: 20%" />
8+
<col style="width: 20%" />
99
</colgroup>
1010
<thead>
1111
<tr>

test/docx/0_level_headers.native

Lines changed: 146 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,52 +1,146 @@
1-
[Table ("",[],[]) (Caption Nothing
2-
[])
3-
[(AlignDefault,ColWidth 1.0)]
4-
(TableHead ("",[],[])
5-
[])
6-
[(TableBody ("",[],[]) (RowHeadColumns 0)
7-
[]
8-
[Row ("",[],[])
9-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
10-
[]]
11-
,Row ("",[],[])
12-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
13-
[Plain [Str "User\8217s",Space,Str "Guide"]]]
14-
,Row ("",[],[])
15-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
16-
[]]
17-
,Row ("",[],[])
18-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
19-
[]]
20-
,Row ("",[],[])
21-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
22-
[]]
23-
,Row ("",[],[])
24-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
25-
[Plain [Str "11",Space,Str "August",Space,Str "2017"]]]
26-
,Row ("",[],[])
27-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
28-
[]]
29-
,Row ("",[],[])
30-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
31-
[]]
32-
,Row ("",[],[])
33-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
34-
[]]
35-
,Row ("",[],[])
36-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
37-
[]]])]
38-
(TableFoot ("",[],[])
39-
[])
40-
,Para [Str "CONTENTS"]
41-
,Para [Strong [Str "Section",Space,Str "Page"]]
42-
,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")]
43-
,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")]
44-
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")]
45-
,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
46-
,Para [Strong [Str "Figure",Space,Str "Page"]]
47-
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
48-
,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"]
49-
,Para [Strong [Str "Table",Space,Str "Page"]]
50-
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
51-
,Header 1 ("introduction",[],[]) [Str "Introduction"]
52-
,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
1+
[ Table
2+
( "" , [] , [] )
3+
(Caption Nothing [])
4+
[ ( AlignDefault , ColWidth 0.8615384615384616 ) ]
5+
(TableHead ( "" , [] , [] ) [])
6+
[ TableBody
7+
( "" , [] , [] )
8+
(RowHeadColumns 0)
9+
[]
10+
[ Row
11+
( "" , [] , [] )
12+
[ Cell
13+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
14+
]
15+
, Row
16+
( "" , [] , [] )
17+
[ Cell
18+
( "" , [] , [] )
19+
AlignDefault
20+
(RowSpan 1)
21+
(ColSpan 1)
22+
[ Plain [ Str "User\8217s" , Space , Str "Guide" ] ]
23+
]
24+
, Row
25+
( "" , [] , [] )
26+
[ Cell
27+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
28+
]
29+
, Row
30+
( "" , [] , [] )
31+
[ Cell
32+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
33+
]
34+
, Row
35+
( "" , [] , [] )
36+
[ Cell
37+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
38+
]
39+
, Row
40+
( "" , [] , [] )
41+
[ Cell
42+
( "" , [] , [] )
43+
AlignDefault
44+
(RowSpan 1)
45+
(ColSpan 1)
46+
[ Plain
47+
[ Str "11"
48+
, Space
49+
, Str "August"
50+
, Space
51+
, Str "2017"
52+
]
53+
]
54+
]
55+
, Row
56+
( "" , [] , [] )
57+
[ Cell
58+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
59+
]
60+
, Row
61+
( "" , [] , [] )
62+
[ Cell
63+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
64+
]
65+
, Row
66+
( "" , [] , [] )
67+
[ Cell
68+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
69+
]
70+
, Row
71+
( "" , [] , [] )
72+
[ Cell
73+
( "" , [] , [] ) AlignDefault (RowSpan 1) (ColSpan 1) []
74+
]
75+
]
76+
]
77+
(TableFoot ( "" , [] , [] ) [])
78+
, Para [ Str "CONTENTS" ]
79+
, Para [ Strong [ Str "Section" , Space , Str "Page" ] ]
80+
, Para
81+
[ Str "FIGURES"
82+
, Space
83+
, Link ( "" , [] , [] ) [ Str "iv" ] ( "#figures" , "" )
84+
]
85+
, Para
86+
[ Str "TABLES"
87+
, Space
88+
, Link ( "" , [] , [] ) [ Str "v" ] ( "#tables" , "" )
89+
]
90+
, Para
91+
[ Str "SECTION"
92+
, Space
93+
, Str "1"
94+
, Space
95+
, Str "Introduction"
96+
, Space
97+
, Link ( "" , [] , [] ) [ Str "2" ] ( "#introduction" , "" )
98+
]
99+
, Header
100+
1 ( "figures" , [ "Heading-0" ] , [] ) [ Str "FIGURES" ]
101+
, Para [ Strong [ Str "Figure" , Space , Str "Page" ] ]
102+
, Para
103+
[ Strong
104+
[ Str "No"
105+
, Space
106+
, Str "table"
107+
, Space
108+
, Str "of"
109+
, Space
110+
, Str "figures"
111+
, Space
112+
, Str "entries"
113+
, Space
114+
, Str "found."
115+
]
116+
]
117+
, Header
118+
1 ( "tables" , [ "Heading-0" ] , [] ) [ Str "TABLES" ]
119+
, Para [ Strong [ Str "Table" , Space , Str "Page" ] ]
120+
, Para
121+
[ Strong
122+
[ Str "No"
123+
, Space
124+
, Str "table"
125+
, Space
126+
, Str "of"
127+
, Space
128+
, Str "figures"
129+
, Space
130+
, Str "entries"
131+
, Space
132+
, Str "found."
133+
]
134+
]
135+
, Header
136+
1 ( "introduction" , [] , [] ) [ Str "Introduction" ]
137+
, Para
138+
[ Str "Nothing"
139+
, Space
140+
, Str "to"
141+
, Space
142+
, Str "introduce,"
143+
, Space
144+
, Str "yet."
145+
]
146+
]
-26 Bytes
Binary file not shown.
-26 Bytes
Binary file not shown.

test/docx/golden/tables.docx

-26 Bytes
Binary file not shown.

test/docx/sdt_elements.native

Lines changed: 60 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,60 @@
1-
[Table ("",[],[]) (Caption Nothing
2-
[])
3-
[(AlignDefault,ColWidth 0.22069570301081556)
4-
,(AlignDefault,ColWidth 0.22069570301081556)
5-
,(AlignDefault,ColWidth 0.5586085939783689)]
6-
(TableHead ("",[],[])
7-
[Row ("",[],[])
8-
[Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
9-
[Plain [Strong [Str "col1Header"]]]
10-
,Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
11-
[Plain [Strong [Str "col2Header"]]]
12-
,Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
13-
[Plain [Strong [Str "col3Header"]]]]])
14-
[(TableBody ("",[],[]) (RowHeadColumns 0)
15-
[]
16-
[Row ("",[],[])
17-
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
18-
[Plain [Str "col1",Space,Str "content"]]
19-
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
20-
[Plain [Str "Body",Space,Str "copy"]]
21-
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
22-
[Plain [Str "col3",Space,Str "content"]]]])]
23-
(TableFoot ("",[],[])
24-
[])]
1+
[ Table
2+
( "" , [] , [] )
3+
(Caption Nothing [])
4+
[ ( AlignDefault , ColWidth 0.16167023554603854 )
5+
, ( AlignDefault , ColWidth 0.16167023554603854 )
6+
, ( AlignDefault , ColWidth 0.40920770877944324 )
7+
]
8+
(TableHead
9+
( "" , [] , [] )
10+
[ Row
11+
( "" , [] , [] )
12+
[ Cell
13+
( "" , [] , [] )
14+
AlignCenter
15+
(RowSpan 1)
16+
(ColSpan 1)
17+
[ Plain [ Strong [ Str "col1Header" ] ] ]
18+
, Cell
19+
( "" , [] , [] )
20+
AlignCenter
21+
(RowSpan 1)
22+
(ColSpan 1)
23+
[ Plain [ Strong [ Str "col2Header" ] ] ]
24+
, Cell
25+
( "" , [] , [] )
26+
AlignCenter
27+
(RowSpan 1)
28+
(ColSpan 1)
29+
[ Plain [ Strong [ Str "col3Header" ] ] ]
30+
]
31+
])
32+
[ TableBody
33+
( "" , [] , [] )
34+
(RowHeadColumns 0)
35+
[]
36+
[ Row
37+
( "" , [] , [] )
38+
[ Cell
39+
( "" , [] , [] )
40+
AlignDefault
41+
(RowSpan 1)
42+
(ColSpan 1)
43+
[ Plain [ Str "col1" , Space , Str "content" ] ]
44+
, Cell
45+
( "" , [] , [] )
46+
AlignDefault
47+
(RowSpan 1)
48+
(ColSpan 1)
49+
[ Plain [ Str "Body" , Space , Str "copy" ] ]
50+
, Cell
51+
( "" , [] , [] )
52+
AlignDefault
53+
(RowSpan 1)
54+
(ColSpan 1)
55+
[ Plain [ Str "col3" , Space , Str "content" ] ]
56+
]
57+
]
58+
]
59+
(TableFoot ( "" , [] , [] ) [])
60+
]

0 commit comments

Comments
 (0)