Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 45 additions & 21 deletions src/Text/Pandoc/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ import Control.Monad.State.Strict
import Data.Bifunctor (bimap, first)
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect, foldl')
import Data.List (partition, delete, intersect, foldl')
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
Expand Down Expand Up @@ -169,30 +169,54 @@ spansToKeep = []
-- | Paragraph style names singled out by this reader. Judging by the binding's
-- name, paragraphs with these styles are kept as Divs; the use site is not
-- visible here (TODO confirm against the caller).
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]

-- | Table of the paragraph styles that mark a paragraph as document metadata.
-- Each style name is mapped to the lower-case name used for that piece of
-- metadata (for example @"Title"@ maps to @"title"@).
metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
, ("Date", "date")
, ("Abstract", "abstract")]
-- | Meta styles for which every paragraph carrying the style is extracted as
-- metadata, wherever it occurs among the body parts (see 'isMultiMetaPar').
multiMetaStyles :: M.Map ParaStyleName T.Text
multiMetaStyles = M.fromList [ ("Author", "author") ]

-- | Split the body parts into the longest leading run of metadata or empty
-- paragraphs and the remainder, which begins at the first part that is neither.
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span leadingMeta
  where
    -- A part belongs to the leading run while it is a meta or an empty paragraph.
    leadingMeta part = isMetaPar part || isEmptyPar part
-- | Meta styles for which only the first paragraph carrying the style is
-- extracted as metadata; any later paragraph with the same style stays among
-- the remaining body parts.
singleMetaStyles :: M.Map ParaStyleName T.Text
singleMetaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Date", "date")
, ("Abstract", "abstract")]

-- | True for a paragraph that carries at least one style listed in
-- 'metaStyles'; any other kind of body part is never a metadata paragraph.
isMetaPar :: BodyPart -> Bool
isMetaPar bodyPart = case bodyPart of
  Paragraph pPr _ -> any (`M.member` metaStyles) (getStyleNames (pStyle pPr))
  _               -> False
-- | All paragraph styles treated as metadata: the single-instance styles
-- together with the multi-instance ones. The union is left-biased, so an entry
-- of 'singleMetaStyles' would win if a style were ever listed in both tables.
metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.unions [singleMetaStyles, multiMetaStyles]

isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
all isEmptyParPart parParts
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts bps = (multiMetas ++ singleMetas, restWithoutRelevantMeta)
where
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
isEmptyParPart _ = False
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
-- extract every paragraph whose style is listed in multiMetaStyles from bps
(multiMetas, restWithoutMulti) = partition isMultiMetaPar bps

-- extract the first paragraph of each style in singleMetaStyles into singleMetas; all remaining elements stay in the rest
(singleMetas, restWithoutRelevantMeta) = foldr extractSingle ([], restWithoutMulti) (M.keys singleMetaStyles)

-- Fold step for one single-instance style: take the first paragraph carrying
-- styleName out of the remaining body parts and, if there was one, put it in
-- front of the metadata paragraphs collected so far.
extractSingle :: ParaStyleName -> ([BodyPart], [BodyPart]) -> ([BodyPart], [BodyPart])
extractSingle styleName (accSingleMetas, remainingBPs) =
  (maybe accSingleMetas (: accSingleMetas) firstMatch, others)
  where
    -- Lazy pattern: nothing is searched until one of the components is demanded.
    (firstMatch, others) = extractFirst (isSingleMetaPar styleName) remainingBPs

-- Turn an optional value into a list holding zero or one element.
maybeToList :: Maybe a -> [a]
maybeToList = maybe [] (: [])

-- True when the body part is a paragraph and styleName is among the style
-- names of its paragraph properties; False for every other body part.
isSingleMetaPar :: ParaStyleName -> BodyPart -> Bool
isSingleMetaPar styleName bodyPart
  | Paragraph pPr _ <- bodyPart = any (== styleName) (getStyleNames (pStyle pPr))
  | otherwise = False

-- Remove the first element that satisfies the predicate. Returns that element
-- (Nothing when no element matches) together with the list without it; the
-- order of the other elements is unchanged and later matches are kept.
extractFirst :: (a -> Bool) -> [a] -> (Maybe a, [a])
extractFirst predicate items = (firstHit, before ++ drop 1 fromHit)
  where
    -- before: elements ahead of the first match; fromHit: the match and the rest.
    (before, fromHit) = break predicate items
    firstHit = case fromHit of
      hit : _ -> Just hit
      []      -> Nothing


-- | True for a paragraph that carries at least one style listed in
-- 'multiMetaStyles'; any other kind of body part never qualifies.
isMultiMetaPar :: BodyPart -> Bool
isMultiMetaPar bodyPart = case bodyPart of
  Paragraph pPr _ -> any (`M.member` multiMetaStyles) (getStyleNames (pStyle pPr))
  _               -> False

bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
Expand Down
6 changes: 5 additions & 1 deletion test/Tests/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ tests = [ testGroup "document"
"i18n blocks (headers and blockquotes)"
"docx/i18n_blocks.docx"
"docx/i18n_blocks.native"
, testCompare
"Image before Title"
"docx/image-before-title.docx"
"docx/image-before-title.native"
, testCompare
"lists"
"docx/lists.docx"
Expand Down Expand Up @@ -503,7 +507,7 @@ tests = [ testGroup "document"
"docx/metadata.docx"
"docx/metadata.native"
, testCompareWithOpts def{readerStandalone=True}
"stop recording metadata with normal text"
"recording metadata after normal text only if author"
"docx/metadata_after_normal.docx"
"docx/metadata_after_normal.native"
]
Expand Down
234 changes: 182 additions & 52 deletions test/docx/0_level_headers.native
Original file line number Diff line number Diff line change
@@ -1,52 +1,182 @@
[Table ("",[],[]) (Caption Nothing
[])
[(AlignDefault,ColWidth 1.0)]
(TableHead ("",[],[])
[])
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "User\8217s",Space,Str "Guide"]]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "11",Space,Str "August",Space,Str "2017"]]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]])]
(TableFoot ("",[],[])
[])
,Para [Str "CONTENTS"]
,Para [Strong [Str "Section",Space,Str "Page"]]
,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")]
,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")]
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")]
,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
,Para [Strong [Str "Figure",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"]
,Para [Strong [Str "Table",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
,Header 1 ("introduction",[],[]) [Str "Introduction"]
,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
Pandoc
Meta
{ unMeta =
fromList [ ( "title" , MetaInlines [ Str "CONTENTS" ] ) ]
}
[ Table
( "" , [] , [] )
(Caption Nothing [])
[ ( AlignDefault , ColWidth 1.0 ) ]
(TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
[]
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Plain [ Str "User\8217s" , Space , Str "Guide" ] ]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Plain
[ Str "11"
, Space
, Str "August"
, Space
, Str "2017"
]
]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
]
]
(TableFoot ( "" , [] , [] ) [])
, Para [ Strong [ Str "Section" , Space , Str "Page" ] ]
, Para
[ Str "FIGURES"
, Space
, Link ( "" , [] , [] ) [ Str "iv" ] ( "#figures" , "" )
]
, Para
[ Str "TABLES"
, Space
, Link ( "" , [] , [] ) [ Str "v" ] ( "#tables" , "" )
]
, Para
[ Str "SECTION"
, Space
, Str "1"
, Space
, Str "Introduction"
, Space
, Link ( "" , [] , [] ) [ Str "2" ] ( "#introduction" , "" )
]
, Header
1 ( "figures" , [ "Heading-0" ] , [] ) [ Str "FIGURES" ]
, Para [ Strong [ Str "Figure" , Space , Str "Page" ] ]
, Para
[ Strong
[ Str "No"
, Space
, Str "table"
, Space
, Str "of"
, Space
, Str "figures"
, Space
, Str "entries"
, Space
, Str "found."
]
]
, Header
1 ( "tables" , [ "Heading-0" ] , [] ) [ Str "TABLES" ]
, Para [ Strong [ Str "Table" , Space , Str "Page" ] ]
, Para
[ Strong
[ Str "No"
, Space
, Str "table"
, Space
, Str "of"
, Space
, Str "figures"
, Space
, Str "entries"
, Space
, Str "found."
]
]
, Header
1 ( "introduction" , [] , [] ) [ Str "Introduction" ]
, Para
[ Str "Nothing"
, Space
, Str "to"
, Space
, Str "introduce,"
, Space
, Str "yet."
]
]
Binary file added test/docx/image-before-title.docx
Binary file not shown.
Loading