diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4ab40f5a61ec..ee3d12eae23d 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -73,7 +73,7 @@ import Control.Monad.State.Strict import Data.Bifunctor (bimap, first) import qualified Data.ByteString.Lazy as B import Data.Default (Default) -import Data.List (delete, intersect, foldl') +import Data.List (partition, delete, intersect, foldl') import Data.Char (isSpace) import qualified Data.Map as M import qualified Data.Text as T @@ -169,30 +169,54 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map ParaStyleName T.Text -metaStyles = M.fromList [ ("Title", "title") - , ("Subtitle", "subtitle") - , ("Author", "author") - , ("Date", "date") - , ("Abstract", "abstract")] +multiMetaStyles :: M.Map ParaStyleName T.Text +multiMetaStyles = M.fromList [ ("Author", "author") ] -sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) -sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp) +-- | Meta styles for which only the first paragraph carrying the style is extracted as metadata. 
+singleMetaStyles :: M.Map ParaStyleName T.Text +singleMetaStyles = M.fromList [ ("Title", "title") + , ("Subtitle", "subtitle") + , ("Date", "date") + , ("Abstract", "abstract")] -isMetaPar :: BodyPart -> Bool -isMetaPar (Paragraph pPr _) = - not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles) -isMetaPar _ = False +metaStyles :: M.Map ParaStyleName T.Text +metaStyles = M.union singleMetaStyles multiMetaStyles -isEmptyPar :: BodyPart -> Bool -isEmptyPar (Paragraph _ parParts) = - all isEmptyParPart parParts +sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) +sepBodyParts bps = (multiMetas ++ singleMetas, restWithoutRelevantMeta) where - isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems - isEmptyParPart _ = False - isEmptyElem (TextRun s) = trim s == "" - isEmptyElem _ = True -isEmptyPar _ = False + -- pull out every paragraph in bps that carries a style listed in multiMetaStyles, wherever it occurs + (multiMetas, restWithoutMulti) = partition isMultiMetaPar bps + + -- for each style in singleMetaStyles, move the first paragraph carrying it into singleMetas; all other body parts stay in the rest + (singleMetas, restWithoutRelevantMeta) = foldr extractSingle ([], restWithoutMulti) (M.keys singleMetaStyles) + + extractSingle :: ParaStyleName -> ([BodyPart], [BodyPart]) -> ([BodyPart], [BodyPart]) + extractSingle styleName (accSingleMetas, remainingBPs) = + let (found, rest) = extractFirst (isSingleMetaPar styleName) remainingBPs + in (maybeToList found ++ accSingleMetas, rest) + + maybeToList :: Maybe a -> [a] + maybeToList Nothing = [] + maybeToList (Just x) = [x] + + isSingleMetaPar :: ParaStyleName -> BodyPart -> Bool + isSingleMetaPar styleName (Paragraph pPr _) = + styleName `elem` getStyleNames (pStyle pPr) + isSingleMetaPar _ _ = False + + extractFirst :: (a -> Bool) -> [a] -> (Maybe a, [a]) + extractFirst _ [] = (Nothing, []) + extractFirst predicate (x:xs) + | predicate x = (Just x, xs) + | otherwise = let (found, rest) = extractFirst predicate xs + in (found, x : rest) + + +isMultiMetaPar 
:: BodyPart -> Bool +isMultiMetaPar (Paragraph pPr _) = + not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys multiMetaStyles) +isMultiMetaPar _ = False bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index d9935967f6c2..94a0438190c5 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -265,6 +265,10 @@ tests = [ testGroup "document" "i18n blocks (headers and blockquotes)" "docx/i18n_blocks.docx" "docx/i18n_blocks.native" + , testCompare + "Image before Title" + "docx/image-before-title.docx" + "docx/image-before-title.native" , testCompare "lists" "docx/lists.docx" @@ -503,7 +507,7 @@ tests = [ testGroup "document" "docx/metadata.docx" "docx/metadata.native" , testCompareWithOpts def{readerStandalone=True} - "stop recording metadata with normal text" + "recording metadata after normal text only if author" "docx/metadata_after_normal.docx" "docx/metadata_after_normal.native" ] diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 7080063f9547..ebf74cd31d36 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -1,52 +1,182 @@ -[Table ("",[],[]) (Caption Nothing - []) - [(AlignDefault,ColWidth 1.0)] - (TableHead ("",[],[]) - []) - [(TableBody ("",[],[]) (RowHeadColumns 0) - [] - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [Plain [Str "User\8217s",Space,Str "Guide"]]] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [Plain [Str "11",Space,Str 
"August",Space,Str "2017"]]] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []] - ,Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]])] - (TableFoot ("",[],[]) - []) -,Para [Str "CONTENTS"] -,Para [Strong [Str "Section",Space,Str "Page"]] -,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")] -,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")] -,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")] -,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"] -,Para [Strong [Str "Figure",Space,Str "Page"]] -,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] -,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"] -,Para [Strong [Str "Table",Space,Str "Page"]] -,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] -,Header 1 ("introduction",[],[]) [Str "Introduction"] -,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] +Pandoc + Meta + { unMeta = + fromList [ ( "title" , MetaInlines [ Str "CONTENTS" ] ) ] + } + [ Table + ( "" , [] , [] ) + (Caption Nothing []) + [ ( AlignDefault , ColWidth 1.0 ) ] + (TableHead ( "" , [] , [] ) []) + [ TableBody + ( "" , [] , [] ) + (RowHeadColumns 0) + [] + [ Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Plain [ Str "User\8217s" , Space , Str "Guide" ] ] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + 
AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Plain + [ Str "11" + , Space + , Str "August" + , Space + , Str "2017" + ] + ] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [] + ] + ] + ] + (TableFoot ( "" , [] , [] ) []) + , Para [ Strong [ Str "Section" , Space , Str "Page" ] ] + , Para + [ Str "FIGURES" + , Space + , Link ( "" , [] , [] ) [ Str "iv" ] ( "#figures" , "" ) + ] + , Para + [ Str "TABLES" + , Space + , Link ( "" , [] , [] ) [ Str "v" ] ( "#tables" , "" ) + ] + , Para + [ Str "SECTION" + , Space + , Str "1" + , Space + , Str "Introduction" + , Space + , Link ( "" , [] , [] ) [ Str "2" ] ( "#introduction" , "" ) + ] + , Header + 1 ( "figures" , [ "Heading-0" ] , [] ) [ Str "FIGURES" ] + , Para [ Strong [ Str "Figure" , Space , Str "Page" ] ] + , Para + [ Strong + [ Str "No" + , Space + , Str "table" + , Space + , Str "of" + , Space + , Str "figures" + , Space + , Str "entries" + , Space + , Str "found." + ] + ] + , Header + 1 ( "tables" , [ "Heading-0" ] , [] ) [ Str "TABLES" ] + , Para [ Strong [ Str "Table" , Space , Str "Page" ] ] + , Para + [ Strong + [ Str "No" + , Space + , Str "table" + , Space + , Str "of" + , Space + , Str "figures" + , Space + , Str "entries" + , Space + , Str "found." + ] + ] + , Header + 1 ( "introduction" , [] , [] ) [ Str "Introduction" ] + , Para + [ Str "Nothing" + , Space + , Str "to" + , Space + , Str "introduce," + , Space + , Str "yet." 
+ ] + ] diff --git a/test/docx/image-before-title.docx b/test/docx/image-before-title.docx new file mode 100644 index 000000000000..dd23d9000786 Binary files /dev/null and b/test/docx/image-before-title.docx differ diff --git a/test/docx/image-before-title.native b/test/docx/image-before-title.native new file mode 100644 index 000000000000..9fd43e58f31b --- /dev/null +++ b/test/docx/image-before-title.native @@ -0,0 +1,97 @@ +Pandoc + Meta + { unMeta = + fromList + [ ( "subtitle" + , MetaInlines + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "subtitle" + ] + ) + , ( "title" + , MetaInlines + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "title" + ] + ) + ] + } + [ Para + [ Image + ( "" + , [] + , [ ( "width" , "6.268055555555556in" ) + , ( "height" , "6.268055555555556in" ) + ] + ) + [ Str "A" + , Space + , Str "cat" + , Space + , Str "holding" + , Space + , Str "a" + , Space + , Str "rainbow" + , Space + , Str "flag." + ] + ( "media/image1.jpeg" , "" ) + ] + , Header + 1 + ( "this-is-a-heading-1" , [] , [] ) + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "heading" + , Space + , Str "1" + ] + , Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "some" + , Space + , Str "text." + ] + , Header + 2 + ( "this-is-a-heading-2" , [] , [] ) + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "heading" + , Space + , Str "2" + ] + , Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "also" + , Space + , Str "some" + , Space + , Str "text." 
+ ] + ] diff --git a/test/docx/metadata_after_normal.native b/test/docx/metadata_after_normal.native index f0e31f8da2da..560b31a54843 100644 --- a/test/docx/metadata_after_normal.native +++ b/test/docx/metadata_after_normal.native @@ -1,7 +1,162 @@ -Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) -[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."] -,Para [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"] -,Para [Str "Mary",Space,Str "Ann",Space,Str "Evans"] -,Para [Str "Aurore",Space,Str "Dupin"] -,Para [Str "July",Space,Str "28,",Space,Str "2014"] -,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]] +Pandoc + Meta + { unMeta = + fromList + [ ( "abstract" + , MetaInlines + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "test" + , Space + , Str "of" + , Space + , Str "how" + , Space + , Str "this" + , Space + , Str "all" + , 
Space + , Str "works." + , Space + , Str "I\8217ve" + , Space + , Str "skipped" + , Space + , Str "lines" + , Space + , Str "here," + , Space + , Str "which" + , Space + , Str "pandoc" + , Space + , Str "doesn\8217t" + , Space + , Str "do," + , Space + , Str "but" + , Space + , Str "which" + , Space + , Str "shouldn\8217t" + , Space + , Str "make" + , Space + , Str "a" + , Space + , Str "difference." + ] + ) + , ( "author" + , MetaList + [ MetaInlines + [ Str "Mary" + , Space + , Str "Ann" + , Space + , Str "Evans" + ] + , MetaInlines [ Str "Aurore" , Space , Str "Dupin" ] + , MetaInlines + [ Str "Mary" + , Space + , Str "Ann" + , Space + , Str "Evans" + ] + , MetaInlines [ Str "Aurore" , Space , Str "Dupin" ] + ] + ) + , ( "date" + , MetaInlines + [ Str "July" , Space , Str "28," , Space , Str "2014" ] + ) + , ( "title" + , MetaInlines + [ Str "This" + , Space + , Str "Is" + , Space + , Str "the" + , Space + , Str "Title" + ] + ) + ] + } + [ Para + [ Str "And" + , Space + , Str "now" + , Space + , Str "this" + , Space + , Str "is" + , Space + , Str "normal" + , Space + , Str "text." + ] + , Para + [ Str "This" + , Space + , Str "Is" + , Space + , Str "the" + , Space + , Str "Title" + ] + , Para + [ Str "July" , Space , Str "28," , Space , Str "2014" ] + , Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "test" + , Space + , Str "of" + , Space + , Str "how" + , Space + , Str "this" + , Space + , Str "all" + , Space + , Str "works." + , Space + , Str "I\8217ve" + , Space + , Str "skipped" + , Space + , Str "lines" + , Space + , Str "here," + , Space + , Str "which" + , Space + , Str "pandoc" + , Space + , Str "doesn\8217t" + , Space + , Str "do," + , Space + , Str "but" + , Space + , Str "which" + , Space + , Str "shouldn\8217t" + , Space + , Str "make" + , Space + , Str "a" + , Space + , Str "difference." + ] + ]