Skip to content

Commit 350bc41

Browse files
committed
Handle figures in HTML reader.
1 parent 2b09363 commit 350bc41

File tree

3 files changed

+71
-19
lines changed

3 files changed

+71
-19
lines changed

src/Text/Pandoc/Extensions.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ data Extension =
127127
| Ext_short_subsuperscripts -- ^ sub-&superscripts w/o closing char (v~i)
128128
| Ext_multiline_tables -- ^ Pandoc-style multiline tables
129129
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
130+
| Ext_native_figures -- ^ Use Figure blocks for contents of <figure> tags
130131
| Ext_native_spans -- ^ Use Span inlines for contents of <span>
131132
| Ext_native_numbering -- ^ Use output format's native numbering for figures and tables
132133
| Ext_ntb -- ^ ConTeXt Natural Tables
@@ -527,6 +528,7 @@ getAllExtensions f = universalExtensions <> getAll f
527528
getAll "html" = autoIdExtensions <>
528529
extensionsFromList
529530
[ Ext_native_divs
531+
, Ext_native_figures
530532
, Ext_line_blocks
531533
, Ext_native_spans
532534
, Ext_empty_paragraphs

src/Text/Pandoc/Readers/HTML.hs

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import Data.List.Split (splitWhen)
3535
import Data.List (foldl')
3636
import qualified Data.Map as M
3737
import Data.Maybe (fromMaybe, isJust, isNothing)
38+
import Data.Either (partitionEithers)
3839
import Data.Monoid (First (..))
3940
import qualified Data.Set as Set
4041
import Data.Text (Text)
@@ -57,7 +58,8 @@ import Text.Pandoc.Error
5758
import Text.Pandoc.Logging
5859
import Text.Pandoc.Options (
5960
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
60-
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
61+
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex,
62+
Ext_native_figures),
6163
ReaderOptions (readerExtensions, readerStripComments),
6264
extensionEnabled)
6365
import Text.Pandoc.Parsing hiding ((<|>))
@@ -535,24 +537,43 @@ pPara = do
535537
<|> return (B.para contents)
536538

537539
pFigure :: PandocMonad m => TagParser m Blocks
538-
pFigure = try $ do
539-
TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
540-
skipMany pBlank
541-
let pImg = (\x -> (Just x, Nothing)) <$>
542-
(pInTag TagsOmittable "p" pImage <* skipMany pBlank)
543-
pCapt = (\x -> (Nothing, Just x)) <$> do
544-
bs <- pInTags "figcaption" block
545-
return $ blocksToInlines' $ B.toList bs
546-
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
547-
res <- many (pImg <|> pCapt <|> pSkip)
548-
let mbimg = msum $ map fst res
549-
let mbcap = msum $ map snd res
550-
TagClose _ <- pSatisfy (matchTagClose "figure")
551-
let caption = fromMaybe mempty mbcap
552-
case B.toList <$> mbimg of
553-
Just [Image attr _ (url, tit)] ->
554-
return $ B.simpleFigureWith attr caption url tit
555-
_ -> mzero
540+
-- Parse an HTML <figure> element.
-- If the Ext_native_figures reader extension is enabled, delegate to
-- pNativeFigure. Otherwise use the legacy path below, which only succeeds
-- for a figure whose image part is exactly one Image inline, and produces
-- a simple figure from that image plus the first <figcaption>, if any.
pFigure = do
541+
has_native_figures <-
542+
extensionEnabled Ext_native_figures <$> getOption readerExtensions
543+
if has_native_figures
544+
then pNativeFigure
545+
else try $ do
546+
TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
547+
skipMany pBlank
548+
-- pImg: an image, looked for inside a "p" tag (TagsOmittable presumably
-- lets the <p> wrapper be absent -- confirm against pInTag).
let pImg = (\x -> (Just x, Nothing)) <$>
549+
(pInTag TagsOmittable "p" pImage <* skipMany pBlank)
550+
-- pCapt: the blocks of a <figcaption>, flattened to inlines.
pCapt = (\x -> (Nothing, Just x)) <$> do
551+
bs <- pInTags "figcaption" block
552+
return $ blocksToInlines' $ B.toList bs
553+
-- pSkip: consume any other token that is not the closing </figure>.
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
554+
-- res :: [(Maybe Inlines, Maybe Inlines)]
555+
-- e.g. [(Just img, Nothing), (Nothing, Just caption), (Nothing, Nothing), ...]
556+
res <- many (pImg <|> pCapt <|> pSkip)
557+
-- Take the first image and the first caption, if any; drop the rest.
558+
let mbimg = msum $ map fst res
559+
let mbcap = msum $ map snd res -- mbcap :: Maybe Inlines
560+
TagClose _ <- pSatisfy (matchTagClose "figure")
561+
let caption = fromMaybe mempty mbcap
562+
-- Only a lone Image inline is accepted (its own alt text is discarded in
-- favour of the caption); any other shape makes this parser fail.
563+
case B.toList <$> mbimg of
564+
Just [Image attr _ (url, tit)] ->
565+
return $ B.simpleFigureWith attr caption url tit
566+
_ -> mzero
567+
568+
-- | Parse an HTML @\<figure\>@ element into a Figure block.
-- The attributes of the @\<figure\>@ tag become the Figure's attributes,
-- the contents of every @\<figcaption\>@ child are concatenated into the
-- caption, and all other child blocks form the figure body.
pNativeFigure :: PandocMonad m => TagParser m Blocks
569+
pNativeFigure = try $ do
570+
TagOpen tag attrList <- lookAhead $ pSatisfy (matchTagOpen "figure" [])
571+
-- Only peeked at via lookAhead: pInTags below presumably consumes the opening tag itself -- confirm.
572+
-- Left = blocks of a <figcaption>, Right = any other block.
-- NOTE(review): if pInTags already repeats its argument parser until the
-- closing tag, the `many` here may be redundant -- confirm against pInTags.
contents <- pInTags tag (many $ Left <$> pInTags "figcaption" block <|> (Right <$> block))
573+
574+
let (captions, rest) = partitionEithers contents
575+
-- All captions are merged, in document order; the Caption's first field is left as Nothing.
576+
return $ B.figureWith (toAttr attrList) (Caption Nothing (B.toList (mconcat captions))) $ mconcat rest
556577

557578
pCodeBlock :: PandocMonad m => TagParser m Blocks
558579
pCodeBlock = try $ do

test/command/figures/figures-html.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Writer
2+
13
HTML5 figure with caption and content.
24

35
```
@@ -47,3 +49,30 @@ HTML4 figure with NO caption and content.
4749
<p>content</p>
4850
</div>
4951
```
52+
53+
# Reader
54+
55+
Figure with caption and multiple elements.
56+
57+
```
58+
% pandoc -f html+native_figures -t native
59+
<figure class="important">
60+
<img src="../media/rId25.jpg" />
61+
<ul> <li> ITEM </li> </ul>
62+
<figcaption> CAP2 </figcaption>
63+
</figure>
64+
^D
65+
[Figure ("",["important"],[]) (Caption Nothing [Plain [Str "CAP2"]]) [Plain [Image ("",[],[]) [] ("../media/rId25.jpg","")],BulletList [[Plain [Str "ITEM"]]]]]
66+
```
67+
68+
Figure without caption.
69+
70+
```
71+
% pandoc -f html+native_figures -t native
72+
<figure class="important">
73+
<img src="../media/rId25.jpg" />
74+
<ul> <li> ITEM </li> </ul>
75+
</figure>
76+
^D
77+
[Figure ("",["important"],[]) (Caption Nothing []) [Plain [Image ("",[],[]) [] ("../media/rId25.jpg","")],BulletList [[Plain [Str "ITEM"]]]]]
78+
```

0 commit comments

Comments
 (0)