Skip to content

Commit 8b907c3

Browse files
committed
Handle contextual spacing for paragraphs
1 parent 48e542b commit 8b907c3

File tree

14 files changed

+283
-76
lines changed

14 files changed

+283
-76
lines changed

pydocx/export/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def __init__(self, path):
3131
self.footnote_tracker = []
3232

3333
self.captured_runs = None
34+
self.paragraphs = []
3435
self.complex_field_runs = []
3536

3637
self.node_type_to_export_func_map = {
@@ -299,6 +300,11 @@ def yield_body_children(self, body):
299300
return self.yield_numbering_spans(body.children)
300301

301302
def export_paragraph(self, paragraph):
303+
if self.first_pass:
304+
# To properly handle contextual spacing we need to know the style
304+
# of the previous and next paragraphs, so we save all the paragraphs here.
306+
self.paragraphs.append(paragraph)
307+
302308
children = self.yield_paragraph_children(paragraph)
303309
results = self.yield_nested(children, self.export_node)
304310
if paragraph.effective_properties:

pydocx/export/html.py

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
print_function,
66
unicode_literals,
77
)
8-
98
import base64
109
import posixpath
1110
from itertools import chain
@@ -187,8 +186,8 @@ def style(self):
187186
'margin': '0px auto',
188187
},
189188
'p': {
190-
'margin-top': '0px',
191-
'margin-bottom': '0px'
189+
'margin-top': '0',
190+
'margin-bottom': '0'
192191
}
193192
}
194193

@@ -259,7 +258,10 @@ def export_footnote(self, footnote):
259258
return tag.apply(results, allow_empty=False)
260259

261260
def get_paragraph_tag(self, paragraph):
262-
if paragraph.is_empty:
261+
if self.in_table_cell and paragraph.parent.properties.is_continue_vertical_merge:
262+
# We ignore such paragraphs here because they are added via rowspan
263+
return
264+
elif paragraph.is_empty:
263265
return HtmlTag('p', custom_text=HTML_WHITE_SPACE)
264266

265267
heading_style = paragraph.heading_style
@@ -340,13 +342,78 @@ def export_paragraph_property_indentation(self, paragraph, results):
340342

341343
def get_paragraph_property_spacing(self, paragraph):
342344
style = {}
345+
if self.first_pass:
346+
return style
347+
348+
previous_paragraph = None
349+
next_paragraph = None
350+
previous_paragraph_spacing = None
351+
next_paragraph_spacing = None
352+
spacing_after = None
353+
spacing_before = None
354+
355+
current_paragraph_spacing = paragraph.get_spacing()
356+
current_par_index = self.paragraphs.index(paragraph)
357+
358+
if current_par_index > 0:
359+
previous_paragraph = self.paragraphs[current_par_index - 1]
360+
previous_paragraph_spacing = previous_paragraph.get_spacing()
361+
if current_par_index < len(self.paragraphs) - 1:
362+
next_paragraph = self.paragraphs[current_par_index + 1]
363+
next_paragraph_spacing = next_paragraph.get_spacing()
364+
365+
if next_paragraph:
366+
current_after = current_paragraph_spacing['after'] or 0
367+
next_before = next_paragraph_spacing['before'] or 0
368+
369+
same_style = current_paragraph_spacing['parent_style'] == \
370+
next_paragraph_spacing['parent_style']
371+
372+
if same_style:
373+
if not current_paragraph_spacing['contextual_spacing']:
374+
if next_paragraph_spacing['contextual_spacing']:
375+
spacing_after = current_after
376+
else:
377+
if current_after > next_before:
378+
spacing_after = current_after
379+
else:
380+
if current_after > next_before:
381+
spacing_after = current_after
382+
else:
383+
spacing_after = current_paragraph_spacing['after']
384+
385+
if previous_paragraph:
386+
current_before = current_paragraph_spacing['before'] or 0
387+
prev_after = previous_paragraph_spacing['after'] or 0
388+
389+
same_style = current_paragraph_spacing['parent_style'] == \
390+
previous_paragraph_spacing['parent_style']
391+
392+
if same_style:
393+
if not current_paragraph_spacing['contextual_spacing']:
394+
if previous_paragraph_spacing['contextual_spacing']:
395+
if current_before > prev_after:
396+
spacing_before = current_before - prev_after
397+
else:
398+
spacing_before = 0
399+
else:
400+
if current_before > prev_after:
401+
spacing_before = current_before
402+
else:
403+
if current_before > prev_after:
404+
spacing_before = current_before
405+
else:
406+
spacing_before = current_paragraph_spacing['before']
407+
408+
if current_paragraph_spacing['line']:
409+
style['line-height'] = '{0}%'.format(current_paragraph_spacing['line'] * 100)
410+
411+
if spacing_after:
412+
style['margin-bottom'] = '{0:.2f}em'.format(convert_twips_to_ems(spacing_after))
343413

344-
spacing = paragraph.get_spacing()
414+
if spacing_before:
415+
style['margin-top'] = '{0:.2f}em'.format(convert_twips_to_ems(spacing_before))
345416

346-
if spacing['line']:
347-
style['line-height'] = '%s%%' % (spacing['line'] * 100)
348-
if spacing['after']:
349-
style['margin-bottom'] = '{0:.2f}em'.format(convert_twips_to_ems(spacing['after']))
350417
if style:
351418
style = {
352419
'style': convert_dictionary_to_style_fragment(style)

pydocx/openxml/wordprocessing/paragraph.py

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,18 @@
66
)
77

88
from pydocx.models import XmlModel, XmlCollection, XmlChild
9+
from pydocx.openxml.wordprocessing.bookmark import Bookmark
10+
from pydocx.openxml.wordprocessing.deleted_run import DeletedRun
911
from pydocx.openxml.wordprocessing.hyperlink import Hyperlink
12+
from pydocx.openxml.wordprocessing.inserted_run import InsertedRun
1013
from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties # noqa
1114
from pydocx.openxml.wordprocessing.run import Run
12-
from pydocx.openxml.wordprocessing.tab_char import TabChar
13-
from pydocx.openxml.wordprocessing.text import Text
14-
from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun
15-
from pydocx.openxml.wordprocessing.inserted_run import InsertedRun
16-
from pydocx.openxml.wordprocessing.deleted_run import DeletedRun
1715
from pydocx.openxml.wordprocessing.sdt_run import SdtRun
1816
from pydocx.openxml.wordprocessing.simple_field import SimpleField
19-
from pydocx.openxml.wordprocessing.bookmark import Bookmark
17+
from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun
18+
from pydocx.openxml.wordprocessing.tab_char import TabChar
19+
from pydocx.openxml.wordprocessing.text import Text
20+
from pydocx.openxml.wordprocessing.table_cell import TableCell
2021
from pydocx.util.memoize import memoized
2122

2223

@@ -231,27 +232,62 @@ def get_spacing(self):
231232
"""
232233
results = {
233234
'line': None,
234-
'after': None
235+
'after': None,
236+
'before': None,
237+
'contextual_spacing': False,
238+
'parent_style': None
235239
}
236240

237-
default_properties_spacing = self.default_doc_styles.paragraph.properties
238-
no_spacing_properties = not self.properties or self.properties.no_spacing
241+
# Get the paragraph_properties from the parent styles
242+
style_paragraph_properties = None
243+
for style in self.get_style_chain_stack():
244+
if style.paragraph_properties:
245+
style_paragraph_properties = style.paragraph_properties
246+
break
247+
248+
if style_paragraph_properties:
249+
results['contextual_spacing'] = bool(style_paragraph_properties.contextual_spacing)
250+
251+
default_paragraph_properties = None
252+
if self.default_doc_styles and self.default_doc_styles.paragraph:
253+
default_paragraph_properties = self.default_doc_styles.paragraph.properties
239254

240-
if not default_properties_spacing and no_spacing_properties:
255+
# Spacing properties can be defined in multiple places, so we need to check
256+
# them in a defined order of precedence
257+
properties_order = [None, None, None]
258+
if self.properties:
259+
properties_order[0] = self.properties
260+
if isinstance(self.parent, TableCell):
261+
properties_order[1] = self.parent.parent_table.get_paragraph_properties()
262+
if not self.properties or not self.properties.spacing_properties:
263+
properties_order[2] = default_paragraph_properties
264+
265+
spacing_properties = None
266+
contextual_spacing = None
267+
268+
for properties in properties_order:
269+
if spacing_properties is None:
270+
spacing_properties = getattr(properties, 'spacing_properties', None)
271+
if contextual_spacing is None:
272+
contextual_spacing = getattr(properties, 'contextual_spacing', None)
273+
274+
if not spacing_properties:
241275
return results
242276

243-
if no_spacing_properties:
244-
properties = default_properties_spacing
245-
else:
246-
properties = self.properties
277+
if contextual_spacing is not None:
278+
results['contextual_spacing'] = bool(contextual_spacing)
247279

248-
spacing_line = properties.to_int('spacing_line')
249-
spacing_after = properties.to_int('spacing_after')
280+
if self.properties:
281+
results['parent_style'] = self.properties.parent_style
250282

251-
if default_properties_spacing and spacing_line is None \
252-
and bool(properties.spacing_after_auto_spacing):
283+
spacing_line = spacing_properties.to_int('line')
284+
spacing_after = spacing_properties.to_int('after')
285+
spacing_before = spacing_properties.to_int('before')
286+
287+
if default_paragraph_properties and spacing_line is None \
288+
and bool(spacing_properties.after_auto_spacing):
253289
# get the spacing_line from the default definition
254-
spacing_line = default_properties_spacing.to_int('spacing_line')
290+
spacing_line = default_paragraph_properties.spacing_properties.to_int('line')
255291

256292
if spacing_line:
257293
line = spacing_line / 240.0
@@ -262,4 +298,7 @@ def get_spacing(self):
262298
if spacing_after is not None:
263299
results['after'] = spacing_after
264300

301+
if spacing_before is not None:
302+
results['before'] = spacing_before
303+
265304
return results

pydocx/openxml/wordprocessing/paragraph_properties.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pydocx.models import XmlModel, XmlChild
99
from pydocx.openxml.wordprocessing.numbering_properties import NumberingProperties # noqa
10+
from pydocx.openxml.wordprocessing.paragraph_spacing import ParagraphSpacing
1011
from pydocx.types import OnOff
1112

1213

@@ -26,11 +27,9 @@ class ParagraphProperties(XmlModel):
2627
indentation_first_line = XmlChild(name='ind', attrname='firstLine')
2728
indentation_hanging = XmlChild(name='ind', attrname='hanging')
2829

30+
contextual_spacing = XmlChild(type=OnOff, name='contextualSpacing', attrname='val')
2931
# paragraph spacing
30-
spacing_after = XmlChild(name='spacing', attrname='after')
31-
spacing_line = XmlChild(name='spacing', attrname='line')
32-
spacing_line_rule = XmlChild(name='spacing', attrname='lineRule')
33-
spacing_after_auto_spacing = XmlChild(type=OnOff, name='spacing', attrname='afterAutospacing')
32+
spacing_properties = XmlChild(type=ParagraphSpacing)
3433

3534
@property
3635
def start_margin_position(self):
@@ -70,13 +69,4 @@ def no_indentation(self):
7069
self.indentation_hanging,
7170
self.indentation_right,
7271
self.indentation_first_line,
73-
))
74-
75-
@property
76-
def no_spacing(self):
77-
return not any((
78-
self.spacing_line,
79-
self.spacing_after,
80-
self.spacing_after_auto_spacing,
81-
self.spacing_line_rule,
82-
))
72+
))
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# coding: utf-8
2+
from __future__ import (
3+
absolute_import,
4+
print_function,
5+
unicode_literals,
6+
)
7+
8+
from pydocx.models import XmlModel, XmlAttribute
9+
from pydocx.types import OnOff
10+
11+
12+
class ParagraphSpacing(XmlModel):
    """Model for the ``spacing`` XML element of a paragraph.

    The attributes below map to the XML attributes of the same (camelCase)
    name.  Use :meth:`to_int` when an integer value is needed.
    """

    XML_TAG = 'spacing'

    after = XmlAttribute(name='after')
    before = XmlAttribute(name='before')
    line = XmlAttribute(name='line')
    line_rule = XmlAttribute(name='lineRule')
    after_auto_spacing = XmlAttribute(type=OnOff, name='afterAutospacing')

    def to_int(self, attribute, default=None):
        """Return the value of ``attribute`` converted to ``int``.

        :param attribute: name of the attribute on this model to read.
        :param default: value used when the attribute does not exist, and
            returned as-is when the conversion fails.
        :returns: the integer value, or ``default`` when ``int()`` raises
            ``ValueError`` or ``TypeError`` (for example for ``None``).
        """
        # TODO would be nice if this integer conversion was handled
        # implicitly by the model somehow
        try:
            return int(getattr(self, attribute, default))
        except (ValueError, TypeError):
            return default

pydocx/openxml/wordprocessing/style.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pydocx.models import XmlModel, XmlChild, XmlAttribute
99
from pydocx.openxml.wordprocessing.run_properties import RunProperties
10+
from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties
1011

1112

1213
class Style(XmlModel):
@@ -16,6 +17,7 @@ class Style(XmlModel):
1617
style_id = XmlAttribute(name='styleId', default='')
1718
name = XmlChild(attrname='val', default='')
1819
run_properties = XmlChild(type=RunProperties)
20+
paragraph_properties = XmlChild(type=ParagraphProperties)
1921
parent_style = XmlChild(name='basedOn', attrname='val')
2022

2123
def is_a_heading(self):

pydocx/openxml/wordprocessing/table.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@
77

88
from collections import defaultdict
99

10-
from pydocx.models import XmlModel, XmlCollection
10+
from pydocx.models import XmlModel, XmlCollection, XmlChild
1111
from pydocx.openxml.wordprocessing.table_row import TableRow
12+
from pydocx.openxml.wordprocessing.table_properties import TableProperties
1213

1314

1415
class Table(XmlModel):
1516
XML_TAG = 'tbl'
1617

18+
properties = XmlChild(type=TableProperties)
19+
1720
rows = XmlCollection(
1821
TableRow,
1922
)
@@ -45,3 +48,28 @@ def calculate_table_cell_spans(self):
4548
if active_rowspan_for_column:
4649
cell_to_rowspan_count[active_rowspan_for_column] += 1 # noqa
4750
return dict(cell_to_rowspan_count)
51+
52+
def get_style_chain_stack(self):
    """Yield the styles of the style chain referenced by this table.

    Nothing is yielded when the table has no properties, when the
    properties do not name a parent style, or when the container exposes
    no ``style_definitions_part``.  Otherwise every item produced by the
    part's ``get_style_chain_stack('table', parent_style)`` is yielded in
    order.
    """
    properties = self.properties
    if not properties:
        return

    parent_style = properties.parent_style
    if not parent_style:
        return

    # The container may not provide style definitions at all.
    part = getattr(self.container, 'style_definitions_part', None)
    if not part:
        return

    for style in part.get_style_chain_stack('table', parent_style):
        yield style
65+
66+
def get_paragraph_properties(self):
    """Get default style paragraph properties for table.

    Walks ``self.get_style_chain_stack()`` in order and returns the
    ``paragraph_properties`` of the first style that has a truthy value.

    :returns: that style's paragraph properties, or ``None`` when the
        chain is empty or no style in it defines any.
    """
    for style in self.get_style_chain_stack():
        if style.paragraph_properties:
            return style.paragraph_properties
    return None

pydocx/openxml/wordprocessing/table_cell.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
)
77

88
from pydocx.models import XmlModel, XmlCollection, XmlChild
9-
from pydocx.openxml.wordprocessing.paragraph import Paragraph
109
from pydocx.openxml.wordprocessing.table_cell_properties import TableCellProperties # noqa
1110

1211

@@ -16,6 +15,14 @@ class TableCell(XmlModel):
1615
properties = XmlChild(type=TableCellProperties)
1716

1817
children = XmlCollection(
19-
Paragraph,
18+
'wordprocessing.Paragraph',
2019
'wordprocessing.Table',
2120
)
21+
22+
@property
def parent_table(self):
    """Return the parent of this cell's parent.

    Presumably the cell's parent is a table row and that row's parent is
    the table -- confirm against the model hierarchy.
    """
    return self.parent.parent
25+
26+
@property
def table_properties(self):
    """Return the ``properties`` attribute of ``self.parent_table``."""
    return self.parent_table.properties

0 commit comments

Comments
 (0)