diff --git a/pydocx/constants.py b/pydocx/constants.py index 9f7c1788..435a20a7 100644 --- a/pydocx/constants.py +++ b/pydocx/constants.py @@ -33,6 +33,12 @@ POINTS_PER_EM = 12 +# Defined in 17.15.1.25 +DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL = 720 # twips + +# Define the whitespace character +HTML_WHITE_SPACE = ' ' + PYDOCX_STYLES = { 'insert': { 'color': 'green', diff --git a/pydocx/export/base.py b/pydocx/export/base.py index 67b002bc..ae1b3330 100644 --- a/pydocx/export/base.py +++ b/pydocx/export/base.py @@ -31,6 +31,7 @@ def __init__(self, path): self.footnote_tracker = [] self.captured_runs = None + self.paragraphs = [] self.complex_field_runs = [] self.node_type_to_export_func_map = { @@ -299,6 +300,11 @@ def yield_body_children(self, body): return self.yield_numbering_spans(body.children) def export_paragraph(self, paragraph): + if self.first_pass: + # To properly handle contextual spacing we need to know what is the style + # of the previous and next paragraphs. So, we save all the paragraphs here. + self.paragraphs.append(paragraph) + children = self.yield_paragraph_children(paragraph) results = self.yield_nested(children, self.export_node) if paragraph.effective_properties: @@ -310,10 +316,7 @@ def yield_paragraph_children(self, paragraph): yield child def get_paragraph_styles_to_apply(self, paragraph): - properties = paragraph.effective_properties property_rules = [ - (properties.justification, self.export_paragraph_property_justification), - (True, self.export_paragraph_property_indentation), ] for actual_value, handler in property_rules: if actual_value: @@ -338,7 +341,6 @@ def export_run(self, run): if self.first_pass: if self.captured_runs is not None: self.captured_runs.append(run) - # TODO squash multiple sequential text nodes into one? 
results = self.yield_nested(run.children, self.export_node) if run.effective_properties: diff --git a/pydocx/export/html.py b/pydocx/export/html.py index 18e3ea53..f3e0f50d 100644 --- a/pydocx/export/html.py +++ b/pydocx/export/html.py @@ -5,7 +5,6 @@ print_function, unicode_literals, ) - import base64 import posixpath from itertools import chain @@ -17,7 +16,8 @@ POINTS_PER_EM, PYDOCX_STYLES, TWIPS_PER_POINT, - EMUS_PER_PIXEL + EMUS_PER_PIXEL, + HTML_WHITE_SPACE ) from pydocx.export.base import PyDocXExporter from pydocx.export.numbering_span import NumberingItem @@ -101,6 +101,7 @@ def __init__( allow_self_closing=False, closed=False, allow_whitespace=False, + custom_text=None, **attrs ): self.tag = tag @@ -108,6 +109,7 @@ def __init__( self.attrs = attrs self.closed = closed self.allow_whitespace = allow_whitespace + self.custom_text = custom_text def apply(self, results, allow_empty=True): if not allow_empty: @@ -116,6 +118,10 @@ def apply(self, results, allow_empty=True): return sequence = [[self]] + + if self.custom_text: + sequence.append([self.custom_text]) + if results is not None: sequence.append(results) @@ -178,6 +184,18 @@ def style(self): styles = { 'body': { 'margin': '0px auto', + }, + 'p': { + 'margin-top': '0', + 'margin-bottom': '0' + }, + 'ol': { + 'margin-top': '0', + 'margin-bottom': '0' + }, + 'ul': { + 'margin-top': '0', + 'margin-bottom': '0' } } @@ -248,17 +266,20 @@ def export_footnote(self, footnote): return tag.apply(results, allow_empty=False) def get_paragraph_tag(self, paragraph): + if isinstance(paragraph.parent, wordprocessing.TableCell): + cell_properties = paragraph.parent.properties + if cell_properties and cell_properties.is_continue_vertical_merge: + # We ignore such paragraphs here because are added via rowspan + return + if paragraph.is_empty: + return HtmlTag('p', custom_text=HTML_WHITE_SPACE) + heading_style = paragraph.heading_style if heading_style: tag = self.get_heading_tag(paragraph) if tag: return tag - if 
self.in_table_cell: - return - if paragraph.has_structured_document_parent(): - return - if isinstance(paragraph.parent, NumberingItem): - return + return HtmlTag('p') def get_heading_tag(self, paragraph): @@ -277,12 +298,10 @@ def get_heading_tag(self, paragraph): def export_paragraph(self, paragraph): results = super(PyDocXHTMLExporter, self).export_paragraph(paragraph) - results = is_not_empty_and_not_only_whitespace(results) - if results is None: - return - tag = self.get_paragraph_tag(paragraph) if tag: + attrs = self.get_paragraph_styles(paragraph) + tag.attrs.update(attrs) results = tag.apply(results) for result in results: @@ -291,9 +310,21 @@ def export_paragraph(self, paragraph): def export_paragraph_property_justification(self, paragraph, results): # TODO these classes could be applied on the paragraph, and not as # inline spans - alignment = paragraph.effective_properties.justification # TODO These alignment values are for traditional conformance. Strict # conformance uses different values + attrs = self.get_paragraph_property_justification(paragraph) + if attrs: + tag = HtmlTag('span', **attrs) + results = tag.apply(results, allow_empty=False) + return results + + def get_paragraph_property_justification(self, paragraph): + attrs = {} + if not paragraph.effective_properties: + return attrs + + alignment = paragraph.effective_properties.justification + if alignment in [JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT]: pydocx_class = 'pydocx-{alignment}'.format( alignment=alignment, @@ -301,42 +332,174 @@ def export_paragraph_property_justification(self, paragraph, results): attrs = { 'class': pydocx_class, } - tag = HtmlTag('span', **attrs) - results = tag.apply(results, allow_empty=False) elif alignment is not None: # TODO What if alignment is something else? 
pass - return results + + return attrs def export_paragraph_property_indentation(self, paragraph, results): # TODO these classes should be applied on the paragraph, and not as # inline styles - properties = paragraph.effective_properties + attrs = self.get_paragraph_property_indentation(paragraph) + + if attrs: + tag = HtmlTag('span', **attrs) + results = tag.apply(results, allow_empty=False) + + return results + def get_paragraph_property_spacing(self, paragraph): style = {} + if self.first_pass: + return style - # Numbering properties can define a text indentation on a paragraph - if properties.numbering_properties: - indentation_left = None - indentation_first_line = None + try: + current_par_index = self.paragraphs.index(paragraph) + except ValueError: + return style + + previous_paragraph = None + next_paragraph = None + previous_paragraph_spacing = None + next_paragraph_spacing = None + spacing_after = None + spacing_before = None + + current_paragraph_spacing = paragraph.get_spacing() + + if current_par_index > 0: + previous_paragraph = self.paragraphs[current_par_index - 1] + previous_paragraph_spacing = previous_paragraph.get_spacing() + if current_par_index < len(self.paragraphs) - 1: + next_paragraph = self.paragraphs[current_par_index + 1] + next_paragraph_spacing = next_paragraph.get_spacing() + + if next_paragraph: + current_after = current_paragraph_spacing['after'] or 0 + next_before = next_paragraph_spacing['before'] or 0 + + same_style = current_paragraph_spacing['parent_style'] == \ + next_paragraph_spacing['parent_style'] + + if same_style: + if not current_paragraph_spacing['contextual_spacing']: + if next_paragraph_spacing['contextual_spacing']: + spacing_after = current_after + else: + if current_after > next_before: + spacing_after = current_after + else: + if current_after > next_before: + spacing_after = current_after + else: + spacing_after = current_paragraph_spacing['after'] + + if previous_paragraph: + current_before = 
current_paragraph_spacing['before'] or 0 + prev_after = previous_paragraph_spacing['after'] or 0 + + same_style = current_paragraph_spacing['parent_style'] == \ + previous_paragraph_spacing['parent_style'] + + if same_style: + if not current_paragraph_spacing['contextual_spacing']: + if previous_paragraph_spacing['contextual_spacing']: + if current_before > prev_after: + spacing_before = current_before - prev_after + else: + spacing_before = 0 + else: + if current_before > prev_after: + spacing_before = current_before + else: + if current_before > prev_after: + spacing_before = current_before + else: + spacing_before = current_paragraph_spacing['before'] + + if current_paragraph_spacing['line']: + style['line-height'] = '{0}%'.format(current_paragraph_spacing['line'] * 100) - paragraph_num_level = paragraph.get_numbering_level() + if spacing_after: + style['margin-bottom'] = '{0:.2f}em'.format(convert_twips_to_ems(spacing_after)) - if paragraph_num_level: - listing_style = self.export_listing_paragraph_property_indentation( - paragraph, - paragraph_num_level.paragraph_properties, - include_text_indent=True + if spacing_before: + style['margin-top'] = '{0:.2f}em'.format(convert_twips_to_ems(spacing_before)) + + if style: + style = { + 'style': convert_dictionary_to_style_fragment(style) + } + + return style + + def get_paragraph_property_indentation(self, paragraph): + style = {} + attrs = {} + properties = paragraph.effective_properties + + indentation_right = None + indentation_left = 0 + indentation_first_line = None + span_paragraph_properties = None + span_indentation_left = 0 + + try: + if isinstance(paragraph.parent, NumberingItem): + span_paragraph_properties = paragraph.parent.numbering_span.numbering_level.\ + paragraph_properties + span_indentation_left = span_paragraph_properties.to_int( + 'indentation_left', + default=0 ) - if 'text-indent' in listing_style and listing_style['text-indent'] != '0.00em': - style['text-indent'] = 
listing_style['text-indent'] - style['display'] = 'inline-block' - else: - indentation_left = properties.to_int('indentation_left') - indentation_first_line = properties.to_int('indentation_first_line') + span_indentation_hanging = span_paragraph_properties.to_int( + 'indentation_hanging', + default=0 + ) + if span_paragraph_properties: + indentation_left -= (span_indentation_left - span_indentation_hanging) - indentation_right = properties.to_int('indentation_right') + except AttributeError: + pass + + if properties: + indentation_right = properties.to_int('indentation_right') + + if properties.numbering_properties is None: + # For paragraph inside list we need to properly adjust indentations + # by recalculating their indentations based on the parent span + indentation_left = properties.to_int('indentation_left', default=0) + indentation_first_line = properties.to_int('indentation_first_line', default=0) + + if isinstance(paragraph.parent, NumberingItem): + if properties.is_list_paragraph and properties.no_indentation: + indentation_left = 0 + elif span_paragraph_properties: + indentation_left -= span_indentation_left + # In this case we don't need to set text-indent separately because + # it's part of the left margin + indentation_left += indentation_first_line + indentation_first_line = None + else: + # TODO Here we may encounter fake lists and not always margins are + # set properly. 
+ pass + else: + indentation_left = None + indentation_first_line = None + paragraph_num_level = paragraph.get_numbering_level() + + if paragraph_num_level: + listing_style = self.export_listing_paragraph_property_indentation( + paragraph, + paragraph_num_level.paragraph_properties, + include_text_indent=True + ) + if 'text-indent' in listing_style and \ + listing_style['text-indent'] != '0.00em': + style['text-indent'] = listing_style['text-indent'] if indentation_right: right = convert_twips_to_ems(indentation_right) @@ -349,16 +512,34 @@ def export_paragraph_property_indentation(self, paragraph, results): if indentation_first_line: first_line = convert_twips_to_ems(indentation_first_line) style['text-indent'] = '{0:.2f}em'.format(first_line) - style['display'] = 'inline-block' if style: attrs = { 'style': convert_dictionary_to_style_fragment(style) } - tag = HtmlTag('span', **attrs) - results = tag.apply(results, allow_empty=False) - return results + return attrs + + def get_paragraph_styles(self, paragraph): + attributes = {} + + property_rules = [ + (True, self.get_paragraph_property_justification), + (True, self.get_paragraph_property_indentation), + (True, self.get_paragraph_property_spacing), + ] + for actual_value, handler in property_rules: + if actual_value: + handler_results = handler(paragraph) + for attr_name in ['style', 'class']: + new_value = handler_results.get(attr_name, '') + if new_value: + if attr_name in attributes: + attributes[attr_name] += ';%s' % new_value + else: + attributes[attr_name] = '%s' % new_value + + return attributes def export_listing_paragraph_property_indentation( self, @@ -435,7 +616,7 @@ def export_listing_paragraph_property_indentation( margin_left = convert_twips_to_ems(margin_left) style['margin-left'] = '{0:.2f}em'.format(margin_left) - # we don't allow negative hanging + # We don't allow negative hanging if hanging < 0: hanging = 0 @@ -671,9 +852,10 @@ def export_table_cell(self, table_cell): tag = HtmlTag('td', 
**attrs) numbering_spans = self.yield_numbering_spans(table_cell.children) - results = self.yield_nested_with_line_breaks_between_paragraphs( + + results = self.yield_nested( numbering_spans, - self.export_node, + self.export_node ) if tag: results = tag.apply(results) @@ -819,19 +1001,16 @@ def export_numbering_span(self, numbering_span): return tag.apply(results) def export_numbering_item(self, numbering_item): - results = self.yield_nested_with_line_breaks_between_paragraphs( - numbering_item.children, - self.export_node, - ) + results = super(PyDocXHTMLExporter, self).export_numbering_item(numbering_item) style = None if numbering_item.children: - level_properties = numbering_item.numbering_span.\ + level_properties = numbering_item.numbering_span. \ numbering_level.paragraph_properties # get the first paragraph properties which will contain information # on how to properly indent listing item - paragraph = numbering_item.children[0] + paragraph = numbering_item.get_first_child() style = self.export_listing_paragraph_property_indentation(paragraph, level_properties) diff --git a/pydocx/export/numbering_span.py b/pydocx/export/numbering_span.py index 809ff672..1986c4dd 100644 --- a/pydocx/export/numbering_span.py +++ b/pydocx/export/numbering_span.py @@ -14,9 +14,7 @@ from pydocx.openxml.wordprocessing.tab_char import TabChar from pydocx.openxml.wordprocessing.text import Text from pydocx.util.memoize import memoized - -# Defined in 17.15.1.25 -DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL = 720 # twips +from pydocx.constants import DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL roman_numeral_map = tuple(zip( (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1), diff --git a/pydocx/models.py b/pydocx/models.py index a8b2e2b6..cf472cc8 100644 --- a/pydocx/models.py +++ b/pydocx/models.py @@ -308,6 +308,8 @@ def load(cls, element, **load_kwargs): if field.name is not None: attr_name = field.name value = element.attrib.get(attr_name, field.default) + if callable(field.type): + value 
= field.type(value) kwargs[field_name] = value # Child tag fields may specify a handler/type, which is responsible for @@ -395,3 +397,11 @@ def child_handler(child): # Create a new instance using the values we've calculated return cls(**kwargs) + + @property + def default_doc_styles(self): + part = getattr(self.container, 'style_definitions_part', None) + if part: + return part.styles.doc_defaults + + return None diff --git a/pydocx/openxml/packaging/style_definitions_part.py b/pydocx/openxml/packaging/style_definitions_part.py index 7f1cda25..347e0ec0 100644 --- a/pydocx/openxml/packaging/style_definitions_part.py +++ b/pydocx/openxml/packaging/style_definitions_part.py @@ -51,28 +51,39 @@ def get_style_chain_stack(self, style_type, style_id): styleB styleC ''' - visited_styles = set() - visited_styles.add(style_id) + visited_styles = set() styles = self.styles.get_styles_by_type(style_type) - base_style = styles.get(style_id) - - if base_style: - yield base_style - - # Build up the stack of styles to merge together - current_style = base_style - while current_style: - if not current_style.parent_style: - # The current style doesn't have a parent style - break - if current_style.parent_style in visited_styles: - # Loop detected - break - style = styles.get(current_style.parent_style) - if not style: - # Style doesn't exist - break - visited_styles.add(style.style_id) - yield style - current_style = style + styles_to_apply = {} + + def yield_styles_parent_stack(base_style): + if base_style: + yield base_style + + # Build up the stack of styles to merge together + current_style = base_style + while current_style: + if not current_style.parent_style: + # The current style doesn't have a parent style + break + if current_style.parent_style in visited_styles: + # Loop detected + break + style = styles.get(current_style.parent_style) + if not style: + # Style doesn't exist + break + visited_styles.add(style.style_id) + yield style + current_style = style + + if not 
style_id: + # In this case we need to check the default defined styles + styles_to_apply = self.styles.get_default_styles_by_type(style_type) + else: + styles_to_apply[style_id] = styles.get(style_id) + + for style_id, style in styles_to_apply.items(): + visited_styles.add(style_id) + for s in yield_styles_parent_stack(style): + yield s diff --git a/pydocx/openxml/wordprocessing/abstract_num.py b/pydocx/openxml/wordprocessing/abstract_num.py index cdad313e..63116793 100644 --- a/pydocx/openxml/wordprocessing/abstract_num.py +++ b/pydocx/openxml/wordprocessing/abstract_num.py @@ -5,6 +5,7 @@ unicode_literals, ) +from pydocx.constants import DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL from pydocx.models import XmlModel, XmlChild, XmlAttribute, XmlCollection from pydocx.openxml.wordprocessing.level import Level @@ -40,7 +41,7 @@ def get_indentation_between_levels(self): lvl1_ind = self.levels[1].paragraph_properties.to_int('indentation_left', default=0) ind_step = lvl1_ind - lvl0_ind - except IndexError: - ind_step = 720 # default one + except (IndexError, AttributeError): + ind_step = DEFAULT_AUTOMATIC_TAB_STOP_INTERVAL # default one return ind_step diff --git a/pydocx/openxml/wordprocessing/doc_defaults.py b/pydocx/openxml/wordprocessing/doc_defaults.py new file mode 100644 index 00000000..b76d4a54 --- /dev/null +++ b/pydocx/openxml/wordprocessing/doc_defaults.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlChild +from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties +from pydocx.openxml.wordprocessing.run_properties import RunProperties + + +class ParagraphStyleDefaults(XmlModel): + XML_TAG = 'pPrDefault' + + properties = XmlChild(type=ParagraphProperties) + + +class RunStyleDefaults(XmlModel): + XML_TAG = 'rPrDefault' + + properties = XmlChild(type=RunProperties) + + +class DocDefaults(XmlModel): + XML_TAG = 'docDefaults' + + 
paragraph = XmlChild(type=ParagraphStyleDefaults) + run = XmlChild(type=RunStyleDefaults) diff --git a/pydocx/openxml/wordprocessing/paragraph.py b/pydocx/openxml/wordprocessing/paragraph.py index bdb6b387..3b26a46e 100644 --- a/pydocx/openxml/wordprocessing/paragraph.py +++ b/pydocx/openxml/wordprocessing/paragraph.py @@ -4,19 +4,20 @@ print_function, unicode_literals, ) -from pydocx.util.memoize import memoized + from pydocx.models import XmlModel, XmlCollection, XmlChild +from pydocx.openxml.wordprocessing.bookmark import Bookmark +from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.hyperlink import Hyperlink +from pydocx.openxml.wordprocessing.inserted_run import InsertedRun from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties # noqa from pydocx.openxml.wordprocessing.run import Run -from pydocx.openxml.wordprocessing.tab_char import TabChar -from pydocx.openxml.wordprocessing.text import Text -from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun -from pydocx.openxml.wordprocessing.inserted_run import InsertedRun -from pydocx.openxml.wordprocessing.deleted_run import DeletedRun from pydocx.openxml.wordprocessing.sdt_run import SdtRun from pydocx.openxml.wordprocessing.simple_field import SimpleField -from pydocx.openxml.wordprocessing.bookmark import Bookmark +from pydocx.openxml.wordprocessing.smart_tag_run import SmartTagRun +from pydocx.openxml.wordprocessing.tab_char import TabChar +from pydocx.openxml.wordprocessing.text import Text +from pydocx.util.memoize import memoized class Paragraph(XmlModel): @@ -35,17 +36,83 @@ class Paragraph(XmlModel): Bookmark ) - def __init__(self, **kwargs): - super(Paragraph, self).__init__(**kwargs) - self._effective_properties = None + @property + def is_empty(self): + if not self.children: + return True + + # we may have cases when a paragraph has a Bookmark with name '_GoBack' + # and we should treat it as empty paragraph + if 
len(self.children) == 1: + first_child = self.children[0] + if isinstance(first_child, Bookmark) and \ + first_child.name in ('_GoBack',): + return True + # We can have cases when only run properties are defined and no text + elif not getattr(first_child, "children", None): + return True + return False + + def _get_properties_inherited_from_parent_table(self): + from pydocx.openxml.wordprocessing.table import Table + + inherited_properties = {} + + parent_table = self.get_first_ancestor(Table) + if parent_table: + style_stack = parent_table.get_style_chain_stack() + for style in reversed(list(style_stack)): + if style.paragraph_properties: + inherited_properties.update( + dict(style.paragraph_properties.fields), + ) + return inherited_properties + + def _get_inherited_properties_from_parent_style(self): + inherited_properties = {} + style_stack = self.get_style_chain_stack() + for style in reversed(list(style_stack)): + if style.paragraph_properties: + inherited_properties.update( + dict(style.paragraph_properties.fields), + ) + return inherited_properties @property + def inherited_properties(self): + properties = {} + + if self.default_doc_styles and \ + getattr(self.default_doc_styles.paragraph, 'properties'): + properties.update( + dict(self.default_doc_styles.paragraph.properties.fields), + ) + properties.update( + self._get_inherited_properties_from_parent_style(), + ) + # Tables can also define custom paragraph pr + properties.update( + self._get_properties_inherited_from_parent_table(), + ) + + # TODO When enable this make sure that you check the paragraph margins logic + # numbering_level = self.get_numbering_level() + # if numbering_level and numbering_level.paragraph_properties: + # properties.update( + # dict(numbering_level.paragraph_properties.fields), + # ) + + return ParagraphProperties(**properties) + + @property + @memoized def effective_properties(self): - # TODO need to calculate effective properties like Run - if not self._effective_properties: 
- properties = self.properties - self._effective_properties = properties - return self._effective_properties + inherited_properties = self.inherited_properties + effective_properties = {} + effective_properties.update(dict(inherited_properties.fields)) + if self.properties: + effective_properties.update(dict(self.properties.fields)) + return ParagraphProperties(**effective_properties) @property def numbering_definition(self): @@ -56,12 +123,9 @@ def has_structured_document_parent(self): return self.has_ancestor(SdtBlock) def get_style_chain_stack(self): - if not self.properties: - return - - parent_style = self.properties.parent_style - if not parent_style: - return + # Even if parent style is not defined we still need to check the default style + # properties applied + parent_style = getattr(self.properties, 'parent_style', None) # TODO the getattr is necessary because of footnotes. From the context # of a footnote, a paragraph's container is the footnote part, which @@ -97,9 +161,9 @@ def get_numbering_definition(self): part = getattr(self.container, 'numbering_definitions_part', None) if not part: return - if not self.effective_properties: + if not self.properties: return - numbering_properties = self.effective_properties.numbering_properties + numbering_properties = self.properties.numbering_properties if not numbering_properties: return return part.numbering.get_numbering_definition( @@ -111,9 +175,9 @@ def get_numbering_level(self): numbering_definition = self.get_numbering_definition() if not numbering_definition: return - if not self.effective_properties: + if not self.properties: return - numbering_properties = self.effective_properties.numbering_properties + numbering_properties = self.properties.numbering_properties if not numbering_properties: return return numbering_definition.get_level( @@ -206,3 +270,39 @@ def get_indentation(self, indentation, only_level_ind=False): ind = level.paragraph_properties.to_int(indentation, default=0) return ind + + def 
get_spacing(self): + """Get paragraph spacing according to: + ECMA-376, 3rd Edition (June, 2011), + Fundamentals and Markup Language Reference § 17.3.1.33. + """ + results = { + 'line': None, + 'after': None, + 'before': None, + 'contextual_spacing': bool(self.effective_properties.contextual_spacing), + 'parent_style': self.effective_properties.parent_style + } + + spacing_properties = self.effective_properties.spacing_properties + + if spacing_properties is None: + return results + + spacing_line = spacing_properties.to_int('line') + spacing_after = spacing_properties.to_int('after') + spacing_before = spacing_properties.to_int('before') + + if spacing_line: + line = float("%.2f" % (spacing_line / 240.0)) + # default line spacing is 1 so no need to add attribute + if line != 1.0: + results['line'] = line + + if spacing_after is not None: + results['after'] = spacing_after + + if spacing_before is not None: + results['before'] = spacing_before + + return results diff --git a/pydocx/openxml/wordprocessing/paragraph_properties.py b/pydocx/openxml/wordprocessing/paragraph_properties.py index c6bbc374..dd1e6712 100644 --- a/pydocx/openxml/wordprocessing/paragraph_properties.py +++ b/pydocx/openxml/wordprocessing/paragraph_properties.py @@ -7,6 +7,8 @@ from pydocx.models import XmlModel, XmlChild from pydocx.openxml.wordprocessing.numbering_properties import NumberingProperties # noqa +from pydocx.openxml.wordprocessing.paragraph_spacing import ParagraphSpacing +from pydocx.types import OnOff class ParagraphProperties(XmlModel): @@ -25,6 +27,10 @@ class ParagraphProperties(XmlModel): indentation_first_line = XmlChild(name='ind', attrname='firstLine') indentation_hanging = XmlChild(name='ind', attrname='hanging') + contextual_spacing = XmlChild(type=OnOff, name='contextualSpacing', attrname='val') + # paragraph spacing + spacing_properties = XmlChild(type=ParagraphSpacing) + @property def start_margin_position(self): # Regarding indentation, @@ -51,3 +57,16 @@ def
to_int(self, attribute, default=None): return int(getattr(self, attribute, default)) except (ValueError, TypeError): return default + + @property + def is_list_paragraph(self): + return self.parent_style == 'ListParagraph' + + @property + def no_indentation(self): + return not any(( + self.indentation_left, + self.indentation_hanging, + self.indentation_right, + self.indentation_first_line, + )) diff --git a/pydocx/openxml/wordprocessing/paragraph_spacing.py b/pydocx/openxml/wordprocessing/paragraph_spacing.py new file mode 100644 index 00000000..6912bdd3 --- /dev/null +++ b/pydocx/openxml/wordprocessing/paragraph_spacing.py @@ -0,0 +1,27 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlAttribute +from pydocx.types import OnOff + + +class ParagraphSpacing(XmlModel): + XML_TAG = 'spacing' + + after = XmlAttribute(name='after') + before = XmlAttribute(name='before') + line = XmlAttribute(name='line') + line_rule = XmlAttribute(name='lineRule') + after_auto_spacing = XmlAttribute(type=OnOff, name='afterAutospacing') + + def to_int(self, attribute, default=None): + # TODO would be nice if this integer conversion was handled + # implicitly by the model somehow + try: + return int(getattr(self, attribute, default)) + except (ValueError, TypeError): + return default diff --git a/pydocx/openxml/wordprocessing/run.py b/pydocx/openxml/wordprocessing/run.py index 2acdb944..3095ce54 100644 --- a/pydocx/openxml/wordprocessing/run.py +++ b/pydocx/openxml/wordprocessing/run.py @@ -46,12 +46,9 @@ class Run(XmlModel): ) def get_style_chain_stack(self): - if not self.properties: - return - - parent_style = self.properties.parent_style - if not parent_style: - return + # Even if parent style is not defined we still need to check the default style + # properties applied + parent_style = getattr(self.properties, 'parent_style', None) # TODO the getattr is necessary because of footnotes. 
From the context # of a footnote, a paragraph's container is the footnote part, which @@ -90,6 +87,11 @@ def _get_inherited_properties_from_parent_style(self): @property def inherited_properties(self): properties = {} + if self.default_doc_styles and getattr(self.default_doc_styles.run, 'properties'): + properties.update( + dict(self.default_doc_styles.run.properties.fields), + ) + properties.update( self._get_properties_inherited_from_parent_paragraph(), ) diff --git a/pydocx/openxml/wordprocessing/style.py b/pydocx/openxml/wordprocessing/style.py index 2092d30c..feefe0c6 100644 --- a/pydocx/openxml/wordprocessing/style.py +++ b/pydocx/openxml/wordprocessing/style.py @@ -5,17 +5,21 @@ unicode_literals, ) +from pydocx.types import OnOff from pydocx.models import XmlModel, XmlChild, XmlAttribute from pydocx.openxml.wordprocessing.run_properties import RunProperties +from pydocx.openxml.wordprocessing.paragraph_properties import ParagraphProperties class Style(XmlModel): XML_TAG = 'style' style_type = XmlAttribute(name='type', default='paragraph') + style_default = XmlAttribute(type=OnOff, name='default', default='0') style_id = XmlAttribute(name='styleId', default='') name = XmlChild(attrname='val', default='') run_properties = XmlChild(type=RunProperties) + paragraph_properties = XmlChild(type=ParagraphProperties) parent_style = XmlChild(name='basedOn', attrname='val') def is_a_heading(self): diff --git a/pydocx/openxml/wordprocessing/styles.py b/pydocx/openxml/wordprocessing/styles.py index 46bf6287..c9acf7a7 100644 --- a/pydocx/openxml/wordprocessing/styles.py +++ b/pydocx/openxml/wordprocessing/styles.py @@ -7,22 +7,31 @@ from collections import defaultdict -from pydocx.models import XmlCollection, XmlModel +from pydocx.models import XmlCollection, XmlModel, XmlChild from pydocx.openxml.wordprocessing.style import Style +from pydocx.openxml.wordprocessing.doc_defaults import DocDefaults class Styles(XmlModel): XML_TAG = 'styles' styles = XmlCollection(Style) + 
doc_defaults = XmlChild(type=DocDefaults) def __init__(self, styles=None, *args, **kwargs): super(Styles, self).__init__(styles=styles, *args, **kwargs) styles_by_type = defaultdict(dict) + default_styles_by_type = defaultdict(dict) for style in self.styles: styles_by_type[style.style_type][style.style_id] = style + if bool(style.style_default): + default_styles_by_type[style.style_type][style.style_id] = style self.styles_by_type = dict(styles_by_type) + self.default_styles_by_type = dict(default_styles_by_type) def get_styles_by_type(self, style_type): return self.styles_by_type.get(style_type, {}) + + def get_default_styles_by_type(self, style_type): + return self.default_styles_by_type.get(style_type, {}) diff --git a/pydocx/openxml/wordprocessing/table.py b/pydocx/openxml/wordprocessing/table.py index 61314b29..cea35b9b 100644 --- a/pydocx/openxml/wordprocessing/table.py +++ b/pydocx/openxml/wordprocessing/table.py @@ -7,13 +7,16 @@ from collections import defaultdict -from pydocx.models import XmlModel, XmlCollection +from pydocx.models import XmlModel, XmlCollection, XmlChild from pydocx.openxml.wordprocessing.table_row import TableRow +from pydocx.openxml.wordprocessing.table_properties import TableProperties class Table(XmlModel): XML_TAG = 'tbl' + properties = XmlChild(type=TableProperties) + rows = XmlCollection( TableRow, ) @@ -45,3 +48,14 @@ def calculate_table_cell_spans(self): if active_rowspan_for_column: cell_to_rowspan_count[active_rowspan_for_column] += 1 # noqa return dict(cell_to_rowspan_count) + + def get_style_chain_stack(self): + # Even if parent style is not defined we still need to check the default style + # properties applied + parent_style = getattr(self.properties, 'parent_style', None) + + part = getattr(self.container, 'style_definitions_part', None) + if part: + style_stack = part.get_style_chain_stack('table', parent_style) + for result in style_stack: + yield result diff --git a/pydocx/openxml/wordprocessing/table_cell.py 
b/pydocx/openxml/wordprocessing/table_cell.py index 8a538e86..309a8c2c 100644 --- a/pydocx/openxml/wordprocessing/table_cell.py +++ b/pydocx/openxml/wordprocessing/table_cell.py @@ -6,7 +6,6 @@ ) from pydocx.models import XmlModel, XmlCollection, XmlChild -from pydocx.openxml.wordprocessing.paragraph import Paragraph from pydocx.openxml.wordprocessing.table_cell_properties import TableCellProperties # noqa @@ -16,6 +15,14 @@ class TableCell(XmlModel): properties = XmlChild(type=TableCellProperties) children = XmlCollection( - Paragraph, + 'wordprocessing.Paragraph', 'wordprocessing.Table', ) + + @property + def parent_table(self): + return self.parent.parent + + @property + def table_properties(self): + return self.parent_table.properties diff --git a/pydocx/openxml/wordprocessing/table_cell_properties.py b/pydocx/openxml/wordprocessing/table_cell_properties.py index 2a94c9b0..7ae94c31 100644 --- a/pydocx/openxml/wordprocessing/table_cell_properties.py +++ b/pydocx/openxml/wordprocessing/table_cell_properties.py @@ -21,7 +21,11 @@ def should_close_previous_vertical_merge(self): # of preceding cells shall be closed. 
if self.vertical_merge is None: return True + return not self.is_continue_vertical_merge + + @property + def is_continue_vertical_merge(self): + if self.vertical_merge is None: + return False merge = self.vertical_merge.get('val', 'continue') - if merge != 'continue': - return True - return False + return merge == 'continue' diff --git a/pydocx/openxml/wordprocessing/table_properties.py b/pydocx/openxml/wordprocessing/table_properties.py new file mode 100644 index 00000000..76b005d2 --- /dev/null +++ b/pydocx/openxml/wordprocessing/table_properties.py @@ -0,0 +1,14 @@ +# coding: utf-8 +from __future__ import ( + absolute_import, + print_function, + unicode_literals, +) + +from pydocx.models import XmlModel, XmlChild + + +class TableProperties(XmlModel): + XML_TAG = 'tblPr' + + parent_style = XmlChild(name='tblStyle', attrname='val') diff --git a/pydocx/test/testcases.py b/pydocx/test/testcases.py index 50e144c1..0024367a 100644 --- a/pydocx/test/testcases.py +++ b/pydocx/test/testcases.py @@ -50,6 +50,9 @@ '.pydocx-tab {display:inline-block;width:4em}' '.pydocx-underline {text-decoration:underline}' 'body {margin:0px auto;width:51.00em}' + 'ol {margin-bottom:0;margin-top:0}' + 'p {margin-bottom:0;margin-top:0}' + 'ul {margin-bottom:0;margin-top:0}' '' ) diff --git a/pydocx/types.py b/pydocx/types.py index 722024ab..327008c0 100644 --- a/pydocx/types.py +++ b/pydocx/types.py @@ -12,6 +12,12 @@ def __init__(self, value): def __bool__(self): return self.__nonzero__() + def __repr__(self): + return '{klass}({kwargs})'.format( + klass=self.__class__.__name__, + kwargs=bool(self) + ) + class OnOff(SimpleType): ''' diff --git a/tests/export/html/test_field_code.py b/tests/export/html/test_field_code.py index 77033818..274e011c 100644 --- a/tests/export/html/test_field_code.py +++ b/tests/export/html/test_field_code.py @@ -206,6 +206,7 @@ def test_spanning_multiple_paragraphs(self): expected_html = '''
Link:
+DDD.
diff --git a/tests/export/html/test_heading.py b/tests/export/html/test_heading.py index b77dfc2d..f50176cb 100644 --- a/tests/export/html/test_heading.py +++ b/tests/export/html/test_heading.py @@ -274,7 +274,7 @@ def test_heading_in_a_nested_list_numbering_is_preserved_with_strong(self): expected_html = '''foo
foo
+bar
bare paragraph
foo
bare paragraph
foo
+bare paragraph
+single list item
before table
| foo | +foo |
after table
@@ -739,7 +741,7 @@ def test_single_lvl_list_has_precedence_over_headings(self): expected_html = '''non-heading list item
' self.assert_document_generates_html(document, expected_html) def test_undefined_relationship(self): diff --git a/tests/export/html/test_markup_compatibility.py b/tests/export/html/test_markup_compatibility.py index 6b89dc25..47efcce5 100644 --- a/tests/export/html/test_markup_compatibility.py +++ b/tests/export/html/test_markup_compatibility.py @@ -155,7 +155,9 @@ def test_fallback_contains_a_table(self):
AAABBB
| CCC | +
+ CCC + |
DDDEEE
@@ -204,7 +206,9 @@ def test_fallback_has_invalid_children(self):AAABBB
| CCC | +
+ CCC + |
AAA
+Foo
AAA
+BBB
+Bar
''' @@ -172,11 +178,15 @@ def test_multi_level_list_with_surrounding_paragraphs(self): expected_html = '''Foo
AAA
BBB
CCC
+Foo
AAA
+BBB
+Bar
''' @@ -293,7 +307,9 @@ def test_basic_list_followed_by_list_that_is_heading_and_paragraph(self): expected_html = '''AAA
+Foo
AAA
Bar
BBB
Baz
''' @@ -382,7 +398,7 @@ def test_single_list_followed_by_paragraph(self): expected_html = '''AAA
Foo
''' @@ -421,8 +437,11 @@ def test_single_list_with_bare_paragraph_between_items(self): expected_html = '''AAA
+Foo
+BBB
AAA
foo
''' @@ -592,11 +611,12 @@ def test_missing_level_in_between_valid_levels(self): expected_html = '''AAA
+foo
+BBB
AAA
+''' self.assert_document_generates_html(document, expected_html) @@ -666,8 +689,13 @@ def test_empty_paragraph_in_between_list_items(self): expected_html = '''
AAA
++
BBB
+AAA
+ +BBB
+AAA
++
BBB
+AAA
++
BBB
+CCC
+AAA
++
+
+
BBB
+CCC
+AAA
++
Foo
++
Bar
+CCC
+AAA
++
BBB
+AAA
+BBB
+CCC
+DDD
AAA
BBB
CCC
DDD
''' @@ -1216,11 +1281,15 @@ def test_default_indentation(self): expected_html = '''AAA
BBB
CCC
+AAA
BBB
CCC
+AAA
BBB
CCC
AAA
BBB
CCC
AAA
BBB
CCC
+DDD
+AAA
BBB
+CCC
+DDD
{item}
Foo
Bar
Baz
AAA
BBB
CCC
Foo
Bar
AAA
AA
AB
BA
BB
AA
AB
BA
BB
AAA
BBB
CCC
AAA
BBB
CCC
AA
AAA
AAB
AAC
AACA
AAD
AB
AA
AB
4. AC @@ -2065,8 +2152,8 @@ def test_space_after_dot_followed_by_number_is_converted(self): expected_html = '''
1
2
a
b
c
d
e
a
b
c
d
e
Foo Bar
Foo Bar
AAA
BBB
Foo
Bar
AA
+AB
ABA
+ABB
ABBA
ABBB
ABBC
ABC
AC
' self.assert_document_generates_html(document, expected_html) def test_multiple_runs_with_only_whitespace(self): @@ -34,7 +34,7 @@ def test_multiple_runs_with_only_whitespace(self): document = WordprocessingDocumentFactory() document.add(MainDocumentPart, document_xml) - expected_html = '' + expected_html = '
' self.assert_document_generates_html(document, expected_html) def test_run_with_only_whitespace_styled(self): @@ -51,7 +51,7 @@ def test_run_with_only_whitespace_styled(self): document = WordprocessingDocumentFactory() document.add(MainDocumentPart, document_xml) - expected_html = '' + expected_html = '
' self.assert_document_generates_html(document, expected_html) @@ -154,7 +154,7 @@ def test_single_whitespace_in_text_run_is_preserved(self): expected_html = '
Foo Bar
' self.assert_document_generates_html(document, expected_html) - def test_paragraph_with_only_whitespace_is_ignored(self): + def test_paragraph_with_only_whitespace_is_not_ignored(self): document_xml = '''
ABC
Foo Foo Bar Foo Bar One Two
+ Foo
+ Foo One Two Foo Bar Two Foo Foo Foo Foo Foo Foo Foo
+ AAA BBB AAA BBB CCC DDD AAA BBB CCC AAA BBB CCC DDD EEE FFF GGG AAA BBB DDD AAA BBB CCC DDD AAA BBB CCC AAA AAA BBB CCC DDD EEE FFF GGG HHH AAA BBB CCC DDD AAA BBB CCC AAA BBB CCC DDD AAA BBB CCC AAA
+ BBB
+ CCC AAA
+ BBB
+ CCC AAA BBB CCC AAA BBB Blank Column 1 Column 2 Row 1 First Second Row 2 Third Fourth AAA BBB CCC AAA BBB CCC AAA BBB CCC AAA BBB CCC DDD EEE AAA BBB CCC DDD EEE FFF GGG HHH III JJJ KKK LLL MMM NNN OOO PPP QQQ RRR SSS TTT UUU H2O
+ H
+ 2
+ O
+ nth
+ n
+ th
+ Title Text Text Center Justified Right justified
-
-
- Right justified and pushed in from right
-
-
-
-
-
- Center justified and pushed in from left and it is great and it is the
- coolest thing of all time and I like it and I think it is cool
-
-
-
-
- Left justified and pushed in from left
-
- Center Justified Right justified Right justified and pushed in from right Center justified and pushed in from left and it is great and it is the coolest thing of all time and I like it and I think it is cool Left justified and pushed in from left AAA BBB CCC AAA BBB CCC DDD EEE FFF GGG HHH AAA BBB CCC DDD EEE FFF one two three AAA BBB CCC alpha four xxx yyy www zzz one two AAA1 BBB1 CCC1 CCC11 CCC12 CCC121 CCC122 three AAA2 BBB2 CCC2 four five AAA BBB CCC DDD EEE AAA BBB CCC DDD EEE FFF GGG Heading 1 Heading 2 AAA BBB Heading 3 CCC Heading 4 DDD Heading 5 EEE Heading 6 AAA BBB CCC DDD EEE FFF GGG HHH III JJJ KKK LLL MMM NNN OOO Heading1
- Heading2
-
-
-
-
- Heading3
-
+ Heading2
+
+
+ Heading3
-
+
- AAA CCC DDD FFF GGG III JJJ KKK LLL
- Simple text
- Simple text one two three Cell1 Cell2 Cell3 Cell4 One two Cell1 Cell3 Cell2 And I am writing in the table Cell4 AAA BBB
+ AAA
+
+ BBB
+ CCC DDD AAA BBB CCC DDD EEE FFF GGG 1 2 3 4 5 6 7 8 9 10 11 12 13 First section Item 1 Item 2 Second section Item 1 Item 2 Item 3 Imte 4 AAA BBB CCC DDD EEE FFF GGG This was some content.
'''
@@ -65,7 +65,10 @@ def test_one_row_one_cell_multiple_paragraphs(self):
expected_html = '''
-
Foo
+
'''
@@ -115,12 +118,12 @@ def test_two_rows_two_cells_one_paragraph_each(self):
expected_html = '''
-
Foo
+
Bar
+
'''
@@ -173,7 +176,11 @@ def test_cell_with_character_styles_applied(self):
expected_html = '''
-
Foo
- Bar
+
+
-
One
- Two
+
+
'''
@@ -219,11 +226,13 @@ def test_two_rows_two_cells_with_colspan(self):
expected_html = '''
-
Foo
+
+
'''
@@ -274,11 +283,17 @@ def test_two_rows_two_cells_with_rowspan(self):
expected_html = '''
-
Foo
+
+
-
One
- Two
+
+
'''
@@ -302,7 +317,9 @@ def test_one_row_one_cell_with_empty_paragraph(self):
expected_html = '''
-
Foo
- Bar
+
+
+
+
-
Two
+
+
'''
@@ -329,7 +346,10 @@ def test_one_row_one_cell_with_empty_paragraph_after_other_paragraph(self):
expected_html = '''
-
+
+
'''
@@ -356,7 +376,10 @@ def test_one_row_one_cell_with_empty_paragraph_before_other_paragraph(self):
expected_html = '''
-
Foo
+
+
'''
@@ -384,7 +407,10 @@ def test_one_row_one_cell_with_paragraph_that_has_empty_run_before_other_paragra
expected_html = '''
-
Foo
+
+
'''
@@ -412,7 +438,10 @@ def test_one_row_one_cell_with_paragraph_that_has_empty_run_after_other_paragrap
expected_html = '''
-
Foo
+
+
'''
@@ -440,7 +469,10 @@ def test_one_row_one_cell_with_empty_text_before_other_paragraph(self):
expected_html = '''
-
Foo
+
+
'''
@@ -468,7 +500,10 @@ def test_one_row_one_cell_with_empty_text_after_other_paragraph(self):
expected_html = '''
-
Foo
+
+
+
'''
@@ -496,7 +531,10 @@ def test_one_row_one_cell_with_whitespace_after_other_paragraph(self):
expected_html = '''
-
Foo
+
+
'''
diff --git a/tests/export/html/test_textbox.py b/tests/export/html/test_textbox.py
index 7ad847d1..923da46f 100644
--- a/tests/export/html/test_textbox.py
+++ b/tests/export/html/test_textbox.py
@@ -115,11 +115,15 @@ def test_textbox_with_a_table(self):
document.add(MainDocumentPart, document_xml)
expected_html = '''
-
-
Foo
+
+
-
+
-
- AAA
-
+
+
+
+
+
+
CCC
diff --git a/tests/export/test_docx.py b/tests/export/test_docx.py
index 16fa43d1..b74fbfce 100644
--- a/tests/export/test_docx.py
+++ b/tests/export/test_docx.py
@@ -33,6 +33,8 @@ class ConvertDocxToHtmlTestCase(DocXFixtureTestCaseFactory):
'inline_tags',
'justification',
'paragraph_with_margins',
+ 'paragraph_indentation_inside_lists',
+ 'paragraph_spacing',
'list_in_table',
'lists_with_margins',
'lists_with_styles',
diff --git a/tests/export/test_xml.py b/tests/export/test_xml.py
index 2ad17a88..22fd491a 100644
--- a/tests/export/test_xml.py
+++ b/tests/export/test_xml.py
@@ -131,12 +131,12 @@ class TableTag(TranslationTestCase):
expected_output = '''
-
BBB
+
+
'''
@@ -158,11 +158,17 @@ class RowSpanTestCase(TranslationTestCase):
expected_output = '''
-
AAA
- BBB
+
+
-
CCC
- DDD
+
+
'''
@@ -185,20 +191,20 @@ class NestedTableTag(TranslationTestCase):
expected_output = '''
-
AAA
- BBB
+
+
+
+
-
CCC
+
+
@@ -299,9 +305,9 @@ def get_xml(self):
class SimpleListTestCase(TranslationTestCase):
expected_output = '''
-
AAA
- BBB
+
+
- CCC
+
@@ -228,12 +234,12 @@ class TableWithInvalidTag(TranslationTestCase):
expected_output = '''
-
DDD
- EEE
+
+
-
FFF
- GGG
+
+
'''
@@ -256,11 +262,11 @@ class TableWithListAndParagraph(TranslationTestCase):
-
AAA
- BBB
+
+
- DDD
+
-
- CCC
- DDD
+
-
'''
@@ -329,7 +335,7 @@ def get_xml(self):
class SingleListItemTestCase(TranslationTestCase):
expected_output = '''
-
'''
@@ -355,20 +361,24 @@ def get_xml(self):
class ListWithContinuationTestCase(TranslationTestCase):
expected_output = '''
-
'''
@@ -397,19 +407,20 @@ def get_xml(self):
class ListWithMultipleContinuationTestCase(TranslationTestCase):
expected_output = '''
BBB
-
DDD
- EEE
+
+
-
FFF
- GGG
+
+
-
'''
@@ -437,13 +448,13 @@ def get_xml(self):
class MangledIlvlTestCase(TranslationTestCase):
expected_output = '''
-
BBB
+
-
CCC
+
-
-
-
'''
@@ -465,13 +476,13 @@ class SeperateListsIntoParentListTestCase(TranslationTestCase):
expected_output = '''
'''
@@ -496,12 +507,13 @@ def get_xml(self):
class InvalidIlvlOrderTestCase(TranslationTestCase):
expected_output = '''
-
-
'''
@@ -617,7 +629,7 @@ def get_xml(self):
class RTagWithNoText(TranslationTestCase):
- expected_output = ''
+ expected_output = '
-
'''
@@ -664,11 +677,12 @@ class InsertTagInList(TranslationTestCase):
expected_output = '''
- BBB
+
'''
@@ -695,11 +709,10 @@ class SmartTagInList(TranslationTestCase):
expected_output = '''
- BBB
+
'''
@@ -725,7 +738,7 @@ def get_xml(self):
class SingleListItem(TranslationTestCase):
expected_output = '''
- BBB
+
-
'''
@@ -790,10 +803,11 @@ def get_xml(self):
class MissingIlvl(TranslationTestCase):
expected_output = '''
-
Blank
- Column 1
- Column 2
+
+
+
-
Row 1
- First
- Second
+
+
+
-
Row 2
- Third
- Fourth
+
+
+
-
'''
@@ -814,18 +828,19 @@ def get_xml(self):
class SameNumIdInTable(TranslationTestCase):
expected_output = '''
- BBB
+
-
'''
@@ -858,9 +873,11 @@ def get_xml(self):
class SDTTestCase(TranslationTestCase):
expected_output = '''
-
-
'''
@@ -1023,16 +1040,23 @@ def get_xml(self):
class NestedListTestCase(TranslationTestCase):
expected_output = u"""
-
-
-
@@ -1068,58 +1092,58 @@ class MultipleNestedListTestCase(TranslationTestCase):
expected_output = u"""
-
-
-
-
diff --git a/tests/fixtures/export_from_googledocs.docx b/tests/fixtures/export_from_googledocs.docx
index 9939ffb7..a2601f55 100644
Binary files a/tests/fixtures/export_from_googledocs.docx and b/tests/fixtures/export_from_googledocs.docx differ
diff --git a/tests/fixtures/external_image.docx b/tests/fixtures/external_image.docx
index 2f6aaf60..1a83b4b9 100644
Binary files a/tests/fixtures/external_image.docx and b/tests/fixtures/external_image.docx differ
diff --git a/tests/fixtures/fake_subscript.html b/tests/fixtures/fake_subscript.html
index 6bbf5a11..10fade6f 100644
--- a/tests/fixtures/fake_subscript.html
+++ b/tests/fixtures/fake_subscript.html
@@ -1 +1,5 @@
-
-
-
-
-
+
-
-
-
-
diff --git a/tests/fixtures/lists_with_styles.html b/tests/fixtures/lists_with_styles.html
index 72c5c552..ed3e4e15 100644
--- a/tests/fixtures/lists_with_styles.html
+++ b/tests/fixtures/lists_with_styles.html
@@ -1,13 +1,20 @@
-
-
-
-
diff --git a/tests/fixtures/nested_lists.html b/tests/fixtures/nested_lists.html
index 6d8221f0..c46c87c4 100644
--- a/tests/fixtures/nested_lists.html
+++ b/tests/fixtures/nested_lists.html
@@ -1,30 +1,45 @@
-
-
+
-
-
-
+
-
-
diff --git a/tests/fixtures/nested_lists_different_num_ids.html b/tests/fixtures/nested_lists_different_num_ids.html
index 9f3562a9..b66ebf15 100644
--- a/tests/fixtures/nested_lists_different_num_ids.html
+++ b/tests/fixtures/nested_lists_different_num_ids.html
@@ -1,28 +1,30 @@
-
-
diff --git a/tests/fixtures/nested_table_rowspan.html b/tests/fixtures/nested_table_rowspan.html
index 73e9424b..bc429320 100644
--- a/tests/fixtures/nested_table_rowspan.html
+++ b/tests/fixtures/nested_table_rowspan.html
@@ -1,19 +1,21 @@
-
-
-
-
+
-
AAA
+
-
BBB
+
+
-
CCC
- DDD
+
+
-
EEE
+
+
-
AAA
- BBB
+
+
-
CCC
+
+
-
DDD
- EEE
+
+
-
FFF
- GGG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
BBB
-
+
EEE
EEE
+
-
GGG
-
HHHIII
+
JJJ
+
+
HHH
+
-
KKK
- LLL
+
+
+
+
-
+
-
+
-
- Cell1
- Cell2
-
-
+ Cell3
- Cell4
-
+
+
+
+
+
+
+
-
-
diff --git a/tests/fixtures/simple_table.html b/tests/fixtures/simple_table.html
index a14b830c..000f5247 100644
--- a/tests/fixtures/simple_table.html
+++ b/tests/fixtures/simple_table.html
@@ -1,14 +1,18 @@
+
- Cell1
-
- Cell3
+ Cell2
- And I am writing in the table
+
+
-
Cell4
+
+
-
+
-
+ CCC
-
+
+
+
-
+
-
- AAA
-
-
- BBB
- CCC
-
-
- DDD
-
-
-
- EEE
-
- FFF
-
-
+
- GGG
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
- 1
- 2
- 3
- 4
-
-
- 5
- 6
- 7
-
-
- 8
- 9
-
-
+ 10
- 11
- 12
- 13
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+First section Item 1 Item 2 Second section Item 1 Item 2 Item 3 Imte 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
diff --git a/tests/fixtures/track_changes_on.html b/tests/fixtures/track_changes_on.html
index 935e97bd..2d6dabad 100644
--- a/tests/fixtures/track_changes_on.html
+++ b/tests/fixtures/track_changes_on.html
@@ -1 +1,3 @@
+
+
-
CCC
- DDD
+
+
+
+
-
EEE
- FFF
+
+
+
+