Skip to content

Commit 8b4267a

Browse files
czosel and winged committed
fix(docx-template): corruption of libreoffice-originated templates
Certain versions of python-docx and python-docxtemplate corrupt files that were originally created with LibreOffice. One effect of that corruption is a duplicate entry among the document-internal files: the resulting document contains two docProps/core.xml files.

Co-Authored-By: David Vogt <[email protected]>
1 parent 587c08a commit 8b4267a

File tree

3 files changed

+90
-1
lines changed

3 files changed

+90
-1
lines changed

document_merge_service/api/apps.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,57 @@ class DefaultConfig(AppConfig):
1010
def ready(self):
    """Run one-time application setup.

    Registers ``IContains`` as the ``search`` lookup on ``TextField`` when
    the default database engine is sqlite3, then applies the docxtpl
    corruption mitigation.
    """
    default_engine = settings.DATABASES["default"]["ENGINE"]
    if "sqlite3" in default_engine:  # pragma: no cover
        TextField.register_lookup(IContains, lookup_name="search")

    mitigate_docxtpl_corruption_bug()
14+
15+
16+
def mitigate_docxtpl_corruption_bug():
    """Monkey-patch python-docx so LibreOffice-originated templates stay intact.

    This is basically a backport of
    https://github.com/python-openxml/python-docx/pull/1436:

    * a ``CORE_PROPERTIES_OFFICEDOCUMENT`` relationship type is added to
      ``docx.opc.constants.RELATIONSHIP_TYPE``,
    * ``OpcPackage._core_properties_part`` falls back to that relationship
      type (and retypes the relationship to ``CORE_PROPERTIES``) before
      creating a default core properties part,
    * ``_Relationship.reltype`` gains a setter so the retyping is possible.

    The function is idempotent: calling it again after it has patched
    python-docx is a no-op.

    Raises:
        Exception: if python-docx already ships
            ``CORE_PROPERTIES_OFFICEDOCUMENT`` itself, i.e. the monkeypatch
            is obsolete and should be removed.
    """
    # Resolve every import before mutating anything, so a failing import can
    # never leave python-docx in a half-patched state.
    from typing import cast

    from docx.opc.constants import RELATIONSHIP_TYPE
    from docx.opc.package import RT, CorePropertiesPart, OpcPackage
    from docx.opc.rel import _Relationship

    applied_marker = "_dms_docxtpl_mitigation_applied"

    # Django may call ``AppConfig.ready()`` more than once. Without this
    # guard, the second call would see the constant *we* added below and
    # trip the "no longer required" check.
    if getattr(RELATIONSHIP_TYPE, applied_marker, False):  # pragma: no cover
        return

    if hasattr(RELATIONSHIP_TYPE, "CORE_PROPERTIES_OFFICEDOCUMENT"):  # pragma: no cover
        raise Exception(
            "The docxtpl mitigation is no longer required, please remove the monkeypatch code"
        )

    RELATIONSHIP_TYPE.CORE_PROPERTIES_OFFICEDOCUMENT = (
        "http://schemas.openxmlformats.org/officedocument/2006/relationships"
        "/metadata/core-properties"
    )

    @property
    def _core_properties_part(self) -> CorePropertiesPart:
        """|CorePropertiesPart| object related to this package.

        Creates a default core properties part if one is not present (not common).
        """
        try:
            return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
        except KeyError:
            try:
                # Fall back to the alternative relationship type and retype
                # the relationship to the regular one, so the existing part
                # is reused instead of a second one being added.
                office_document_part = self.part_related_by(
                    RT.CORE_PROPERTIES_OFFICEDOCUMENT
                )
                rel = self.relate_to(
                    office_document_part, RT.CORE_PROPERTIES_OFFICEDOCUMENT
                )
                self.rels[rel].reltype = RT.CORE_PROPERTIES
                return cast(CorePropertiesPart, office_document_part)
            except KeyError:
                core_properties_part = CorePropertiesPart.default(self)
                self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
                return core_properties_part

    OpcPackage._core_properties_part = _core_properties_part

    # ``reltype`` needs a setter for the retyping above.
    @_Relationship.reltype.setter
    def reltype(self, value: str):
        self._reltype = value

    _Relationship.reltype = reltype

    # Mark the patch as applied only once everything above succeeded.
    setattr(RELATIONSHIP_TYPE, applied_marker, True)
4.87 KB
Binary file not shown.

document_merge_service/api/tests/test_template.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
import json
33
import os
44
import re
5-
from collections import namedtuple
5+
import tempfile
6+
import zipfile
7+
from collections import Counter, namedtuple
68

79
import openpyxl
810
import pytest
@@ -916,3 +918,36 @@ def test_placeholder_with_unsupported_operand(
916918
with pytest.raises(exceptions.ValidationError) as exc_info:
917919
serializer.validate({"data": {"E_BAU_NUMBER": 12345}})
918920
assert exc_info.value.args[0] == expected_error
921+
922+
923+
def test_template_merge_docx_libreoffice_bug(
    db, client, mock_filefield_name_validation, template, snapshot
):
    """Verify a certain docx corruption bug does not occur.

    Certain versions of python-docx and python-docxtemplate cause corruption
    of files that were originally created with LibreOffice. One effect of that
    corruption is a duplicate entry in the document-internal files; there
    are two docProps/core.xml files in the resulting document.
    """
    file = django_file("created_with_libreoffice.docx")
    template.template.save(os.path.basename(file.name), file)
    template.engine = "docx-template"
    template.save()
    url = reverse("template-merge", args=[template.pk])

    response = client.post(url, data={"data": {"test": "Test input"}}, format="json")
    # Fail with a clear message if the merge itself failed, instead of an
    # opaque BadZipFile error further down.
    assert response.status_code == 200, "Merge request failed"

    with tempfile.NamedTemporaryFile(suffix=".docx") as tmp:
        tmp.write(response.content)
        tmp.seek(0)

        # Read the archive through the already-open file object (reopening
        # the temp file by name fails on some platforms) and close it
        # deterministically.
        with zipfile.ZipFile(tmp) as archive:
            name_counter = Counter(archive.namelist())

    problematic_names = {
        name: count for name, count in name_counter.most_common() if count > 1
    }

    assert problematic_names == {}, "Duplicate entry in docx file's internal structure"

0 commit comments

Comments
 (0)