@@ -22,9 +22,11 @@ class CellValueFormatter
2222
2323 /** Definition of XML node names used to parse data */
2424 const XML_NODE_P = 'p ' ;
25- const XML_NODE_S = 'text:s ' ;
26- const XML_NODE_A = 'text:a ' ;
27- const XML_NODE_SPAN = 'text:span ' ;
25+ const XML_NODE_TEXT_A = 'text:a ' ;
26+ const XML_NODE_TEXT_SPAN = 'text:span ' ;
27+ const XML_NODE_TEXT_S = 'text:s ' ;
28+ const XML_NODE_TEXT_TAB = 'text:tab ' ;
29+ const XML_NODE_TEXT_LINE_BREAK = 'text:line-break ' ;
2830
2931 /** Definition of XML attributes used to parse data */
3032 const XML_ATTRIBUTE_TYPE = 'office:value-type ' ;
@@ -41,6 +43,13 @@ class CellValueFormatter
4143 /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */
4244 protected $ escaper ;
4345
46+ /** @var array Lookup table: name of an XML node that represents whitespace => the whitespace string that node stands for (space, tab or line feed) */
47+ private static $ WHITESPACE_XML_NODES = [
48+ self ::XML_NODE_TEXT_S => ' ' ,
49+ self ::XML_NODE_TEXT_TAB => "\t" ,
50+ self ::XML_NODE_TEXT_LINE_BREAK => "\n" ,
51+ ];
52+
4453 /**
4554 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
4655 * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
@@ -96,21 +105,7 @@ protected function formatStringCellValue($node)
96105 $ pNodes = $ node ->getElementsByTagName (self ::XML_NODE_P );
97106
98107 foreach ($ pNodes as $ pNode ) {
99- $ currentPValue = '' ;
100-
101- foreach ($ pNode ->childNodes as $ childNode ) {
102- if ($ childNode instanceof \DOMText) {
103- $ currentPValue .= $ childNode ->nodeValue ;
104- } elseif ($ childNode ->nodeName === self ::XML_NODE_S ) {
105- $ spaceAttribute = $ childNode ->getAttribute (self ::XML_ATTRIBUTE_C );
106- $ numSpaces = (!empty ($ spaceAttribute )) ? (int ) $ spaceAttribute : 1 ;
107- $ currentPValue .= str_repeat (' ' , $ numSpaces );
108- } elseif ($ childNode ->nodeName === self ::XML_NODE_A || $ childNode ->nodeName === self ::XML_NODE_SPAN ) {
109- $ currentPValue .= $ childNode ->nodeValue ;
110- }
111- }
112-
113- $ pNodeValues [] = $ currentPValue ;
108+ $ pNodeValues [] = $ this ->extractTextValueFromNode ($ pNode );
114109 }
115110
116111 $ escapedCellValue = implode ("\n" , $ pNodeValues );
@@ -119,6 +114,62 @@ protected function formatStringCellValue($node)
119114 return $ cellValue ;
120115 }
121116
/**
 * Builds the text carried by the given node by walking its direct children:
 *  - text nodes contribute their value as is
 *  - whitespace nodes contribute the whitespace they stand for
 *  - "text:a" and "text:span" nodes are processed recursively
 * Any other kind of child is ignored.
 *
 * @param \DOMNode $pNode Node whose children are read (called with "p" nodes, and recursively with "text:a"/"text:span" nodes)
 * @return string The concatenated text, in document order
 */
private function extractTextValueFromNode($pNode)
{
    $fragments = [];

    foreach ($pNode->childNodes as $child) {
        // Plain text: taken verbatim
        if ($child instanceof \DOMText) {
            $fragments[] = $child->nodeValue;
            continue;
        }

        $childName = $child->nodeName;

        // <text:s />, <text:tab />, <text:line-break />
        if ($this->isWhitespaceNode($childName)) {
            $fragments[] = $this->transformWhitespaceNode($child);
            continue;
        }

        // Links and spans may themselves contain text and whitespace nodes
        $isTextContainer = in_array($childName, [self::XML_NODE_TEXT_A, self::XML_NODE_TEXT_SPAN], true);
        if ($isTextContainer) {
            $fragments[] = $this->extractTextValueFromNode($child);
        }
    }

    return implode('', $fragments);
}
137+
/**
 * Tells whether a node name designates one of the whitespace nodes
 * listed in self::$WHITESPACE_XML_NODES:
 *  - <text:s />
 *  - <text:tab />
 *  - <text:line-break />
 *
 * @param string $nodeName Name of the XML node to check
 * @return bool TRUE if the name is a known whitespace node name, FALSE otherwise
 */
private function isWhitespaceNode($nodeName)
{
    $knownWhitespaceNodes = self::$WHITESPACE_XML_NODES;

    return isset($knownWhitespaceNodes[$nodeName]);
}
151+
/**
 * Converts an XML whitespace node into the whitespace characters it stands for.
 *
 * The "<text:p>" node can contain the string value directly or contain
 * child elements. In the latter case, whitespaces are represented by
 * dedicated XML elements, which this method maps back to characters:
 *  - <text:s />          => space
 *  - <text:tab />        => tab
 *  - <text:line-break /> => line break
 *
 * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
 *
 * @param \DOMElement $node The XML node representing a whitespace; its name must be a key of self::$WHITESPACE_XML_NODES
 * @return string The corresponding whitespace value, repeated as many times as the node's count attribute requests
 */
private function transformWhitespaceNode($node)
{
    $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"

    // A missing, empty or "0" attribute means a single whitespace
    $numWhitespaces = (!empty($countAttribute)) ? (int) $countAttribute : 1;

    // str_repeat() fails on a negative multiplier (ValueError on PHP 8, warning before),
    // so a malformed negative count yields no whitespace instead of aborting the read
    $numWhitespaces = max(0, $numWhitespaces);

    return str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
}
172+
122173 /**
123174 * Returns the cell Numeric value from the given node.
124175 *
0 commit comments