Skip to content

Commit 8eb72c9

Browse files
committed
add HTML table parsing
1 parent c9350d3 commit 8eb72c9

File tree

10 files changed

+451
-60
lines changed

10 files changed

+451
-60
lines changed

samples/Sample_26_Html.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@
1414
$html .= '<p>Ordered (numbered) list:</p>';
1515
$html .= '<ol><li>Item 1</li><li>Item 2</li></ol>';
1616

17+
$html .= '<table style="width: 50%; border: 6px #0000FF double;">
18+
<thead>
19+
<tr style="background-color: #FF0000; text-align: center; color: #FFFFFF; font-weight: bold; ">
20+
<th>header a</th>
21+
<th>header b</th>
22+
<th style="background-color: #FFFF00; border-width: 12px"><span style="background-color: #00FF00;">header c</span></th>
23+
</tr>
24+
</thead>
25+
<tbody>
26+
<tr><td style="border-style: dotted;">1</td><td colspan="2">2</td></tr>
27+
<tr><td>4</td><td>5</td><td>6</td></tr>
28+
</tbody>
29+
</table>';
30+
1731
\PhpOffice\PhpWord\Shared\Html::addHtml($section, $html);
1832

1933
// Save file

src/PhpWord/Shared/Converter.php

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -296,25 +296,40 @@ public static function htmlToRgb($value)
296296
*/
297297
public static function cssToPoint($value)
298298
{
299-
preg_match('/^[+-]?([0-9]+.?[0-9]+)?(px|em|ex|%|in|cm|mm|pt|pc)$/i', $value, $matches);
300-
$size = $matches[1];
301-
$unit = $matches[2];
299+
if ($value == '0') {
300+
return 0;
301+
}
302+
if (preg_match('/^[+-]?([0-9]+\.?[0-9]*)?(px|em|ex|%|in|cm|mm|pt|pc)$/i', $value, $matches)) {
303+
$size = $matches[1];
304+
$unit = $matches[2];
302305

303-
switch ($unit) {
304-
case 'pt':
305-
return $size;
306-
case 'px':
307-
return self::pixelToPoint($size);
308-
case 'cm':
309-
return self::cmToPoint($size);
310-
case 'mm':
311-
return self::cmToPoint($size / 10);
312-
case 'in':
313-
return self::inchToPoint($size);
314-
case 'pc':
315-
return self::picaToPoint($size);
316-
default:
317-
return null;
306+
switch ($unit) {
307+
case 'pt':
308+
return $size;
309+
case 'px':
310+
return self::pixelToPoint($size);
311+
case 'cm':
312+
return self::cmToPoint($size);
313+
case 'mm':
314+
return self::cmToPoint($size / 10);
315+
case 'in':
316+
return self::inchToPoint($size);
317+
case 'pc':
318+
return self::picaToPoint($size);
319+
}
318320
}
321+
322+
return null;
323+
}
324+
325+
/**
326+
* Transforms a size in CSS format (e.g. 10px, 12pt, ...) to twips
327+
*
328+
* @param string $value
329+
* @return float
330+
*/
331+
public static function cssToTwip($value)
332+
{
333+
return self::pointToTwip(self::cssToPoint($value));
319334
}
320335
}

src/PhpWord/Shared/Html.php

Lines changed: 137 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
namespace PhpOffice\PhpWord\Shared;
1919

2020
use PhpOffice\PhpWord\Element\AbstractContainer;
21+
use PhpOffice\PhpWord\Element\Row;
22+
use PhpOffice\PhpWord\Element\Table;
2123
use PhpOffice\PhpWord\SimpleType\Jc;
2224

2325
/**
@@ -99,7 +101,7 @@ protected static function parseInlineStyle($node, $styles = array())
99101
protected static function parseNode($node, $element, $styles = array(), $data = array())
100102
{
101103
// Populate styles array
102-
$styleTypes = array('font', 'paragraph', 'list');
104+
$styleTypes = array('font', 'paragraph', 'list', 'table', 'row', 'cell');
103105
foreach ($styleTypes as $styleType) {
104106
if (!isset($styles[$styleType])) {
105107
$styles[$styleType] = array();
@@ -124,10 +126,11 @@ protected static function parseNode($node, $element, $styles = array(), $data =
124126
'u' => array('Property', null, null, $styles, null, 'underline', 'single'),
125127
'sup' => array('Property', null, null, $styles, null, 'superScript', true),
126128
'sub' => array('Property', null, null, $styles, null, 'subScript', true),
127-
'span' => array('Property', null, null, $styles, null, 'span', $node),
128-
'table' => array('Table', $node, $element, $styles, null, 'addTable', true),
129-
'tr' => array('Table', $node, $element, $styles, null, 'addRow', true),
130-
'td' => array('Table', $node, $element, $styles, null, 'addCell', true),
129+
'span' => array('Span', $node, null, $styles, null, null, null),
130+
'table' => array('Table', $node, $element, $styles, null, null, null),
131+
'tr' => array('Row', $node, $element, $styles, null, null, null),
132+
'td' => array('Cell', $node, $element, $styles, null, null, null),
133+
'th' => array('Cell', $node, $element, $styles, null, null, null),
131134
'ul' => array('List', null, null, $styles, $data, 3, null),
132135
'ol' => array('List', null, null, $styles, $data, 7, null),
133136
'li' => array('ListItem', $node, $element, $styles, $data, null, null),
@@ -179,7 +182,7 @@ private static function parseChildNodes($node, $element, $styles, $data)
179182
$cNodes = $node->childNodes;
180183
if (count($cNodes) > 0) {
181184
foreach ($cNodes as $cNode) {
182-
if ($element instanceof AbstractContainer) {
185+
if ($element instanceof AbstractContainer || $element instanceof Table || $element instanceof Row) {
183186
self::parseNode($cNode, $element, $styles, $data);
184187
}
185188
}
@@ -197,7 +200,7 @@ private static function parseChildNodes($node, $element, $styles, $data)
197200
*/
198201
private static function parseParagraph($node, $element, &$styles)
199202
{
200-
$styles['paragraph'] = self::parseInlineStyle($node, $styles['paragraph']);
203+
$styles['paragraph'] = self::recursiveParseStylesInHierarchy($node, $styles['paragraph']);
201204
$newElement = $element->addTextRun($styles['paragraph']);
202205

203206
return $newElement;
@@ -231,7 +234,12 @@ private static function parseHeading($element, &$styles, $argument1)
231234
*/
232235
private static function parseText($node, $element, &$styles)
233236
{
234-
$styles['font'] = self::parseInlineStyle($node, $styles['font']);
237+
$styles['font'] = self::recursiveParseStylesInHierarchy($node, $styles['font']);
238+
239+
// Alignment applies to the paragraph, not to the font, so copy it there
240+
if (isset($styles['font']['alignment'])) {
241+
$styles['paragraph']['alignment'] = $styles['font']['alignment'];
242+
}
235243

236244
if (is_callable(array($element, 'addText'))) {
237245
$element->addText($node->nodeValue, $styles['font'], $styles['paragraph']);
@@ -247,16 +255,18 @@ private static function parseText($node, $element, &$styles)
247255
*/
248256
private static function parseProperty(&$styles, $argument1, $argument2)
249257
{
250-
if ($argument1 !== 'span') {
251-
$styles['font'][$argument1] = $argument2;
252-
} else {
253-
if (!is_null($argument2->attributes)) {
254-
$nodeAttr = $argument2->attributes->getNamedItem('style');
255-
if (!is_null($nodeAttr) && property_exists($nodeAttr, 'value')) {
256-
$styles['font'] = self::parseStyle($nodeAttr, $styles['font']);
257-
}
258-
}
259-
}
258+
$styles['font'][$argument1] = $argument2;
259+
}
260+
261+
/**
262+
* Parse span node
263+
*
264+
* @param \DOMNode $node
265+
* @param array &$styles
266+
*/
267+
private static function parseSpan($node, &$styles)
268+
{
269+
self::parseInlineStyle($node, $styles['font']);
260270
}
261271

262272
/**
@@ -270,11 +280,11 @@ private static function parseProperty(&$styles, $argument1, $argument2)
270280
*
271281
* @todo As soon as TableItem, RowItem and CellItem support relative width and height
272282
*/
273-
private static function parseTable($node, $element, &$styles, $argument1)
283+
private static function parseTable($node, $element, &$styles)
274284
{
275-
$styles['paragraph'] = self::parseInlineStyle($node, $styles['paragraph']);
285+
$elementStyles = self::parseInlineStyle($node, $styles['table']);
276286

277-
$newElement = $element->$argument1();
287+
$newElement = $element->addTable($elementStyles);
278288

279289
// $attributes = $node->attributes;
280290
// if ($attributes->getNamedItem('width') !== null) {
@@ -291,6 +301,62 @@ private static function parseTable($node, $element, &$styles, $argument1)
291301
return $newElement;
292302
}
293303

304+
/**
305+
* Parse a table row
306+
*
307+
* @param \DOMNode $node
308+
* @param \PhpOffice\PhpWord\Element\Table $element
309+
* @param array &$styles
310+
* @return \PhpOffice\PhpWord\Element\Row the row added to the table
311+
*/
312+
private static function parseRow($node, $element, &$styles)
313+
{
314+
$rowStyles = self::parseInlineStyle($node, $styles['row']);
315+
if ($node->parentNode->nodeName == 'thead') {
316+
$rowStyles['tblHeader'] = true;
317+
}
318+
319+
return $element->addRow(null, $rowStyles);
320+
}
321+
322+
/**
323+
* Parse table cell
324+
*
325+
* @param \DOMNode $node
326+
* @param \PhpOffice\PhpWord\Element\Table $element
327+
* @param array &$styles
328+
* @return \PhpOffice\PhpWord\Element\AbstractContainer $element
329+
*/
330+
private static function parseCell($node, $element, &$styles)
331+
{
332+
$cellStyles = self::recursiveParseStylesInHierarchy($node, $styles['cell']);
333+
334+
$colspan = $node->getAttribute('colspan');
335+
if (!empty($colspan)) {
336+
$cellStyles['gridSpan'] = $colspan - 0;
337+
}
338+
339+
return $element->addCell(null, $cellStyles);
340+
}
341+
342+
/**
343+
* Recursively parses styles on parent nodes
344+
* TODO if too slow, add caching of parent nodes, !! everything is static here so watch out for concurrency !!
345+
*
346+
* @param \DOMNode $node
347+
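* @param array $style styles collected so far; on key conflicts they override the styles parsed from $node and its ancestors
* @return array the merged styles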
348+
*/
349+
private static function recursiveParseStylesInHierarchy(\DOMNode $node, array $style)
350+
{
351+
$parentStyle = self::parseInlineStyle($node, array());
352+
$style = array_merge($parentStyle, $style);
353+
if ($node->parentNode != null && XML_ELEMENT_NODE == $node->parentNode->nodeType) {
354+
$style = self::recursiveParseStylesInHierarchy($node->parentNode, $style);
355+
}
356+
357+
return $style;
358+
}
359+
294360
/**
295361
* Parse list node
296362
*
@@ -400,9 +466,59 @@ private static function parseStyle($attribute, $styles)
400466
}
401467
$styles['italic'] = $tValue;
402468
break;
469+
case 'border-color':
470+
$styles['color'] = trim($cValue, '#');
471+
break;
472+
case 'border-width':
473+
$styles['borderSize'] = Converter::cssToPoint($cValue);
474+
break;
475+
case 'border-style':
476+
$styles['borderStyle'] = self::mapBorderStyle($cValue);
477+
break;
478+
case 'width':
479+
if (preg_match('/([0-9]+[a-z]+)/', $cValue, $matches)) {
480+
$styles['width'] = Converter::cssToTwip($matches[1]);
481+
$styles['unit'] = \PhpOffice\PhpWord\Style\Table::WIDTH_TWIP;
482+
} elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
483+
$styles['width'] = $matches[1] * 50;
484+
$styles['unit'] = \PhpOffice\PhpWord\Style\Table::WIDTH_PERCENT;
485+
} elseif (preg_match('/([0-9]+)/', $cValue, $matches)) {
486+
$styles['width'] = $matches[1];
487+
$styles['unit'] = \PhpOffice\PhpWord\Style\Table::WIDTH_AUTO;
488+
}
489+
break;
490+
case 'border':
491+
if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+)\s+([a-z]+)/', $cValue, $matches)) {
492+
$styles['borderSize'] = Converter::cssToPoint($matches[1]);
493+
$styles['borderColor'] = trim($matches[2], '#');
494+
$styles['borderStyle'] = self::mapBorderStyle($matches[3]);
495+
}
496+
break;
403497
}
404498
}
405499

406500
return $styles;
407501
}
502+
503+
/**
504+
* Transforms a CSS border style into a word border style
505+
*
506+
* @param string $cssBorderStyle
507+
* @return null|string
508+
*/
509+
private static function mapBorderStyle($cssBorderStyle)
510+
{
511+
if ($cssBorderStyle == null) {
512+
return null;
513+
}
514+
switch ($cssBorderStyle) {
515+
case 'none':
516+
case 'dashed':
517+
case 'dotted':
518+
case 'double':
519+
return $cssBorderStyle;
520+
case 'solid':
521+
return 'single';
522+
}
523+
}
408524
}

0 commit comments

Comments
 (0)