Skip to content

Commit ec514f3

Browse files
committed
Reader: Read section settings and font/paragraph styles
1 parent fa2878e commit ec514f3

File tree

9 files changed

+295
-105
lines changed

9 files changed

+295
-105
lines changed

Classes/PHPWord/Reader/Word2007.php

Lines changed: 257 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
/**
3636
* PHPWord_Reader_Word2007
3737
*/
38-
class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader
38+
class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements
39+
PHPWord_Reader_IReader
3940
{
4041

4142
/**
@@ -54,7 +55,8 @@ public function canRead($pFilename)
5455
{
5556
// Check if file exists
5657
if (!file_exists($pFilename)) {
57-
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
58+
throw new PHPWord_Exception("Could not open " . $pFilename .
59+
" for reading! File does not exist.");
5860
}
5961

6062
$return = false;
@@ -86,9 +88,13 @@ public function canRead($pFilename)
8688
*
8789
* @param ZipArchive $archive
8890
* @param string $fileName
91+
* @param bool $removeNamespace
8992
*/
90-
public function getFromZipArchive($archive, $fileName = '')
91-
{
93+
public function getFromZipArchive(
94+
$archive,
95+
$fileName = '',
96+
$removeNamespace = false
97+
) {
9298
// Root-relative paths
9399
if (strpos($fileName, '//') !== false)
94100
{
@@ -103,9 +109,9 @@ public function getFromZipArchive($archive, $fileName = '')
103109
$contents = $archive->getFromName(substr($fileName, 1));
104110
}
105111

106-
// Stupid hack for namespace
107-
if ($contents != '' && $fileName = 'word/document.xml') {
108-
$contents = preg_replace('~(</?)w:~is', '$1', $contents);
112+
// Strip the "w:" namespace prefix from element and attribute names
113+
if ($removeNamespace) {
114+
$contents = preg_replace('~(</?|\s)w:~is', '$1', $contents);
109115
}
110116

111117
return $contents;
@@ -122,25 +128,26 @@ public function load($pFilename)
122128
{
123129
// Check if file exists
124130
if (!file_exists($pFilename)) {
125-
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
131+
throw new PHPWord_Exception("Could not open " . $pFilename .
132+
" for reading! File does not exist.");
126133
}
127134

128135
// Initialisations
129136
$word = new PHPWord;
130137
$zip = new ZipArchive;
131138
$zip->open($pFilename);
132139

133-
// Read relationships
140+
// Read properties and documents
134141
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
135142
foreach ($rels->Relationship as $rel) {
136143
switch ($rel["Type"]) {
137144
// Core properties
138-
case "http://schemas.openxmlformats.org/package/2006//relationships/metadata/core-properties":
145+
case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties":
139146
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
140147
if (is_object($xmlCore)) {
141148
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
142149
$xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/");
143-
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006//metadata/core-properties");
150+
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties");
144151
$docProps = $word->getProperties();
145152
$docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator")));
146153
$docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy")));
@@ -188,32 +195,75 @@ public function load($pFilename)
188195
$dir = dirname($rel["Target"]);
189196
$archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels";
190197
$relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive));
191-
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006//relationships");
198+
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
192199
$xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" .
193200
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
194-
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
195-
if ($xmlDoc->body) {
201+
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true));
202+
if (is_object($xmlDoc)) {
196203
$section = $word->createSection();
197-
foreach ($xmlDoc->body->children() as $element) {
198-
switch ($element->getName()) {
199-
case 'p':
200-
if ($element->pPr->sectPr) {
201-
$section = $word->createSection();
202-
continue;
203-
}
204-
if ($element->r) {
205-
if (count($element->r) == 1) {
206-
$section->addText($element->r->t);
207-
} else {
208-
$textRun = $section->createTextRun();
209-
foreach ($element->r as $r) {
210-
$textRun->addText($r->t);
211-
}
212-
}
204+
205+
foreach ($xmlDoc->body->children() as $elm) {
206+
$elmName = $elm->getName();
207+
if ($elmName == 'p') { // Paragraph/section
208+
// Section break found: apply its settings to the current section, then start a new one
209+
if ($elm->pPr->sectPr) {
210+
$section->setSettings($this->loadSectionSettings($elm->pPr));
211+
$section = $word->createSection();
212+
continue;
213+
}
214+
// Has w:r? It's either text or textrun
215+
if ($elm->r) {
216+
// w:r = 1? It's a plain paragraph
217+
if (count($elm->r) == 1) {
218+
$section->addText($elm->r->t,
219+
$this->loadFontStyle($elm->r));
220+
// w:r more than 1? It's a textrun
213221
} else {
214-
$section->addTextBreak();
222+
$textRun = $section->createTextRun();
223+
foreach ($elm->r as $r) {
224+
$textRun->addText($r->t,
225+
$this->loadFontStyle($r));
226+
}
215227
}
216-
break;
228+
// No, it's a textbreak
229+
} else {
230+
$section->addTextBreak();
231+
}
232+
} elseif ($elmName == 'sectPr') {
233+
// Last section setting
234+
$section->setSettings($this->loadSectionSettings($xmlDoc->body));
235+
}
236+
}
237+
}
238+
break;
239+
}
240+
}
241+
242+
// Read styles
243+
$docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels"));
244+
foreach ($docRels->Relationship as $rel) {
245+
switch ($rel["Type"]) {
246+
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles":
247+
$xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true));
248+
if (is_object($xmlStyle)) {
249+
foreach ($xmlStyle->children() as $elm) {
250+
if ($elm->getName() != 'style') {
251+
continue;
252+
}
253+
unset($pStyle);
254+
unset($fStyle);
255+
$hasParagraphStyle = $elm->pPr && ($elm->pPr != '');
256+
$hasFontStyle = $elm->rPr && ($elm->rPr != '');
257+
$styleName = (string)$elm->name['val'];
258+
if ($hasParagraphStyle) {
259+
$pStyle = $this->loadParagraphStyle($elm);
260+
if (!$hasFontStyle) {
261+
$word->addParagraphStyle($styleName, $pStyle);
262+
}
263+
}
264+
if ($hasFontStyle) {
265+
$fStyle = $this->loadFontStyle($elm);
266+
$word->addFontStyle($styleName, $fStyle, $pStyle);
217267
}
218268
}
219269
}
@@ -225,6 +275,181 @@ public function load($pFilename)
225275
return $word;
226276
}
227277

278+
/**
 * Load section settings from SimpleXMLElement
 *
 * Looks at the sectPr child of the given element and collects the
 * break type, page size/orientation, page margins and column layout
 * into an array keyed by section setting name.
 *
 * @param SimpleXMLElement $elm Element that may contain a sectPr child
 * @return array|null Collected settings, or null when no sectPr is found
 *
 * @todo Implement gutter
 */
private function loadSectionSettings($elm)
{
    $sectPr = $elm->sectPr;
    if (!$sectPr) {
        return null;
    }

    $settings = array();

    // The break type is taken whenever the type element is present
    if ($sectPr->type) {
        $settings['breakType'] = (string)$sectPr->type['val'];
    }

    // child element => attribute => (setting key, cast); the order of
    // the entries is the order in which the settings are emitted
    $attributeMap = array(
        'pgSz' => array(
            'w'      => array('pageSizeW', 'int'),
            'h'      => array('pageSizeH', 'int'),
            'orient' => array('orientation', 'string'),
        ),
        'pgMar' => array(
            'top'    => array('topMargin', 'int'),
            'left'   => array('leftMargin', 'int'),
            'bottom' => array('bottomMargin', 'int'),
            'right'  => array('rightMargin', 'int'),
            'header' => array('headerHeight', 'int'),
            'footer' => array('footerHeight', 'int'),
        ),
        'cols' => array(
            'num'   => array('colsNum', 'int'),
            'space' => array('colsSpace', 'int'),
        ),
    );

    foreach ($attributeMap as $childName => $attributes) {
        $child = $sectPr->{$childName};
        if (!$child) {
            continue;
        }
        foreach ($attributes as $attrName => $target) {
            if (!isset($child[$attrName])) {
                continue;
            }
            list($settingKey, $cast) = $target;
            if ($cast === 'int') {
                $settings[$settingKey] = (int)$child[$attrName];
            } else {
                $settings[$settingKey] = (string)$child[$attrName];
            }
        }
    }

    return $settings;
}
340+
341+
/**
 * Load paragraph style from SimpleXMLElement
 *
 * Reads the pPr child of the given element. When pPr references a named
 * style through pStyle, that style name is returned; otherwise the
 * individual paragraph properties are collected into an array.
 *
 * @param SimpleXMLElement $elm Element holding a pPr child (a paragraph
 *                              or a style definition)
 * @return array|string|null Style name, array of style options, or null
 *                           when the element has no usable pPr
 */
private function loadParagraphStyle($elm)
{
    $xml = $elm->pPr;
    if (!$xml) {
        return null;
    }

    // A reference to a named paragraph style wins over inline properties
    if ($xml->pStyle) {
        return (string)$xml->pStyle['val'];
    }

    $style = array();
    if ($xml->jc) {
        $style['align'] = (string)$xml->jc['val'];
    }

    // Indentation values are attributes of <ind>, not child elements.
    // Line spacing is not part of <ind>; it is read from <spacing> below.
    if (isset($xml->ind['left'])) {
        $style['indent'] = (int)$xml->ind['left'];
    }
    if (isset($xml->ind['hanging'])) {
        $style['hanging'] = (int)$xml->ind['hanging'];
    }

    if (isset($xml->spacing['after'])) {
        $style['spaceAfter'] = (int)$xml->spacing['after'];
    }
    if (isset($xml->spacing['before'])) {
        $style['spaceBefore'] = (int)$xml->spacing['before'];
    }
    if (isset($xml->spacing['line'])) {
        $style['spacing'] = (int)$xml->spacing['line'];
    }

    // basedOn/next are siblings of pPr inside a style definition; the
    // lookup under pPr is kept as a fallback for backward compatibility
    foreach (array('basedOn', 'next') as $link) {
        if (isset($elm->{$link}['val'])) {
            $style[$link] = (string)$elm->{$link}['val'];
        } elseif (isset($xml->{$link}['val'])) {
            $style[$link] = (string)$xml->{$link}['val'];
        }
    }

    // On/off properties are usually written as empty elements
    // (e.g. <keepNext/>). SimpleXML casts an empty, attribute-less
    // element to boolean false, so presence is tested with isset().
    // A val of 0/false/off switches the property off; anything else,
    // including a missing val, switches it on.
    $offValues = array('0', 'false', 'off');
    $toggles = array('widowControl', 'keepNext', 'keepLines', 'pageBreakBefore');
    foreach ($toggles as $toggle) {
        if (isset($xml->{$toggle})) {
            $val = strtolower((string)$xml->{$toggle}['val']);
            $style[$toggle] = !in_array($val, $offValues, true);
        }
    }

    return $style;
}
402+
403+
/**
 * Load font style from SimpleXMLElement
 *
 * Reads the rPr child of the given element. When rPr references a named
 * style through rStyle, that style name is returned; otherwise the
 * individual run properties are collected into an array.
 *
 * @param SimpleXMLElement $elm Element holding an rPr child (a run or a
 *                              style definition)
 * @return array|string|null Style name, array of style options, or null
 *                           when the element has no usable rPr
 */
private function loadFontStyle($elm)
{
    $xml = $elm->rPr;
    if (!$xml) {
        return null;
    }

    // A reference to a named character style wins over inline properties
    if ($xml->rStyle) {
        return (string)$xml->rStyle['val'];
    }

    // Values of the val attribute that switch an on/off property off
    $offValues = array('0', 'false', 'off');

    $style = array();

    // rFonts may carry only theme/eastAsia attributes; take the font
    // name only when an ascii face is actually given
    if (isset($xml->rFonts['ascii'])) {
        $style['name'] = (string)$xml->rFonts['ascii'];
    }
    if ($xml->sz) {
        // sz is divided by two to obtain the font size
        $style['size'] = (int)$xml->sz['val'] / 2;
    }
    if ($xml->color) {
        $style['color'] = (string)$xml->color['val'];
    }

    // <b/>, <i/> and <strike/> are normally empty elements. SimpleXML
    // casts an empty, attribute-less element to boolean false, so
    // presence is tested with isset(); a val of 0/false/off turns the
    // property off explicitly.
    if (isset($xml->b)) {
        $style['bold'] = !in_array(strtolower((string)$xml->b['val']), $offValues, true);
    }
    if (isset($xml->i)) {
        $style['italic'] = !in_array(strtolower((string)$xml->i['val']), $offValues, true);
    }
    if ($xml->u) {
        $style['underline'] = (string)$xml->u['val'];
    }
    if (isset($xml->strike)) {
        $style['strikethrough'] = !in_array(strtolower((string)$xml->strike['val']), $offValues, true);
    }
    if ($xml->highlight) {
        $style['fgColor'] = (string)$xml->highlight['val'];
    }

    // Only the two explicit values change the vertical alignment; any
    // other value (e.g. "baseline") leaves the run untouched
    if ($xml->vertAlign) {
        $vertAlign = (string)$xml->vertAlign['val'];
        if ($vertAlign === 'superscript') {
            $style['superScript'] = true;
        } elseif ($vertAlign === 'subscript') {
            $style['subScript'] = true;
        }
    }

    return $style;
}
452+
228453
/**
229454
* Get array item
230455
*

Classes/PHPWord/Section.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,16 @@ public function __construct($sectionCount, $settings = null)
7777
{
7878
$this->_sectionCount = $sectionCount;
7979
$this->_settings = new PHPWord_Section_Settings();
80+
$this->setSettings($settings);
81+
}
8082

83+
/**
84+
* Set Section Settings
85+
*
86+
* @param array $settings
87+
*/
88+
public function setSettings($settings = null)
89+
{
8190
if (!is_null($settings) && is_array($settings)) {
8291
foreach ($settings as $key => $value) {
8392
if (substr($key, 0, 1) != '_') {

Classes/PHPWord/Shared/File.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public static function realpath($pFilename)
7676

7777
// Found something?
7878
if ($returnValue == '' || is_null($returnValue)) {
79-
$pathArray = split('/', $pFilename);
79+
$pathArray = explode('/', $pFilename);
8080
while (in_array('..', $pathArray) && $pathArray[0] != '..') {
8181
for ($i = 0; $i < count($pathArray); ++$i) {
8282
if ($pathArray[$i] == '..' && $i > 0) {

Classes/PHPWord/Style/Paragraph.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -506,4 +506,4 @@ public function getLineHeight()
506506
{
507507
return $this->lineHeight;
508508
}
509-
}
509+
}

0 commit comments

Comments
 (0)