35
35
/**
36
36
* PHPWord_Reader_Word2007
37
37
*/
38
- class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader
38
+ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements
39
+ PHPWord_Reader_IReader
39
40
{
40
41
41
42
/**
@@ -54,7 +55,8 @@ public function canRead($pFilename)
54
55
{
55
56
// Check if file exists
56
57
if (!file_exists ($ pFilename )) {
57
- throw new PHPWord_Exception ("Could not open " . $ pFilename . " for reading! File does not exist. " );
58
+ throw new PHPWord_Exception ("Could not open " . $ pFilename .
59
+ " for reading! File does not exist. " );
58
60
}
59
61
60
62
$ return = false ;
@@ -86,9 +88,13 @@ public function canRead($pFilename)
86
88
*
87
89
* @param ZipArchive $archive
88
90
* @param string $fileName
91
+ * @param bool $removeNamespace
89
92
*/
90
- public function getFromZipArchive ($ archive , $ fileName = '' )
91
- {
93
+ public function getFromZipArchive (
94
+ $ archive ,
95
+ $ fileName = '' ,
96
+ $ removeNamespace = false
97
+ ) {
92
98
// Root-relative paths
93
99
if (strpos ($ fileName , '// ' ) !== false )
94
100
{
@@ -103,9 +109,9 @@ public function getFromZipArchive($archive, $fileName = '')
103
109
$ contents = $ archive ->getFromName (substr ($ fileName , 1 ));
104
110
}
105
111
106
- // Stupid hack for namespace
107
- if ($ contents != '' && $ fileName = ' word/document.xml ' ) {
108
- $ contents = preg_replace ('~(</?)w:~is ' , '$1 ' , $ contents );
112
+ // Remove namespaces from elements and attributes name
113
+ if ($ removeNamespace ) {
114
+ $ contents = preg_replace ('~(</?|\s )w:~is ' , '$1 ' , $ contents );
109
115
}
110
116
111
117
return $ contents ;
@@ -122,25 +128,26 @@ public function load($pFilename)
122
128
{
123
129
// Check if file exists
124
130
if (!file_exists ($ pFilename )) {
125
- throw new PHPWord_Exception ("Could not open " . $ pFilename . " for reading! File does not exist. " );
131
+ throw new PHPWord_Exception ("Could not open " . $ pFilename .
132
+ " for reading! File does not exist. " );
126
133
}
127
134
128
135
// Initialisations
129
136
$ word = new PHPWord ;
130
137
$ zip = new ZipArchive ;
131
138
$ zip ->open ($ pFilename );
132
139
133
- // Read relationships
140
+ // Read properties and documents
134
141
$ rels = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "_rels/.rels " ));
135
142
foreach ($ rels ->Relationship as $ rel ) {
136
143
switch ($ rel ["Type " ]) {
137
144
// Core properties
138
- case "http://schemas.openxmlformats.org/package/2006// relationships/metadata/core-properties " :
145
+ case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties " :
139
146
$ xmlCore = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "{$ rel ['Target ' ]}" ));
140
147
if (is_object ($ xmlCore )) {
141
148
$ xmlCore ->registerXPathNamespace ("dc " , "http://purl.org/dc/elements/1.1/ " );
142
149
$ xmlCore ->registerXPathNamespace ("dcterms " , "http://purl.org/dc/terms/ " );
143
- $ xmlCore ->registerXPathNamespace ("cp " , "http://schemas.openxmlformats.org/package/2006// metadata/core-properties " );
150
+ $ xmlCore ->registerXPathNamespace ("cp " , "http://schemas.openxmlformats.org/package/2006/metadata/core-properties " );
144
151
$ docProps = $ word ->getProperties ();
145
152
$ docProps ->setCreator ((string ) self ::array_item ($ xmlCore ->xpath ("dc:creator " )));
146
153
$ docProps ->setLastModifiedBy ((string ) self ::array_item ($ xmlCore ->xpath ("cp:lastModifiedBy " )));
@@ -188,32 +195,75 @@ public function load($pFilename)
188
195
$ dir = dirname ($ rel ["Target " ]);
189
196
$ archive = "$ dir/_rels/ " . basename ($ rel ["Target " ]) . ".rels " ;
190
197
$ relsDoc = simplexml_load_string ($ this ->getFromZipArchive ($ zip , $ archive ));
191
- $ relsDoc ->registerXPathNamespace ("rel " , "http://schemas.openxmlformats.org/package/2006// relationships " );
198
+ $ relsDoc ->registerXPathNamespace ("rel " , "http://schemas.openxmlformats.org/package/2006/relationships " );
192
199
$ xpath = self ::array_item ($ relsDoc ->xpath ("rel:Relationship[@Type=' " .
193
200
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'] " ));
194
- $ xmlDoc = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "{$ rel ['Target ' ]}" ));
195
- if ($ xmlDoc-> body ) {
201
+ $ xmlDoc = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "{$ rel ['Target ' ]}" , true ));
202
+ if (is_object ( $ xmlDoc) ) {
196
203
$ section = $ word ->createSection ();
197
- foreach ($ xmlDoc ->body ->children () as $ element ) {
198
- switch ($ element ->getName ()) {
199
- case 'p ' :
200
- if ($ element ->pPr ->sectPr ) {
201
- $ section = $ word ->createSection ();
202
- continue ;
203
- }
204
- if ($ element ->r ) {
205
- if (count ($ element ->r ) == 1 ) {
206
- $ section ->addText ($ element ->r ->t );
207
- } else {
208
- $ textRun = $ section ->createTextRun ();
209
- foreach ($ element ->r as $ r ) {
210
- $ textRun ->addText ($ r ->t );
211
- }
212
- }
204
+
205
+ foreach ($ xmlDoc ->body ->children () as $ elm ) {
206
+ $ elmName = $ elm ->getName ();
207
+ if ($ elmName == 'p ' ) { // Paragraph/section
208
+ // Create new section if section properties (w:sectPr) are found
209
+ if ($ elm ->pPr ->sectPr ) {
210
+ $ section ->setSettings ($ this ->loadSectionSettings ($ elm ->pPr ));
211
+ $ section = $ word ->createSection ();
212
+ continue ;
213
+ }
214
+ // Has w:r? It's either text or textrun
215
+ if ($ elm ->r ) {
216
+ // w:r = 1? It's a plain paragraph
217
+ if (count ($ elm ->r ) == 1 ) {
218
+ $ section ->addText ($ elm ->r ->t ,
219
+ $ this ->loadFontStyle ($ elm ->r ));
220
+ // w:r more than 1? It's a textrun
213
221
} else {
214
- $ section ->addTextBreak ();
222
+ $ textRun = $ section ->createTextRun ();
223
+ foreach ($ elm ->r as $ r ) {
224
+ $ textRun ->addText ($ r ->t ,
225
+ $ this ->loadFontStyle ($ r ));
226
+ }
215
227
}
216
- break ;
228
+ // No, it's a textbreak
229
+ } else {
230
+ $ section ->addTextBreak ();
231
+ }
232
+ } elseif ($ elmName == 'sectPr ' ) {
233
+ // Last section setting
234
+ $ section ->setSettings ($ this ->loadSectionSettings ($ xmlDoc ->body ));
235
+ }
236
+ }
237
+ }
238
+ break ;
239
+ }
240
+ }
241
+
242
+ // Read styles
243
+ $ docRels = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "word/_rels/document.xml.rels " ));
244
+ foreach ($ docRels ->Relationship as $ rel ) {
245
+ switch ($ rel ["Type " ]) {
246
+ case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles " :
247
+ $ xmlStyle = simplexml_load_string ($ this ->getFromZipArchive ($ zip , "word/ {$ rel ['Target ' ]}" , true ));
248
+ if (is_object ($ xmlStyle )) {
249
+ foreach ($ xmlStyle ->children () as $ elm ) {
250
+ if ($ elm ->getName () != 'style ' ) {
251
+ continue ;
252
+ }
253
+ unset($ pStyle );
254
+ unset($ fStyle );
255
+ $ hasParagraphStyle = $ elm ->pPr && ($ elm ->pPr != '' );
256
+ $ hasFontStyle = $ elm ->rPr && ($ elm ->rPr != '' );
257
+ $ styleName = (string )$ elm ->name ['val ' ];
258
+ if ($ hasParagraphStyle ) {
259
+ $ pStyle = $ this ->loadParagraphStyle ($ elm );
260
+ if (!$ hasFontStyle ) {
261
+ $ word ->addParagraphStyle ($ styleName , $ pStyle );
262
+ }
263
+ }
264
+ if ($ hasFontStyle ) {
265
+ $ fStyle = $ this ->loadFontStyle ($ elm );
266
+ $ word ->addFontStyle ($ styleName , $ fStyle , $ pStyle );
217
267
}
218
268
}
219
269
}
@@ -225,6 +275,181 @@ public function load($pFilename)
225
275
return $ word ;
226
276
}
227
277
278
/**
 * Load section settings from SimpleXMLElement
 *
 * Reads the `sectPr` child of the given element and converts its `type`,
 * `pgSz`, `pgMar` and `cols` attributes into a settings array. Attribute
 * names are expected without a namespace prefix.
 *
 * @param SimpleXMLElement $elm Element that may contain a `sectPr` child
 * @return array|null Settings keyed by setting name, or null when the
 *                    element has no `sectPr` child
 *
 * @todo Implement gutter
 */
private function loadSectionSettings($elm)
{
    // isset() instead of a truthiness test: SimpleXML casts an empty,
    // attribute-less element to false even though the element exists.
    if (!isset($elm->sectPr)) {
        return null;
    }
    $xml = $elm->sectPr;
    $setting = array();

    if (isset($xml->type['val'])) {
        $setting['breakType'] = (string)$xml->type['val'];
    }

    // child element => (attribute => setting key); every value is an integer.
    // pgMar's "gutter" attribute is intentionally not mapped yet (see @todo).
    $intAttributes = array(
        'pgSz' => array(
            'w' => 'pageSizeW',
            'h' => 'pageSizeH',
        ),
        'pgMar' => array(
            'top'    => 'topMargin',
            'left'   => 'leftMargin',
            'bottom' => 'bottomMargin',
            'right'  => 'rightMargin',
            'header' => 'headerHeight',
            'footer' => 'footerHeight',
        ),
        'cols' => array(
            'num'   => 'colsNum',
            'space' => 'colsSpace',
        ),
    );

    foreach ($intAttributes as $child => $attributes) {
        foreach ($attributes as $attribute => $key) {
            if (isset($xml->{$child}[$attribute])) {
                $setting[$key] = (int)$xml->{$child}[$attribute];
            }
        }
        // Orientation is emitted right after the page size so the key order
        // stays the same as before, in case the consumer applies the
        // settings sequentially.
        if ($child == 'pgSz' && isset($xml->pgSz['orient'])) {
            $setting['orientation'] = (string)$xml->pgSz['orient'];
        }
    }

    return $setting;
}
340
+
341
/**
 * Load paragraph style from SimpleXMLElement
 *
 * Reads the `pPr` child of the given element. When `pPr` references a named
 * style through `pStyle`, that style name is returned as a string; otherwise
 * the individual properties are collected into an array. Element and
 * attribute names are expected without a namespace prefix.
 *
 * @param SimpleXMLElement $elm Element that may contain a `pPr` child
 * @return array|string|null Style array, referenced style name, or null when
 *                           the element has no `pPr` child
 */
private function loadParagraphStyle($elm)
{
    // isset() instead of a truthiness test: SimpleXML casts an empty,
    // attribute-less element to false even though the element exists.
    if (!isset($elm->pPr)) {
        return null;
    }
    $xml = $elm->pPr;

    if (isset($xml->pStyle)) {
        return (string)$xml->pStyle['val'];
    }

    $style = array();
    if (isset($xml->jc['val'])) {
        $style['align'] = (string)$xml->jc['val'];
    }

    // Indentation values are attributes of <ind>, not child elements.
    if (isset($xml->ind['left'])) {
        $style['indent'] = (int)$xml->ind['left'];
    }
    if (isset($xml->ind['hanging'])) {
        $style['hanging'] = (int)$xml->ind['hanging'];
    }

    $spacingKeys = array(
        'after'  => 'spaceAfter',
        'before' => 'spaceBefore',
        'line'   => 'spacing',
    );
    foreach ($spacingKeys as $attribute => $key) {
        if (isset($xml->spacing[$attribute])) {
            $style[$key] = (int)$xml->spacing[$attribute];
        }
    }

    // In a style definition, basedOn/next are children of the style element
    // itself; the lookup under pPr is kept as a fallback for compatibility.
    foreach (array('basedOn', 'next') as $link) {
        if (isset($elm->{$link}['val'])) {
            $style[$link] = (string)$elm->{$link}['val'];
        } elseif (isset($xml->{$link}['val'])) {
            $style[$link] = (string)$xml->{$link}['val'];
        }
    }

    // On/off elements: present without "val" means on; these values mean off.
    $offValues = array('0', 'false', 'off');

    // widowControl is only reported when it is explicitly switched off.
    if (isset($xml->widowControl)
        && in_array((string)$xml->widowControl['val'], $offValues, true)
    ) {
        $style['widowControl'] = false;
    }

    foreach (array('keepNext', 'keepLines', 'pageBreakBefore') as $flag) {
        if (isset($xml->{$flag})
            && !in_array((string)$xml->{$flag}['val'], $offValues, true)
        ) {
            $style[$flag] = true;
        }
    }

    return $style;
}
402
+
403
/**
 * Load font style from SimpleXMLElement
 *
 * Reads the `rPr` child of the given element. When `rPr` references a named
 * style through `rStyle`, that style name is returned as a string; otherwise
 * the individual properties are collected into an array. Element and
 * attribute names are expected without a namespace prefix.
 *
 * @param SimpleXMLElement $elm Element that may contain an `rPr` child
 * @return array|string|null Style array, referenced style name, or null when
 *                           the element has no `rPr` child
 */
private function loadFontStyle($elm)
{
    // isset() instead of a truthiness test: SimpleXML casts an empty,
    // attribute-less element (e.g. <b/>) to false even though it exists.
    if (!isset($elm->rPr)) {
        return null;
    }
    $xml = $elm->rPr;

    if (isset($xml->rStyle)) {
        return (string)$xml->rStyle['val'];
    }

    $style = array();
    if (isset($xml->rFonts['ascii'])) {
        $style['name'] = (string)$xml->rFonts['ascii'];
    }
    if (isset($xml->sz['val'])) {
        // The stored value is halved to obtain the font size.
        $style['size'] = (int)$xml->sz['val'] / 2;
    }
    if (isset($xml->color['val'])) {
        $style['color'] = (string)$xml->color['val'];
    }

    // On/off elements: present without "val" means on; these values mean off.
    $offValues = array('0', 'false', 'off');
    $toggles = array('b' => 'bold', 'i' => 'italic');
    foreach ($toggles as $tag => $key) {
        if (isset($xml->{$tag})
            && !in_array((string)$xml->{$tag}['val'], $offValues, true)
        ) {
            $style[$key] = true;
        }
    }

    if (isset($xml->u['val'])) {
        $style['underline'] = (string)$xml->u['val'];
    }
    if (isset($xml->strike)
        && !in_array((string)$xml->strike['val'], $offValues, true)
    ) {
        $style['strikethrough'] = true;
    }
    if (isset($xml->highlight['val'])) {
        $style['fgColor'] = (string)$xml->highlight['val'];
    }

    // Only the two explicit values change the style; anything else
    // (such as "baseline") leaves the text on the normal line.
    if (isset($xml->vertAlign['val'])) {
        $vertAlign = (string)$xml->vertAlign['val'];
        if ($vertAlign == 'superscript') {
            $style['superScript'] = true;
        } elseif ($vertAlign == 'subscript') {
            $style['subScript'] = true;
        }
    }

    return $style;
}
452
+
228
453
/**
229
454
* Get array item
230
455
*
0 commit comments