11import {
22 ElementInfo ,
33 ElementType ,
4+ TextParagraph ,
5+ TextParagraphGroup ,
46 XmlDocument ,
57 XmlElement ,
68} from '../types/xml-types' ;
79import { XmlHelper } from './xml-helper' ;
810import HasShapes from '../classes/has-shapes' ;
9- import { FindElementSelector , ShapeModificationCallback } from '../types/types' ;
10- import ModifyTableHelper from './modify-table-helper' ;
11- import { TableData , TableInfo } from '../types/table-types' ;
11+ import { TableInfo } from '../types/table-types' ;
1212
/**
 * Namespace URI passed to namespace-aware lookups of PresentationML
 * elements (e.g. `getElementsByTagNameNS(nsMain, 'cNvPr')`).
 */
export const nsMain =
  'http://schemas.openxmlformats.org/presentationml/2006/main';

/**
 * Lookup from the `uri` attribute of an `a:graphicData` element to the
 * element type name reported for a `graphicFrame`.
 */
export const mapUriType = {
  'http://schemas.openxmlformats.org/drawingml/2006/table': 'table',
  'http://schemas.openxmlformats.org/drawingml/2006/chart': 'chart',
  'http://schemas.microsoft.com/office/drawing/2014/chartex': 'chartEx',
  'http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject':
    'oleObject',
  'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink':
    'hyperlink',
};
2224
2325/**
@@ -118,6 +120,9 @@ export class XmlSlideHelper {
118120 hasTextBody : ! ! XmlSlideHelper . getTextBody ( slideElement ) ,
119121 getXmlElement : ( ) => slideElement ,
120122 getText : ( ) => XmlSlideHelper . parseTextFragments ( slideElement ) ,
123+ getParagraphs : ( ) => XmlSlideHelper . parseTextParagraphs ( slideElement ) ,
124+ getParagraphGroups : ( ) =>
125+ XmlSlideHelper . parseParagraphGroups ( slideElement ) ,
121126 getTableInfo : ( ) => XmlSlideHelper . readTableInfo ( slideElement ) ,
122127 getAltText : ( ) => XmlSlideHelper . getImageAltText ( slideElement ) ,
123128 } ;
@@ -152,13 +157,170 @@ export class XmlSlideHelper {
/**
 * Collects the text content of every `a:t` element inside the shape's
 * text body.
 *
 * @param shapeNode Shape element to read from.
 * @returns One string per `a:t` element, in document order; an empty array
 *   when the shape has no text body.
 */
static parseTextFragments(shapeNode: XmlElement): string[] {
  const txBody = XmlSlideHelper.getTextBody(shapeNode);
  const textFragments: string[] = [];

  if (!txBody) {
    return textFragments;
  }

  const texts = txBody.getElementsByTagName('a:t');
  for (let t = 0; t < texts.length; t++) {
    const text = texts.item(t);
    if (!text) {
      continue;
    }
    // textContent is typed as nullable; never push null into a string[].
    textFragments.push(text.textContent || '');
  }
  return textFragments;
}
161172
/**
 * Parses the paragraphs of a shape and merges consecutive paragraphs that
 * share the same formatting properties into groups.
 *
 * @param shapeNode Shape element to read from.
 * @returns The grouped paragraphs produced by `groupSimilarParagraphs`.
 */
static parseParagraphGroups(shapeNode: XmlElement): TextParagraphGroup[] {
  return XmlSlideHelper.groupSimilarParagraphs(
    XmlSlideHelper.parseTextParagraphs(shapeNode),
  );
}
177+
/**
 * Reads every `a:p` paragraph below the shape's `p:txBody` (or `a:txBody`)
 * and returns its text runs together with the paragraph/run formatting
 * that the property setters detect.
 *
 * Paragraphs without any `a:t` element inside an `a:r` run are omitted.
 *
 * @param shapeNode Shape element to read from.
 * @returns One entry per paragraph that contains text; an empty array when
 *   no text body is found.
 */
static parseTextParagraphs(shapeNode: XmlElement): TextParagraph[] {
  const txBody =
    shapeNode.getElementsByTagName('p:txBody')[0] ||
    shapeNode.getElementsByTagName('a:txBody')[0];

  if (!txBody) {
    return [];
  }

  const result: TextParagraph[] = [];

  Array.from(txBody.getElementsByTagName('a:p')).forEach((paragraphNode) => {
    const entry: TextParagraph = { texts: [] };

    // Paragraph-level properties: indent, bullet, numbering, alignment.
    const paragraphProps = paragraphNode.getElementsByTagName('a:pPr')[0];
    if (paragraphProps) {
      XmlSlideHelper.setParagraphProperties(paragraphProps, entry);
    }

    // Run-level properties and the text carried by each run.
    Array.from(paragraphNode.getElementsByTagName('a:r')).forEach((runNode) => {
      XmlSlideHelper.setTextProperties(runNode, entry);

      Array.from(runNode.getElementsByTagName('a:t')).forEach((textNode) => {
        entry.texts.push(textNode.textContent || '');
      });
    });

    // Keep only paragraphs that actually carry text.
    if (entry.texts.length > 0) {
      result.push(entry);
    }
  });

  return result;
}
224+
/**
 * Copies character formatting found on a run's `a:rPr` element onto the
 * paragraph summary. Flags are only ever switched on, so a paragraph is
 * marked bold/italic/underlined as soon as one of its runs is; the font
 * size of the last run that defines one wins.
 *
 * @param run Text run (`a:r`) element to inspect.
 * @param paragraph Paragraph summary that receives the detected properties.
 */
static setTextProperties(run: XmlElement, paragraph: TextParagraph): void {
  const rPr = run.getElementsByTagName('a:rPr')[0];
  if (!rPr) {
    return;
  }

  // `b` and `i` are XML booleans, so "true" is as valid as "1".
  const isSwitchedOn = (attributeName: string): boolean => {
    const value = rPr.getAttribute(attributeName);
    return value === '1' || value === 'true';
  };

  // `u` holds an underline style name (e.g. "sng", "dbl"), not a boolean;
  // only "none" (or a stray "0") means the run is not underlined.
  const underline = rPr.getAttribute('u');
  const isUnderlined =
    !!underline && underline !== 'none' && underline !== '0';

  // `sz` is divided by 100 to convert it to points.
  const fontSize = parseInt(rPr.getAttribute('sz') || '0', 10) / 100;

  if (isSwitchedOn('b')) paragraph.isBold = true;
  if (isSwitchedOn('i')) paragraph.isItalic = true;
  if (isUnderlined) paragraph.isUnderlined = true;
  // Skips both a missing size (0) and an unparsable one (NaN).
  if (fontSize) paragraph.fontSize = fontSize;
}
239+
/**
 * Copies paragraph-level formatting from an `a:pPr` element onto the
 * paragraph summary: left margin (`marL`), bullet character, automatic
 * numbering and alignment. Properties that are absent in the XML are left
 * untouched on `paragraph`.
 *
 * @param pPr Paragraph properties (`a:pPr`) element to inspect.
 * @param paragraph Paragraph summary that receives the detected properties.
 */
static setParagraphProperties(pPr: XmlElement, paragraph: TextParagraph): void {
  const marL = pPr.getAttribute('marL');
  if (marL) {
    const indent = parseInt(marL, 10);
    // Do not store NaN when the attribute is not numeric.
    if (!Number.isNaN(indent)) {
      paragraph.indent = indent;
    }
  }

  const buChar = pPr.getElementsByTagName('a:buChar')[0];
  if (buChar) {
    const bulletChar = buChar.getAttribute('char');
    // A missing attribute stays undefined, like the other optional fields.
    if (bulletChar !== null) {
      paragraph.bullet = bulletChar;
    }
  }

  // Check for numbered list
  const buAutoNum = pPr.getElementsByTagName('a:buAutoNum')[0];
  if (buAutoNum) {
    paragraph.isNumbered = true;
    paragraph.numberingType = buAutoNum.getAttribute('type') || undefined;
    paragraph.startAt = buAutoNum.getAttribute('startAt') || undefined;
  }

  // Check for alignment
  const algn = pPr.getAttribute('algn');
  if (algn) {
    paragraph.align = algn as TextParagraph['align'];
  }
}
265+
/**
 * Merges consecutive paragraphs whose defined formatting properties are
 * identical into one group. Each group keeps the shared properties and one
 * string per paragraph (the paragraph's text runs joined without a
 * separator).
 *
 * @param paragraphs Paragraphs in document order.
 * @returns Groups in document order; a new group starts whenever the
 *   properties differ from those of the previous paragraph.
 */
static groupSimilarParagraphs(
  paragraphs: TextParagraph[],
): TextParagraphGroup[] {
  // Properties that decide whether two paragraphs belong together.
  // `indent` is not part of the comparison.
  const groupingKeys = [
    'fontSize',
    'isBold',
    'isItalic',
    'isUnderlined',
    'align',
    'isNumbered',
    'numberingType',
    'bullet',
    'startAt',
  ] as const;

  // Copies only the grouping properties that are actually set.
  const pickDefined = (source: TextParagraph): Record<string, any> => {
    const picked: Record<string, any> = {};
    groupingKeys.forEach((key) => {
      const value = source[key];
      if (value !== undefined) {
        picked[key] = value;
      }
    });
    return picked;
  };

  const result: TextParagraphGroup[] = [];
  let openGroup: TextParagraphGroup | null = null;
  // Serialized properties of `openGroup`, used for the equality check.
  let openSignature = '';

  paragraphs.forEach((item) => {
    const properties = pickDefined(item);
    const signature = JSON.stringify(properties);

    if (openGroup === null || signature !== openSignature) {
      openGroup = { properties, texts: [] };
      openSignature = signature;
      result.push(openGroup);
    }

    openGroup.texts.push(item.texts.join(''));
  });

  return result;
}
323+
/**
 * Returns the first `cNvPr` element in the `nsMain` namespace found below
 * the given shape.
 *
 * @param shapeNode Shape element to search.
 * @returns The first match as returned by `item(0)`.
 */
static getNonVisibleProperties(shapeNode: XmlElement): XmlElement {
  const matches = shapeNode.getElementsByTagNameNS(nsMain, 'cNvPr');
  return matches.item(0);
}
@@ -197,20 +359,24 @@ export class XmlSlideHelper {
197359 static getElementType ( slideElementParent : XmlElement ) : ElementType {
198360 let type = slideElementParent . localName ;
199361
362+ const getUri = ( ) => {
363+ const graphicData =
364+ slideElementParent . getElementsByTagName ( 'a:graphicData' ) [ 0 ] ;
365+ return graphicData . getAttribute ( 'uri' ) ;
366+ } ;
367+
200368 switch ( type ) {
201369 case 'graphicFrame' :
202- const graphicData =
203- slideElementParent . getElementsByTagName ( 'a:graphicData' ) [ 0 ] ;
204- const uri = graphicData . getAttribute ( 'uri' ) ;
205- type = mapUriType [ uri ] ? mapUriType [ uri ] : type ;
370+ type = mapUriType [ getUri ( ) ] || type ;
206371 break ;
207372 case 'oleObj' :
208373 type = 'OLEObject' ;
209374 break ;
210375 }
211376
212377 // Check for hyperlinks
213- const hasHyperlink = slideElementParent . getElementsByTagName ( 'a:hlinkClick' ) ;
378+ const hasHyperlink =
379+ slideElementParent . getElementsByTagName ( 'a:hlinkClick' ) ;
214380 if ( hasHyperlink . length > 0 ) {
215381 type = 'Hyperlink' ;
216382 }
@@ -228,13 +394,15 @@ export class XmlSlideHelper {
228394 y : 0 ,
229395 cx : 0 ,
230396 cy : 0 ,
397+ rot : 0 ,
231398 } ;
232399
233400 if ( ! xFrms . item ( 0 ) ) {
234401 return position ;
235402 }
236403
237404 const xFrm = xFrms . item ( 0 ) ;
405+
238406 const Off = xFrm . getElementsByTagName ( 'a:off' ) . item ( 0 ) ;
239407 const Ext = xFrm . getElementsByTagName ( 'a:ext' ) . item ( 0 ) ;
240408
@@ -243,6 +411,10 @@ export class XmlSlideHelper {
243411 position . cx = XmlSlideHelper . parseCoordinate ( Ext , 'cx' ) ;
244412 position . cy = XmlSlideHelper . parseCoordinate ( Ext , 'cy' ) ;
245413
414+ if ( xFrm . getAttribute ( 'rot' ) ) {
415+ position . rot = parseInt ( xFrm . getAttribute ( 'rot' ) ) ;
416+ }
417+
246418 return position ;
247419 }
248420
0 commit comments