@@ -18,6 +18,8 @@ type BaseOpType = 'delete' | 'create'
1818
1919interface HtmlDiffConfig {
2020 minMatchedSize : number
21+ greedyMatch : boolean
22+ greedyBoundary : number
2123 classNames : {
2224 createText : string
2325 deleteText : string
@@ -29,7 +31,26 @@ interface HtmlDiffConfig {
2931}
3032
3133export interface HtmlDiffOptions {
34+ /**
35+ * Determine the minimum threshold for calculating common subsequences.
36+ * You may adjust it to a value larger than 2, but not lower, due to the potential inclusion of HTML tags in the count.
37+ * @defaultValue 2
38+ */
3239 minMatchedSize ?: number
40+ /**
41+ * When greedyMatch is enabled, if the length of the sub-sequences exceeds greedyBoundary,
42+ * we will use the matched sub-sequences that are sufficiently good, even if they are not optimal, to enhance performance.
43+ * @defaultValue true
44+ */
45+ greedyMatch ?: boolean
46+ /**
47+ * @defaultValue 1000
48+ */
49+ greedyBoundary ?: number
50+ /**
51+ * The classNames for wrapper DOM.
52+ * Use this to configure your own styles without importing the built-in CSS file
53+ */
3354 classNames ?: Partial < {
3455 createText ?: string
3556 deleteText ?: string
@@ -53,6 +74,7 @@ export default class HtmlDiff {
5374 private readonly newWords : string [ ] = [ ]
5475 private readonly matchedBlockList : MatchedBlock [ ] = [ ]
5576 private readonly operationList : Operation [ ] = [ ]
77+ private leastCommonLength : number = Infinity
5678 private unifiedContent ?: string
5779 private sideBySideContents ?: [ string , string ]
5880
@@ -61,6 +83,8 @@ export default class HtmlDiff {
6183 newHtml : string ,
6284 {
6385 minMatchedSize = 2 ,
86+ greedyMatch = true ,
87+ greedyBoundary = 1000 ,
6488 classNames = {
6589 createText : 'html-diff-create-text-wrapper' ,
6690 deleteText : 'html-diff-delete-text-wrapper' ,
@@ -74,6 +98,8 @@ export default class HtmlDiff {
7498 // init config
7599 this . config = {
76100 minMatchedSize,
101+ greedyMatch,
102+ greedyBoundary,
77103 classNames : {
78104 createText : 'html-diff-create-text-wrapper' ,
79105 deleteText : 'html-diff-delete-text-wrapper' ,
@@ -93,8 +119,8 @@ export default class HtmlDiff {
93119 }
94120
95121 // step1: split HTML to atomic words
96- this . oldWords = this . convertHtml2Words ( oldHtml )
97- this . newWords = this . convertHtml2Words ( newHtml )
122+ this . oldWords = this . tokenize ( oldHtml )
123+ this . newWords = this . tokenize ( newHtml )
98124 // step2: find matched blocks
99125 this . matchedBlockList = this . getMatchedBlockList ( )
100126 // step3: generate operation list
@@ -277,11 +303,12 @@ export default class HtmlDiff {
277303 }
278304
279305 /**
280- * convert HTML to word list
281- * "<a> Hello World </a>"
306+ * convert HTML to tokens
307+ * @example
308+ * tokenize("<a> Hello World </a>")
282309 * ["<a>"," ", "Hello", " ", "World", " ", "</a>"]
283310 */
284- private convertHtml2Words ( html : string ) : string [ ] {
311+ private tokenize ( html : string ) : string [ ] {
285312 // atomic word: an HTML tag, a run of consecutive digits or letters, a blank space, a symbol, or any other single character (e.g. Chinese)
286313 return (
287314 html . match (
@@ -329,19 +356,24 @@ export default class HtmlDiff {
329356 }
330357 }
331358
332- const ret = this . computeMatchedBlockList (
333- start ? i : 0 ,
334- end ? e1 + 1 : n1 ,
335- start ? i : 0 ,
336- end ? e2 + 1 : n2 ,
337- )
359+ const oldStart = start ? i : 0
360+ const oldEnd = end ? e1 + 1 : n1
361+ const newStart = start ? i : 0
362+ const newEnd = end ? e2 + 1 : n2
363+ // greedy optimization for large sequences: accept the first match longer than a third of the shorter range
364+ if ( this . config . greedyMatch ) {
365+ const commonLength = Math . min ( oldEnd - oldStart , newEnd - newStart )
366+ if ( commonLength > this . config . greedyBoundary ) {
367+ this . leastCommonLength = Math . floor ( commonLength / 3 )
368+ }
369+ }
370+ const ret = this . computeMatchedBlockList ( oldStart , oldEnd , newStart , newEnd )
338371 if ( start ) ret . unshift ( start )
339372 if ( end ) ret . push ( end )
340373
341374 return ret
342375 }
343376
344- // todo difflib
345377 private computeMatchedBlockList (
346378 oldStart : number ,
347379 oldEnd : number ,
@@ -390,13 +422,15 @@ export default class HtmlDiff {
390422 const ret = this . slideBestMatchedBlock ( i , newStart , len )
391423 if ( ret && ( ! bestMatchedBlock || ret . size > bestMatchedBlock . size ) ) {
392424 bestMatchedBlock = ret
425+ if ( ret . size > this . leastCommonLength ) return bestMatchedBlock
393426 }
394427 }
395428 for ( let j = newStart ; j < newEnd ; j ++ ) {
396429 const len = Math . min ( oldEnd - oldStart , newEnd - j )
397430 const ret = this . slideBestMatchedBlock ( oldStart , j , len )
398431 if ( ret && ( ! bestMatchedBlock || ret . size > bestMatchedBlock . size ) ) {
399432 bestMatchedBlock = ret
433+ if ( ret . size > this . leastCommonLength ) return bestMatchedBlock
400434 }
401435 }
402436 return bestMatchedBlock
0 commit comments