33declare (strict_types=1 );
44
55/** 
6-  * SPDX-FileCopyrightText: 2018  Nextcloud GmbH and Nextcloud contributors 
6+  * SPDX-FileCopyrightText: 2025  Nextcloud GmbH and Nextcloud contributors 
77 * SPDX-License-Identifier: AGPL-3.0-or-later 
88 */ 
99
1414 */ 
1515class  DiffService {
1616
17+ 	/** 
18+ 	 * Pattern for markdown checkboxes: - [ ] or - [x] or - [X] 
19+ 	 */ 
20+ 	private  const  CHECKBOX_PATTERN  = '/^(\s*-\s*)\[([ xX])\](.*)/i ' ;
21+ 
22+ 	/** 
23+ 	 * Pattern for code blocks: ``` or ```language 
24+ 	 */ 
25+ 	private  const  CODE_BLOCK_PATTERN  = '/^```/ ' ;
26+ 
27+ 	/** 
28+ 	 * Pattern for callout blocks: ::: info, ::: success, ::: warn, ::: error 
29+ 	 */ 
30+ 	private  const  CALLOUT_BLOCK_PATTERN  = '/^:::\s*(info|success|warn|error)/i ' ;
31+ 
32+ 	/** 
33+ 	 * Pattern for block endings: ::: 
34+ 	 */ 
35+ 	private  const  BLOCK_END_PATTERN  = '/^:::$/ ' ;
36+ 
37+ 	/** 
38+ 	 * Pattern for blockquotes: > at start of line 
39+ 	 */ 
40+ 	private  const  QUOTE_PATTERN  = '/^>\s*/ ' ;
41+ 
42+ 	/** 
43+ 	 * Callout block emojis 
44+ 	 */ 
45+ 	private  const  CALLOUT_EMOJIS  = [
46+ 		'info '  => 'ℹ️ ' ,
47+ 		'success '  => '✅ ' ,
48+ 		'warn '  => '⚠️ ' ,
49+ 		'error '  => '🔴 ' ,
50+ 	];
51+ 
1752	/** 
1853	 * Generate a visual diff between two text strings 
1954	 * 
@@ -135,19 +170,18 @@ private function renderIntelligentDiffHtml(array $operations, array $oldLines, a
135170		}
136171
137172		// Handle intelligent word-level diffing for modified lines 
138- 		return  $ this  ->enhanceWithWordLevelDiff ('' ,  $ operations , $ oldLines , $ newLines );
173+ 		return  $ this  ->enhanceWithWordLevelDiff ($ operations , $ oldLines , $ newLines );
139174	}
140175
141176	/** 
142177	 * Enhance diff with word-level granularity for similar lines 
143178	 * 
144- 	 * @param string $html Current HTML diff 
145179	 * @param array $operations 
146180	 * @param array $oldLines 
147181	 * @param array $newLines 
148182	 * @return string 
149183	 */ 
150- 	private  function  enhanceWithWordLevelDiff (string   $ html ,  array  $ operations , array  $ oldLines , array  $ newLines ): string  {
184+ 	private  function  enhanceWithWordLevelDiff (array  $ operations , array  $ oldLines , array  $ newLines ): string  {
151185		// Find remove/add pairs that might be line modifications 
152186		// First pass: collect all removes and adds 
153187		$ removes  = [];
@@ -209,57 +243,58 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
209243
210244		// Third pass: detect modifications from remaining removes/adds 
211245		$ usedAdds  = $ moveDetectedAdds ; // Start with adds already used in moves 
212- 	
246+ 		$ usedRemoves  = $ moveDetectedRemoves ; // Start with removes already used in moves 
247+ 
213248		// Process remaining removes and try to find matching adds for modifications 
214249		foreach  ($ removes  as  $ removeIndex  => $ removeOp ) {
215250			// Skip removes already used in moves 
216- 			if  (in_array ($ removeIndex , $ moveDetectedRemoves  )) {
251+ 			if  (in_array ($ removeIndex , $ usedRemoves  )) {
217252				continue ;
218253			}
219- 		 
254+ 
220255			$ bestMatch  = null ;
221256			$ bestScore  = -1 ;
222257			$ bestAddIndex  = -1 ;
223- 		 
258+ 
224259			$ oldLine  = $ oldLines [$ removeOp ['old_line ' ]] ?? '' ;
225260			$ oldLineNum  = $ removeOp ['old_line ' ] + 1 ;
226- 		 
261+ 
227262			// Look for best matching add operation 
228263			foreach  ($ adds  as  $ addIndex  => $ addOp ) {
229264				if  (in_array ($ addIndex , $ usedAdds )) {
230265					continue ;
231266				}
232- 			 
267+ 
233268				$ newLine  = $ newLines [$ addOp ['new_line ' ]] ?? '' ;
234269				$ newLineNum  = $ addOp ['new_line ' ] + 1 ;
235- 			 
270+ 
236271				// Calculate matching score 
237272				$ score  = 0 ;
238- 			 
273+ 
239274				// Same line number gets highest priority 
240275				if  ($ oldLineNum  === $ newLineNum ) {
241276					$ score  += 100 ;
242277				}
243- 			 
278+ 
244279				// Similar content gets secondary priority 
245280				if  ($ this  ->shouldUseWordLevelDiff ($ oldLine , $ newLine )) {
246281					$ maxLen  = max (strlen ($ oldLine ), strlen ($ newLine ));
247282					$ distance  = levenshtein ($ oldLine , $ newLine );
248283					$ similarity  = 1  - ($ distance  / $ maxLen );
249284					$ score  += $ similarity  * 50 ; // Up to 50 points for similarity 
250285				}
251- 			 
286+ 
252287				// Proximity bonus (closer line numbers get bonus) 
253288				$ proximityBonus  = max (0 , 10  - abs ($ oldLineNum  - $ newLineNum ));
254289				$ score  += $ proximityBonus ;
255- 			 
290+ 
256291				if  ($ score  > $ bestScore ) {
257292					$ bestScore  = $ score ;
258293					$ bestMatch  = $ addOp ;
259294					$ bestAddIndex  = $ addIndex ;
260295				}
261296			}
262- 		 
297+ 
263298			// If we found a good match, create a modify operation 
264299			if  ($ bestMatch  && $ bestScore  > 10 ) { // Minimum threshold 
265300				$ enhancedOps [] = [
@@ -268,9 +303,11 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
268303					'new_line '  => $ bestMatch ['new_line ' ]
269304				];
270305				$ usedAdds [] = $ bestAddIndex ;
306+ 				$ usedRemoves [] = $ removeIndex ;
271307			} else  {
272308				// No good match, keep as remove 
273309				$ enhancedOps [] = $ removeOp ;
310+ 				$ usedRemoves [] = $ removeIndex ;
274311			}
275312		}
276313
@@ -284,15 +321,7 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
284321
285322		// Add remaining unused remove operations (not involved in moves or modifications) 
286323		foreach  ($ removes  as  $ removeIndex  => $ removeOp ) {
287- 			// Skip removes already used in moves or modifications 
288- 			$ alreadyUsed  = false ;
289- 			foreach  ($ enhancedOps  as  $ op ) {
290- 				if  (($ op ['type ' ] === 'modify '  || $ op ['type ' ] === 'move ' ) && $ op ['old_line ' ] === $ removeOp ['old_line ' ]) {
291- 					$ alreadyUsed  = true ;
292- 					break ;
293- 				}
294- 			}
295- 			if  (!$ alreadyUsed ) {
324+ 			if  (!in_array ($ removeIndex , $ usedRemoves )) {
296325				$ enhancedOps [] = $ removeOp ;
297326			}
298327		}
@@ -303,24 +332,32 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
303332		}
304333
305334		// Now rebuild HTML with only changed lines and line number prefixes 
335+ 		// Format each operation for display, using actual line positions in NEW text 
306336		$ lines  = [];
307- 		 
337+ 
308338		foreach  ($ enhancedOps  as  $ operation ) {
309339			switch  ($ operation ['type ' ]) {
310340				case  'add ' :
311341					$ line  = $ newLines [$ operation ['new_line ' ]] ?? '' ;
312- 					$ lineNumber   = $ operation ['new_line ' ] + 1 ; // 1-based line numbers 
342+ 					$ newLineNumber   = $ operation ['new_line ' ] + 1 ; // 1-based line numbers 
313343					// Skip empty line additions 
314344					if  (!empty (trim ($ line ))) {
315- 						$ lines [] = '➕ '  . $ lineNumber  . ' <ins> '  . htmlspecialchars ($ line , ENT_QUOTES , 'UTF-8 ' ) . '</ins> ' ;
345+ 						$ formatted  = $ this  ->formatSpecialLine ($ line );
346+ 						if  ($ formatted  !== null ) {
347+ 							$ lines [] = '✨ '  . $ newLineNumber  . ' <ins> '  . $ formatted  . '</ins> ' ;
348+ 						}
316349					}
317350					break ;
318351				case  'remove ' :
319352					$ line  = $ oldLines [$ operation ['old_line ' ]] ?? '' ;
320- 					$ lineNumber   = $ operation ['old_line ' ] + 1 ; // 1-based line numbers 
353+ 					$ oldLineNumber   = $ operation ['old_line ' ] + 1 ; // 1-based line numbers 
321354					// Skip empty line removals 
322355					if  (!empty (trim ($ line ))) {
323- 						$ lines [] = '🗑️ '  . $ lineNumber  . ' <del> '  . htmlspecialchars ($ line , ENT_QUOTES , 'UTF-8 ' ) . '</del> ' ;
356+ 						$ formatted  = $ this  ->formatSpecialLine ($ line );
357+ 						if  ($ formatted  !== null ) {
358+ 							// Show old line number with strikethrough to indicate it's from old version 
359+ 							$ lines [] = '🗑️<del> '  . $ oldLineNumber  . '</del> <del> '  . $ formatted  . '</del> ' ;
360+ 						}
324361					}
325362					break ;
326363				case  'keep ' :
@@ -329,25 +366,25 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
329366				case  'modify ' :
330367					$ oldLine  = $ oldLines [$ operation ['old_line ' ]] ?? '' ;
331368					$ newLine  = $ newLines [$ operation ['new_line ' ]] ?? '' ;
332- 					$ lineNumber   = $ operation ['old_line ' ] + 1 ; // Use old  line number as reference  
333- 					$ lines [] = '✏️ '  . $ lineNumber   . '  '  . $ this  ->generateWordLevelDiff ($ oldLine , $ newLine );
369+ 					$ newLineNumber   = $ operation ['new_line ' ] + 1 ; // 1-based  line numbers  
370+ 					$ lines [] = '✏️ '  . $ newLineNumber   . '  '  . $ this  ->generateWordLevelDiff ($ oldLine , $ newLine );
334371					break ;
335372				case  'move ' :
336373					$ oldLineNum  = $ operation ['old_line ' ] + 1 ;
337374					$ newLineNum  = $ operation ['new_line ' ] + 1 ;
338375					$ content  = htmlspecialchars ($ operation ['content ' ], ENT_QUOTES , 'UTF-8 ' );
339- 					$ lines [] = '🚚 '  . $ newLineNum  . '  '   .  $ content  .  '  (moved  from line   '  . $ oldLineNum  . ') '  ;
376+ 					$ lines [] = '🚚 '  . $ newLineNum  . ' ( from  '  . $ oldLineNum  . ')   '   .  $ content  ;
340377					break ;
341378			}
342379		}
343380
344- 		// Join all lines without line breaks  for clean inline appearance  
381+ 		// Join all lines for display  
345382		if  (empty ($ lines )) {
346383			return  '' ;
347384		}
348- 		 
349- 		// Concatenate lines with spaces  for inline display  
350- 		return  implode ('  ' , $ lines );
385+ 
386+ 		// Concatenate lines with bullet separator  for better readability in single line  
387+ 		return  implode (' |   ' , $ lines );
351388	}
352389
353390	/** 
@@ -404,16 +441,13 @@ private function generateWordLevelDiff(string $oldLine, string $newLine): string
404441	 * @return bool 
405442	 */ 
private function isCheckboxChange(string $oldLine, string $newLine): bool {
	// Both lines must be markdown checkboxes ("- [ ]", "- [x]" or "- [X]");
	// preg_match() returns 1 only on a successful match.
	if (preg_match(self::CHECKBOX_PATTERN, $oldLine, $oldMatches) !== 1
		|| preg_match(self::CHECKBOX_PATTERN, $newLine, $newMatches) !== 1) {
		return false;
	}

	// The prefix (indentation and dash) must be identical; the text after the
	// checkbox is allowed to change.
	if ($oldMatches[1] !== $newMatches[1]) {
		return false;
	}

	// Compare the logical state, not the raw marker: "[x]" and "[X]" are both
	// "checked", so a case-only difference is not a checkbox toggle.
	$oldChecked = strtolower($oldMatches[2]) === 'x';
	$newChecked = strtolower($newMatches[2]) === 'x';

	return $oldChecked !== $newChecked;
}
419453
@@ -425,20 +459,61 @@ private function isCheckboxChange(string $oldLine, string $newLine): bool {
425459	 * @return string 
426460	 */ 
private function generateCheckboxDiff(string $oldLine, string $newLine): string {
	// NOTE(review): both lines are expected to match CHECKBOX_PATTERN (i.e. the
	// caller checked isCheckboxChange() first) - confirm at the call site.
	preg_match(self::CHECKBOX_PATTERN, $oldLine, $oldMatches);
	preg_match(self::CHECKBOX_PATTERN, $newLine, $newMatches);

	// Capture groups: 1 = indentation and dash, 2 = state character, 3 = text after the checkbox.
	// The prefix and suffix come from user-authored text and end up in HTML
	// output, so they are escaped before being concatenated.
	$prefix = htmlspecialchars($oldMatches[1], ENT_QUOTES, 'UTF-8');
	$oldSuffix = $oldMatches[3];
	$newSuffix = $newMatches[3];

	// Convert checkbox states to checkbox symbols
	$oldCheckbox = (trim(strtolower($oldMatches[2])) === 'x') ? '☑️' : '🔲';
	$newCheckbox = (trim(strtolower($newMatches[2])) === 'x') ? '☑️' : '🔲';

	// Show a clean transition without del/ins tags on the checkboxes themselves
	$transition = $prefix . $oldCheckbox . '→' . $newCheckbox;

	// If the text after the checkbox changed too, append a word-level diff of it.
	// NOTE(review): generateWordLevelDiff() is assumed to escape its own output,
	// as it is also fed raw lines for "modify" operations - confirm.
	if ($oldSuffix !== $newSuffix) {
		return $transition . ' ' . $this->generateWordLevelDiff($oldSuffix, $newSuffix);
	}

	// Unchanged suffix: emit it once, HTML-escaped.
	return $transition . htmlspecialchars($oldSuffix, ENT_QUOTES, 'UTF-8');
}
481+ 
/**
 * Format a single line for display, replacing special markdown markers
 * (code fences, callout openers, blockquotes) with emoji shorthands.
 *
 * Any text taken from the line is HTML-escaped before it is returned.
 *
 * @param string $line Raw line from the old or new text
 * @return string|null Display text, or null when the line is a bare ":::"
 *                     block terminator and should be ignored
 */
private function formatSpecialLine(string $line): ?string {
	$trimmed = trim($line);

	// Ignore block ending markers
	if (preg_match(self::BLOCK_END_PATTERN, $trimmed)) {
		return null;
	}

	// Code fence (``` or ```language): show only a marker, not the fence itself
	if (preg_match(self::CODE_BLOCK_PATTERN, $trimmed)) {
		return '→📝';
	}

	// Callout opener (::: info|success|warn|error): show the matching emoji
	if (preg_match(self::CALLOUT_BLOCK_PATTERN, $trimmed, $matches)) {
		$type = strtolower($matches[1]);
		$emoji = self::CALLOUT_EMOJIS[$type] ?? 'ℹ️';
		return '→' . $emoji;
	}

	// Blockquote: drop the leading ">" marker and prefix the quoted text.
	// The marker is stripped from the trimmed line because the pattern is
	// anchored at the start and would not match an indented "> quote".
	if (preg_match(self::QUOTE_PATTERN, $trimmed)) {
		$quotedText = preg_replace(self::QUOTE_PATTERN, '', $trimmed);
		return '→💬 ' . htmlspecialchars($quotedText, ENT_QUOTES, 'UTF-8');
	}

	// Not a special pattern: return the original line, escaped
	return htmlspecialchars($line, ENT_QUOTES, 'UTF-8');
}
443518
444519	/** 
0 commit comments