33declare (strict_types=1 );
44
55/**
6- * SPDX-FileCopyrightText: 2018 Nextcloud GmbH and Nextcloud contributors
6+ * SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
77 * SPDX-License-Identifier: AGPL-3.0-or-later
88 */
99
1414 */
1515class DiffService {
1616
17+ /**
18+ * Pattern for markdown checkboxes: - [ ] or - [x] or - [X]
19+ */
20+ private const CHECKBOX_PATTERN = '/^(\s*-\s*)\[([ xX])\](.*)/i ' ;
21+
22+ /**
23+ * Pattern for code blocks: ``` or ```language
24+ */
25+ private const CODE_BLOCK_PATTERN = '/^```/ ' ;
26+
27+ /**
28+ * Pattern for callout blocks: ::: info, ::: success, ::: warn, ::: error
29+ */
30+ private const CALLOUT_BLOCK_PATTERN = '/^:::\s*(info|success|warn|error)/i ' ;
31+
32+ /**
33+ * Pattern for block endings: :::
34+ */
35+ private const BLOCK_END_PATTERN = '/^:::$/ ' ;
36+
37+ /**
38+ * Pattern for blockquotes: > at start of line
39+ */
40+ private const QUOTE_PATTERN = '/^>\s*/ ' ;
41+
42+ /**
43+ * Callout block emojis
44+ */
45+ private const CALLOUT_EMOJIS = [
46+ 'info ' => 'ℹ️ ' ,
47+ 'success ' => '✅ ' ,
48+ 'warn ' => '⚠️ ' ,
49+ 'error ' => '🔴 ' ,
50+ ];
51+
1752 /**
1853 * Generate a visual diff between two text strings
1954 *
@@ -135,19 +170,18 @@ private function renderIntelligentDiffHtml(array $operations, array $oldLines, a
135170 }
136171
137172 // Handle intelligent word-level diffing for modified lines
138- return $ this ->enhanceWithWordLevelDiff ('' , $ operations , $ oldLines , $ newLines );
173+ return $ this ->enhanceWithWordLevelDiff ($ operations , $ oldLines , $ newLines );
139174 }
140175
141176 /**
142177 * Enhance diff with word-level granularity for similar lines
143178 *
144- * @param string $html Current HTML diff
145179 * @param array $operations
146180 * @param array $oldLines
147181 * @param array $newLines
148182 * @return string
149183 */
150- private function enhanceWithWordLevelDiff (string $ html , array $ operations , array $ oldLines , array $ newLines ): string {
184+ private function enhanceWithWordLevelDiff (array $ operations , array $ oldLines , array $ newLines ): string {
151185 // Find remove/add pairs that might be line modifications
152186 // First pass: collect all removes and adds
153187 $ removes = [];
@@ -209,57 +243,58 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
209243
210244 // Third pass: detect modifications from remaining removes/adds
211245 $ usedAdds = $ moveDetectedAdds ; // Start with adds already used in moves
212-
246+ $ usedRemoves = $ moveDetectedRemoves ; // Start with removes already used in moves
247+
213248 // Process remaining removes and try to find matching adds for modifications
214249 foreach ($ removes as $ removeIndex => $ removeOp ) {
215250 // Skip removes already used in moves
216- if (in_array ($ removeIndex , $ moveDetectedRemoves )) {
251+ if (in_array ($ removeIndex , $ usedRemoves )) {
217252 continue ;
218253 }
219-
254+
220255 $ bestMatch = null ;
221256 $ bestScore = -1 ;
222257 $ bestAddIndex = -1 ;
223-
258+
224259 $ oldLine = $ oldLines [$ removeOp ['old_line ' ]] ?? '' ;
225260 $ oldLineNum = $ removeOp ['old_line ' ] + 1 ;
226-
261+
227262 // Look for best matching add operation
228263 foreach ($ adds as $ addIndex => $ addOp ) {
229264 if (in_array ($ addIndex , $ usedAdds )) {
230265 continue ;
231266 }
232-
267+
233268 $ newLine = $ newLines [$ addOp ['new_line ' ]] ?? '' ;
234269 $ newLineNum = $ addOp ['new_line ' ] + 1 ;
235-
270+
236271 // Calculate matching score
237272 $ score = 0 ;
238-
273+
239274 // Same line number gets highest priority
240275 if ($ oldLineNum === $ newLineNum ) {
241276 $ score += 100 ;
242277 }
243-
278+
244279 // Similar content gets secondary priority
245280 if ($ this ->shouldUseWordLevelDiff ($ oldLine , $ newLine )) {
246281 $ maxLen = max (strlen ($ oldLine ), strlen ($ newLine ));
247282 $ distance = levenshtein ($ oldLine , $ newLine );
248283 $ similarity = 1 - ($ distance / $ maxLen );
249284 $ score += $ similarity * 50 ; // Up to 50 points for similarity
250285 }
251-
286+
252287 // Proximity bonus (closer line numbers get bonus)
253288 $ proximityBonus = max (0 , 10 - abs ($ oldLineNum - $ newLineNum ));
254289 $ score += $ proximityBonus ;
255-
290+
256291 if ($ score > $ bestScore ) {
257292 $ bestScore = $ score ;
258293 $ bestMatch = $ addOp ;
259294 $ bestAddIndex = $ addIndex ;
260295 }
261296 }
262-
297+
263298 // If we found a good match, create a modify operation
264299 if ($ bestMatch && $ bestScore > 10 ) { // Minimum threshold
265300 $ enhancedOps [] = [
@@ -268,9 +303,11 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
268303 'new_line ' => $ bestMatch ['new_line ' ]
269304 ];
270305 $ usedAdds [] = $ bestAddIndex ;
306+ $ usedRemoves [] = $ removeIndex ;
271307 } else {
272308 // No good match, keep as remove
273309 $ enhancedOps [] = $ removeOp ;
310+ $ usedRemoves [] = $ removeIndex ;
274311 }
275312 }
276313
@@ -284,15 +321,7 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
284321
285322 // Add remaining unused remove operations (not involved in moves or modifications)
286323 foreach ($ removes as $ removeIndex => $ removeOp ) {
287- // Skip removes already used in moves or modifications
288- $ alreadyUsed = false ;
289- foreach ($ enhancedOps as $ op ) {
290- if (($ op ['type ' ] === 'modify ' || $ op ['type ' ] === 'move ' ) && $ op ['old_line ' ] === $ removeOp ['old_line ' ]) {
291- $ alreadyUsed = true ;
292- break ;
293- }
294- }
295- if (!$ alreadyUsed ) {
324+ if (!in_array ($ removeIndex , $ usedRemoves )) {
296325 $ enhancedOps [] = $ removeOp ;
297326 }
298327 }
@@ -303,24 +332,32 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
303332 }
304333
305334 // Now rebuild HTML with only changed lines and line number prefixes
335+ // Format each operation for display, using actual line positions in NEW text
306336 $ lines = [];
307-
337+
308338 foreach ($ enhancedOps as $ operation ) {
309339 switch ($ operation ['type ' ]) {
310340 case 'add ' :
311341 $ line = $ newLines [$ operation ['new_line ' ]] ?? '' ;
312- $ lineNumber = $ operation ['new_line ' ] + 1 ; // 1-based line numbers
342+ $ newLineNumber = $ operation ['new_line ' ] + 1 ; // 1-based line numbers
313343 // Skip empty line additions
314344 if (!empty (trim ($ line ))) {
315- $ lines [] = '➕ ' . $ lineNumber . ' <ins> ' . htmlspecialchars ($ line , ENT_QUOTES , 'UTF-8 ' ) . '</ins> ' ;
345+ $ formatted = $ this ->formatSpecialLine ($ line );
346+ if ($ formatted !== null ) {
347+ $ lines [] = '✨ ' . $ newLineNumber . ' <ins> ' . $ formatted . '</ins> ' ;
348+ }
316349 }
317350 break ;
318351 case 'remove ' :
319352 $ line = $ oldLines [$ operation ['old_line ' ]] ?? '' ;
320- $ lineNumber = $ operation ['old_line ' ] + 1 ; // 1-based line numbers
353+ $ oldLineNumber = $ operation ['old_line ' ] + 1 ; // 1-based line numbers
321354 // Skip empty line removals
322355 if (!empty (trim ($ line ))) {
323- $ lines [] = '🗑️ ' . $ lineNumber . ' <del> ' . htmlspecialchars ($ line , ENT_QUOTES , 'UTF-8 ' ) . '</del> ' ;
356+ $ formatted = $ this ->formatSpecialLine ($ line );
357+ if ($ formatted !== null ) {
358+ // Show old line number with strikethrough to indicate it's from old version
359+ $ lines [] = '🗑️<del> ' . $ oldLineNumber . '</del> <del> ' . $ formatted . '</del> ' ;
360+ }
324361 }
325362 break ;
326363 case 'keep ' :
@@ -329,25 +366,25 @@ private function enhanceWithWordLevelDiff(string $html, array $operations, array
329366 case 'modify ' :
330367 $ oldLine = $ oldLines [$ operation ['old_line ' ]] ?? '' ;
331368 $ newLine = $ newLines [$ operation ['new_line ' ]] ?? '' ;
332- $ lineNumber = $ operation ['old_line ' ] + 1 ; // Use old line number as reference
333- $ lines [] = '✏️ ' . $ lineNumber . ' ' . $ this ->generateWordLevelDiff ($ oldLine , $ newLine );
369+ $ newLineNumber = $ operation ['new_line ' ] + 1 ; // 1-based line numbers
370+ $ lines [] = '✏️ ' . $ newLineNumber . ' ' . $ this ->generateWordLevelDiff ($ oldLine , $ newLine );
334371 break ;
335372 case 'move ' :
336373 $ oldLineNum = $ operation ['old_line ' ] + 1 ;
337374 $ newLineNum = $ operation ['new_line ' ] + 1 ;
338375 $ content = htmlspecialchars ($ operation ['content ' ], ENT_QUOTES , 'UTF-8 ' );
339- $ lines [] = '🚚 ' . $ newLineNum . ' ' . $ content . ' (moved from line ' . $ oldLineNum . ') ' ;
376+ $ lines [] = '🚚 ' . $ newLineNum . ' ( from ' . $ oldLineNum . ') ' . $ content ;
340377 break ;
341378 }
342379 }
343380
344- // Join all lines without line breaks for clean inline appearance
381+ // Join all lines for display
345382 if (empty ($ lines )) {
346383 return '' ;
347384 }
348-
349- // Concatenate lines with spaces for inline display
350- return implode (' ' , $ lines );
385+
386+ // Concatenate lines with bullet separator for better readability in single line
387+ return implode (' | ' , $ lines );
351388 }
352389
353390 /**
@@ -404,16 +441,13 @@ private function generateWordLevelDiff(string $oldLine, string $newLine): string
404441 * @return bool
405442 */
406443 private function isCheckboxChange (string $ oldLine , string $ newLine ): bool {
407- // Pattern for markdown checkboxes: - [ ] or - [x] or - [X]
408- $ checkboxPattern = '/^(\s*-\s*)\[([ xX])\](.*)/i ' ;
409-
410- preg_match ($ checkboxPattern , $ oldLine , $ oldMatches );
411- preg_match ($ checkboxPattern , $ newLine , $ newMatches );
412-
413- // Both lines must be checkboxes with same prefix/suffix but different checkbox state
444+ preg_match (self ::CHECKBOX_PATTERN , $ oldLine , $ oldMatches );
445+ preg_match (self ::CHECKBOX_PATTERN , $ newLine , $ newMatches );
446+
447+ // Both lines must be checkboxes with different states
448+ // We only require same prefix and different state - suffix can change
414449 return !empty ($ oldMatches ) && !empty ($ newMatches ) &&
415- $ oldMatches [1 ] === $ newMatches [1 ] && // Same prefix
416- $ oldMatches [3 ] === $ newMatches [3 ] && // Same suffix (text after checkbox)
450+ $ oldMatches [1 ] === $ newMatches [1 ] && // Same prefix (indentation and dash)
417451 $ oldMatches [2 ] !== $ newMatches [2 ]; // Different checkbox state
418452 }
419453
@@ -425,20 +459,61 @@ private function isCheckboxChange(string $oldLine, string $newLine): bool {
425459 * @return string
426460 */
427461 private function generateCheckboxDiff (string $ oldLine , string $ newLine ): string {
428- $ checkboxPattern = '/^(\s*-\s*)\[([ xX])\](.*)/i ' ;
429-
430- preg_match ($ checkboxPattern , $ oldLine , $ oldMatches );
431- preg_match ($ checkboxPattern , $ newLine , $ newMatches );
432-
433- $ prefix = htmlspecialchars ($ oldMatches [1 ], ENT_QUOTES , 'UTF-8 ' );
434- $ suffix = htmlspecialchars ($ oldMatches [3 ], ENT_QUOTES , 'UTF-8 ' );
435-
436- // Convert checkbox states to emoji symbols
437- $ oldCheckbox = (trim (strtolower ($ oldMatches [2 ])) === 'x ' ) ? '☑ ' : '☐ ' ;
438- $ newCheckbox = (trim (strtolower ($ newMatches [2 ])) === 'x ' ) ? '☑ ' : '☐ ' ;
439-
462+ preg_match (self ::CHECKBOX_PATTERN , $ oldLine , $ oldMatches );
463+ preg_match (self ::CHECKBOX_PATTERN , $ newLine , $ newMatches );
464+
465+ $ prefix = $ oldMatches [1 ];
466+ $ oldSuffix = $ oldMatches [3 ];
467+ $ newSuffix = $ newMatches [3 ];
468+
469+ // Convert checkbox states to checkbox symbols
470+ $ oldCheckbox = (trim (strtolower ($ oldMatches [2 ])) === 'x ' ) ? '☑️ ' : '🔲 ' ;
471+ $ newCheckbox = (trim (strtolower ($ newMatches [2 ])) === 'x ' ) ? '☑️ ' : '🔲 ' ;
472+
473+ // If suffix changed too, show that as well
474+ if ($ oldSuffix !== $ newSuffix ) {
475+ return $ prefix . $ oldCheckbox . '→ ' . $ newCheckbox . ' ' . $ this ->generateWordLevelDiff ($ oldSuffix , $ newSuffix );
476+ }
477+
440478 // Show clean transition without del/ins tags on the checkboxes themselves
441- return $ prefix . $ oldCheckbox . '→ ' . $ newCheckbox . $ suffix ;
479+ return $ prefix . $ oldCheckbox . '→ ' . $ newCheckbox . $ oldSuffix ;
480+ }
481+
482+ /**
483+ * Format special lines (code blocks, callouts, quotes) with emojis
484+ *
485+ * @param string $line
486+ * @return string|null Returns formatted string or null if line should be ignored
487+ */
488+ private function formatSpecialLine (string $ line ): ?string {
489+ $ trimmed = trim ($ line );
490+
491+ // Ignore block ending markers
492+ if (preg_match (self ::BLOCK_END_PATTERN , $ trimmed )) {
493+ return null ;
494+ }
495+
496+ // Format code block markers
497+ if (preg_match (self ::CODE_BLOCK_PATTERN , $ trimmed )) {
498+ return '→📝 ' ;
499+ }
500+
501+ // Format callout block markers
502+ if (preg_match (self ::CALLOUT_BLOCK_PATTERN , $ trimmed , $ matches )) {
503+ $ type = strtolower ($ matches [1 ]);
504+ $ emoji = self ::CALLOUT_EMOJIS [$ type ] ?? 'ℹ️ ' ;
505+ return '→ ' . $ emoji ;
506+ }
507+
508+ // Format blockquotes
509+ if (preg_match (self ::QUOTE_PATTERN , $ trimmed )) {
510+ // Remove the > marker and return the quoted text with emoji
511+ $ quotedText = preg_replace (self ::QUOTE_PATTERN , '' , $ line );
512+ return '→💬 ' . htmlspecialchars ($ quotedText , ENT_QUOTES , 'UTF-8 ' );
513+ }
514+
515+ // Return original line if not a special pattern
516+ return htmlspecialchars ($ line , ENT_QUOTES , 'UTF-8 ' );
442517 }
443518
444519 /**
0 commit comments