diff --git a/CHANGELOG.md b/CHANGELOG.md index ca86cce4f524..e5aa82c80f8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## main +### Fixes + +- `[jest-diff]` Fix `diffStrings()` to not break apart surrogate pairs ([#15812](https://github.com/jestjs/jest/pull/15812)) + ## 30.1.3 ### Fixes diff --git a/packages/jest-diff/src/__tests__/diffStringsRaw.test.ts b/packages/jest-diff/src/__tests__/diffStringsRaw.test.ts index 47620b7b2115..4b4d437f661c 100644 --- a/packages/jest-diff/src/__tests__/diffStringsRaw.test.ts +++ b/packages/jest-diff/src/__tests__/diffStringsRaw.test.ts @@ -31,4 +31,40 @@ describe('diffStringsRaw', () => { expect(received).toEqual(expected); }); + + describe('unicode', () => { + test('surrogate pairs', () => { + const expected: Array<Diff> = [ + new Diff(DIFF_DELETE, '😞'), + new Diff(DIFF_INSERT, '😄'), + ]; + const received = diffStringsRaw('😞', '😄', false); + + expect(received).toEqual(expected); + }); + test('grapheme clusters', () => { + const expected: Array<Diff> = [ + new Diff(DIFF_DELETE, '👩‍👩‍'), + new Diff(DIFF_EQUAL, '👧'), + new Diff(DIFF_DELETE, '‍👦'), + new Diff(DIFF_EQUAL, ' 🇺'), + new Diff(DIFF_DELETE, '🇸'), + new Diff(DIFF_INSERT, '🇦'), + ]; + const received = diffStringsRaw('👩‍👩‍👧‍👦 🇺🇸', '👧 🇺🇦', false); + + expect(received).toEqual(expected); + }); + test('normalization', () => { + const expected: Array<Diff> = [ + new Diff(DIFF_EQUAL, 'ma'), + new Diff(DIFF_DELETE, 'n\u0303'), + new Diff(DIFF_INSERT, 'ñ'), + new Diff(DIFF_EQUAL, 'ana'), + ]; + const received = diffStringsRaw('man\u0303ana', 'mañana', false); + + expect(received).toEqual(expected); + }); + }); }); diff --git a/packages/jest-diff/src/diffStrings.ts b/packages/jest-diff/src/diffStrings.ts index 0b733da62aa6..c1ff9463570f 100644 --- a/packages/jest-diff/src/diffStrings.ts +++ b/packages/jest-diff/src/diffStrings.ts @@ -9,7 +9,11 @@ import diffSequences from '@jest/diff-sequences'; import {DIFF_DELETE, 
DIFF_EQUAL, DIFF_INSERT, Diff} from './cleanupSemantic'; const diffStrings = (a: string, b: string): Array<Diff> => { - const isCommon = (aIndex: number, bIndex: number) => a[aIndex] === b[bIndex]; + // Split strings into code points to handle surrogate pairs. + const aCodepoints = [...a]; + const bCodepoints = [...b]; + const isCommon = (aIndex: number, bIndex: number) => + aCodepoints[aIndex] === bCodepoints[bIndex]; let aIndex = 0; let bIndex = 0; @@ -21,25 +25,36 @@ const diffStrings = (a: string, b: string): Array<Diff> => { bCommon: number, ) => { if (aIndex !== aCommon) { - diffs.push(new Diff(DIFF_DELETE, a.slice(aIndex, aCommon))); + diffs.push( + new Diff(DIFF_DELETE, aCodepoints.slice(aIndex, aCommon).join('')), + ); } if (bIndex !== bCommon) { - diffs.push(new Diff(DIFF_INSERT, b.slice(bIndex, bCommon))); + diffs.push( + new Diff(DIFF_INSERT, bCodepoints.slice(bIndex, bCommon).join('')), + ); } aIndex = aCommon + nCommon; // number of characters compared in a bIndex = bCommon + nCommon; // number of characters compared in b - diffs.push(new Diff(DIFF_EQUAL, b.slice(bCommon, bIndex))); + diffs.push( + new Diff(DIFF_EQUAL, bCodepoints.slice(bCommon, bIndex).join('')), + ); }; - diffSequences(a.length, b.length, isCommon, foundSubsequence); + diffSequences( + aCodepoints.length, + bCodepoints.length, + isCommon, + foundSubsequence, + ); // After the last common subsequence, push remaining change items. - if (aIndex !== a.length) { - diffs.push(new Diff(DIFF_DELETE, a.slice(aIndex))); + if (aIndex !== aCodepoints.length) { + diffs.push(new Diff(DIFF_DELETE, aCodepoints.slice(aIndex).join(''))); } - if (bIndex !== b.length) { - diffs.push(new Diff(DIFF_INSERT, b.slice(bIndex))); + if (bIndex !== bCodepoints.length) { + diffs.push(new Diff(DIFF_INSERT, bCodepoints.slice(bIndex).join(''))); } return diffs;