Properly handle unicode length (#16)

lokiuz · web-flow · commit e33ecc8d0443 · 2016-10-23T12:59:05.000+02:00
* properly handle unicode length, use Array.from to handle surrogate pairs

* tests for unicode
diff --git a/package.json b/package.json
@@ -21,7 +21,6 @@
   ],
   "author": "",
   "license": "MIT",
-  "dependencies": {},
   "devDependencies": {
     "babel-cli": "^6.6.5",
     "babel-core": "^6.7.2",
diff --git a/src/RawParser.js b/src/RawParser.js
@@ -1,5 +1,4 @@
 import ContentNode from './ContentNode';
-
   /**
    * creates nodes with entity keys and the endOffset
    */
@@ -62,6 +61,10 @@ function getRelevantIndexes(text, inlineRanges, entityRanges = []) {
   return uniqueRelevantIndexes.sort((aa, bb) => (aa - bb));
 }
 
+/**
+ * Slices the array of code points and joins the result back into a string
+ */
+const getString = (array, from, to) => array.slice(from, to).join('');
 
 export default class RawParser {
 
@@ -91,12 +94,12 @@ export default class RawParser {
                        ? indexes[key + 1] - index
                        : 1;
       // add all the chars up to next relevantIndex
-      const text = this.text.substr(index, distance);
+      const text = getString(this.textArray, index, index + distance);
       node.pushContent(text, characterStyles);
 
       // if thers no next index and thers more text left to push
       if (!indexes[key + 1] && index < end) {
-        node.pushContent(this.text.substring(index + 1, end), this.relevantStyles(end - 1));
+        node.pushContent(getString(this.textArray, index + 1, end), this.relevantStyles(end - 1));
       }
     });
     return node;
@@ -108,7 +111,9 @@ export default class RawParser {
    * the idea is still mostly same as backdraft.js (https://github.com/evanc/backdraft-js)
    */
   parse({ text, inlineStyleRanges: ranges, entityRanges }) {
-    this.text = text;
+    // Some unicode characters actually have a length of more than 1 (surrogate pairs),
+    // so this creates an array of code points using the es6 string iterator
+    this.textArray = Array.from(text);
     this.ranges = ranges;
     this.iterator = 0;
     // get all the relevant indexes for whole block
diff --git a/test/cleanup.js b/test/cleanup.js
@@ -10,7 +10,6 @@ chai.should();
 const inline = {
   BOLD: (children) => `<strong>${children.join('')}</strong>`,
   ITALIC: (children) => `<em>${children.join('')}</em>`,
-  UND: (children) => `<em>${children.join('')}</em>`,
 };
 
 const atomicBlocks = {
diff --git a/test/raws.js b/test/raws.js
@@ -484,6 +484,81 @@ export const rawWithEmptyBlocks2 = {
   ],
 };
 
+export const rawWithEmoji = {
+  entityMap: {},
+  blocks: [{
+    key: 'az45b',
+    text: 'abc 😀', // eslint-disable-line max-len
+    type: 'unstyled',
+    depth: 0,
+    inlineStyleRanges: [
+      {
+        offset: 0,
+        length: 5,
+        style: 'BOLD',
+      },
+      {
+        offset: 4,
+        length: 1,
+        style: 'ITALIC',
+      },
+    ],
+    entityRanges: [],
+  }],
+};
+
+export const rawWithEmoji2 = {
+  entityMap: {},
+  blocks: [{
+    key: 'az45b',
+    text: '😺😀', // eslint-disable-line max-len
+    type: 'unstyled',
+    depth: 0,
+    inlineStyleRanges: [
+      {
+        offset: 0,
+        length: 1,
+        style: 'BOLD',
+      },
+    ],
+    entityRanges: [],
+  }],
+};
+
+export const rawWithEmoji3 = {
+  entityMap: {
+    0: {
+      type: 'ENTITY',
+      mutability: 'MUTABLE',
+      data: {
+        data: {
+          color: '#ee6a56',
+        },
+      },
+    },
+  },
+  blocks: [{
+    key: 'az45b',
+    text: '😺123456😀', // eslint-disable-line max-len
+    type: 'unstyled',
+    depth: 0,
+    inlineStyleRanges: [
+      {
+        offset: 0,
+        length: 1,
+        style: 'BOLD',
+      },
+    ],
+    entityRanges: [
+      {
+        offset: 6,
+        length: 2,
+        key: 0,
+      },
+    ],
+  }],
+};
+
 export const rawWithEmptyBlocks3 = {
   entityMap: {},
   blocks: [
diff --git a/test/render.js b/test/render.js
@@ -9,8 +9,7 @@ const should = chai.should();
 // render to HTML
 const inline = {
   BOLD: (children) => `<strong>${children.join('')}</strong>`,
-  ITALIC: (children) => `<em>${children.join('')}</em>`,
-  UND: (children) => `<em>${children.join('')}</em>`,
+  ITALIC: (children) => `<em>${children.join('')}</em>`
 };
 
 const blocks = {
@@ -61,7 +60,6 @@ const blocksWithKeys = {
 const inlineNoJoin = {
   BOLD: (children) => `<strong>${children}</strong>`,
   ITALIC: (children) => `<em>${children}</em>`,
-  UND: (children) => `<em>${children}</em>`,
 };
 
 const entitiesNoJoin = {
diff --git a/test/unicode.js b/test/unicode.js
@@ -0,0 +1,43 @@
+import chai from 'chai';
+import redraft from '../src';
+import * as raws from './raws';
+import { joinRecursively } from './helpers';
+
+chai.should();
+
+const inline = {
+  BOLD: (children) => `<strong>${children.join('')}</strong>`,
+  ITALIC: (children) => `<em>${children.join('')}</em>`,
+};
+
+const blocks = {
+  unstyled: (children) => `<p>${joinRecursively(children)}</p>`,
+};
+
+const entities = {
+  ENTITY: (children, entity) => `<div style="color: ${entity.data.color}" >${joinRecursively(children)}</div>`,
+};
+
+const renderers = {
+  inline,
+  blocks,
+  entities,
+};
+
+describe('redraft with unicode', () => {
+  it('should apply ranges properly for surrogate pairs at the end of a block', () => {
+    const rendered = redraft(raws.rawWithEmoji, renderers);
+    const joined = joinRecursively(rendered);
+    joined.should.equal('<p><strong>abc <em>😀</em></strong></p>'); // eslint-disable-line max-len
+  });
+  it('should apply ranges properly for multiple surrogate pairs', () => {
+    const rendered = redraft(raws.rawWithEmoji2, renderers);
+    const joined = joinRecursively(rendered);
+    joined.should.equal('<p><strong>😺</strong>😀</p>'); // eslint-disable-line max-len
+  });
+  it('should handle entities with surrogate pairs', () => {
+    const rendered = redraft(raws.rawWithEmoji3, renderers);
+    const joined = joinRecursively(rendered);
+    joined.should.equal('<p><strong>😺</strong>12345<div style="color: #ee6a56" >6😀</div></p>'); // eslint-disable-line max-len
+  });
+});