Skip to content

Commit e33ecc8

Browse files
authored
Properly handle unicode length (#16)
* properly handle unicode length, use Array.from to handle surrogate pairs
* tests for unicode
1 parent 30bce46 commit e33ecc8

File tree

6 files changed

+128
-9
lines changed

6 files changed

+128
-9
lines changed

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
],
2222
"author": "",
2323
"license": "MIT",
24-
"dependencies": {},
2524
"devDependencies": {
2625
"babel-cli": "^6.6.5",
2726
"babel-core": "^6.7.2",

src/RawParser.js

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import ContentNode from './ContentNode';
2-
32
/**
43
* creates nodes with entity keys and the endOffset
54
*/
@@ -62,6 +61,10 @@ function getRelevantIndexes(text, inlineRanges, entityRanges = []) {
6261
return uniqueRelevantIndexes.sort((aa, bb) => (aa - bb));
6362
}
6463

64+
/**
65+
* Slices the decoded ucs2 array and encodes the result back to a string representation
66+
*/
67+
const getString = (array, from, to) => array.slice(from, to).join('');
6568

6669
export default class RawParser {
6770

@@ -91,12 +94,12 @@ export default class RawParser {
9194
? indexes[key + 1] - index
9295
: 1;
9396
// add all the chars up to next relevantIndex
94-
const text = this.text.substr(index, distance);
97+
const text = getString(this.textArray, index, index + distance);
9598
node.pushContent(text, characterStyles);
9699

97100
// if there's no next index and there's more text left to push
98101
if (!indexes[key + 1] && index < end) {
99-
node.pushContent(this.text.substring(index + 1, end), this.relevantStyles(end - 1));
102+
node.pushContent(getString(this.textArray, index + 1, end), this.relevantStyles(end - 1));
100103
}
101104
});
102105
return node;
@@ -108,7 +111,9 @@ export default class RawParser {
108111
* the idea is still mostly same as backdraft.js (https://github.com/evanc/backdraft-js)
109112
*/
110113
parse({ text, inlineStyleRanges: ranges, entityRanges }) {
111-
this.text = text;
114+
// Some unicode characters actually have a length of more than 1
115+
// this creates an array of code points using es6 string iterator
116+
this.textArray = Array.from(text);
112117
this.ranges = ranges;
113118
this.iterator = 0;
114119
// get all the relevant indexes for whole block

test/cleanup.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ chai.should();
1010
const inline = {
1111
BOLD: (children) => `<strong>${children.join('')}</strong>`,
1212
ITALIC: (children) => `<em>${children.join('')}</em>`,
13-
UND: (children) => `<em>${children.join('')}</em>`,
1413
};
1514

1615
const atomicBlocks = {

test/raws.js

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,81 @@ export const rawWithEmptyBlocks2 = {
484484
],
485485
};
486486

487+
export const rawWithEmoji = {
488+
entityMap: {},
489+
blocks: [{
490+
key: 'az45b',
491+
text: 'abc 😀', // eslint-disable-line max-len
492+
type: 'unstyled',
493+
depth: 0,
494+
inlineStyleRanges: [
495+
{
496+
offset: 0,
497+
length: 5,
498+
style: 'BOLD',
499+
},
500+
{
501+
offset: 4,
502+
length: 1,
503+
style: 'ITALIC',
504+
},
505+
],
506+
entityRanges: [],
507+
}],
508+
};
509+
510+
export const rawWithEmoji2 = {
511+
entityMap: {},
512+
blocks: [{
513+
key: 'az45b',
514+
text: '😺😀', // eslint-disable-line max-len
515+
type: 'unstyled',
516+
depth: 0,
517+
inlineStyleRanges: [
518+
{
519+
offset: 0,
520+
length: 1,
521+
style: 'BOLD',
522+
},
523+
],
524+
entityRanges: [],
525+
}],
526+
};
527+
528+
export const rawWithEmoji3 = {
529+
entityMap: {
530+
0: {
531+
type: 'ENTITY',
532+
mutability: 'MUTABLE',
533+
data: {
534+
data: {
535+
color: '#ee6a56',
536+
},
537+
},
538+
},
539+
},
540+
blocks: [{
541+
key: 'az45b',
542+
text: '😺123456😀', // eslint-disable-line max-len
543+
type: 'unstyled',
544+
depth: 0,
545+
inlineStyleRanges: [
546+
{
547+
offset: 0,
548+
length: 1,
549+
style: 'BOLD',
550+
},
551+
],
552+
entityRanges: [
553+
{
554+
offset: 6,
555+
length: 2,
556+
key: 0,
557+
},
558+
],
559+
}],
560+
};
561+
487562
export const rawWithEmptyBlocks3 = {
488563
entityMap: {},
489564
blocks: [

test/render.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ const should = chai.should();
99
// render to HTML
1010
const inline = {
1111
BOLD: (children) => `<strong>${children.join('')}</strong>`,
12-
ITALIC: (children) => `<em>${children.join('')}</em>`,
13-
UND: (children) => `<em>${children.join('')}</em>`,
12+
ITALIC: (children) => `<em>${children.join('')}</em>`
1413
};
1514

1615
const blocks = {
@@ -61,7 +60,6 @@ const blocksWithKeys = {
6160
const inlineNoJoin = {
6261
BOLD: (children) => `<strong>${children}</strong>`,
6362
ITALIC: (children) => `<em>${children}</em>`,
64-
UND: (children) => `<em>${children}</em>`,
6563
};
6664

6765
const entitiesNoJoin = {

test/unicode.js

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import chai from 'chai';
2+
import redraft from '../src';
3+
import * as raws from './raws';
4+
import { joinRecursively } from './helpers';
5+
6+
chai.should();
7+
8+
const inline = {
9+
BOLD: (children) => `<strong>${children.join('')}</strong>`,
10+
ITALIC: (children) => `<em>${children.join('')}</em>`,
11+
};
12+
13+
const blocks = {
14+
unstyled: (children) => `<p>${joinRecursively(children)}</p>`,
15+
};
16+
17+
const entities = {
18+
ENTITY: (children, entity) => `<div style="color: ${entity.data.color}" >${joinRecursively(children)}</div>`,
19+
};
20+
21+
const renderers = {
22+
inline,
23+
blocks,
24+
entities,
25+
};
26+
27+
describe('redraft with unicode', () => {
28+
it('should apply ranges properly for surrogate pairs at the end of a block', () => {
29+
const rendered = redraft(raws.rawWithEmoji, renderers);
30+
const joined = joinRecursively(rendered);
31+
joined.should.equal('<p><strong>abc <em>😀</em></strong></p>'); // eslint-disable-line max-len
32+
});
33+
it('should apply ranges properly for multiple surrogate pairs', () => {
34+
const rendered = redraft(raws.rawWithEmoji2, renderers);
35+
const joined = joinRecursively(rendered);
36+
joined.should.equal('<p><strong>😺</strong>😀</p>'); // eslint-disable-line max-len
37+
});
38+
it('should handle entities with surrogate pairs', () => {
39+
const rendered = redraft(raws.rawWithEmoji3, renderers);
40+
const joined = joinRecursively(rendered);
41+
joined.should.equal('<p><strong>😺</strong>12345<div style="color: #ee6a56" >6😀</div></p>'); // eslint-disable-line max-len
42+
});
43+
});

0 commit comments

Comments
 (0)