Skip to content

Commit 9fa4a1c

Browse files
vithprawnsalad
authored and committed
Replace runes (#208)
* add failing zalgo test * drop 'runes'; break long messages by words * use 'isomorphic-textencoder' * add 'regenerator-runtime' * allow breaking graphemes
1 parent a5631d5 commit 9fa4a1c

File tree

5 files changed

+340
-86
lines changed

5 files changed

+340
-86
lines changed

package-lock.json

Lines changed: 23 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77
"dependencies": {
88
"core-js": "^3.0.1",
99
"eventemitter3": "^2.0.2",
10+
"grapheme-splitter": "^1.0.4",
1011
"iconv-lite": "^0.4.24",
12+
"isomorphic-textencoder": "^1.0.1",
1113
"lodash": "^4.17.11",
1214
"middleware-handler": "^0.2.0",
13-
"runes": "github:kiwiirc/runes",
15+
"regenerator-runtime": "^0.13.2",
1416
"socksjs": "^0.5.0"
1517
},
1618
"devDependencies": {

src/client.js

Lines changed: 3 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ var _ = {
88
bind: require('lodash/bind'),
99
};
1010
var EventEmitter = require('eventemitter3');
11-
var runes = require('runes');
1211
var MiddlewareHandler = require('middleware-handler');
1312
var IrcCommandHandler = require('./commands/').CommandHandler;
1413
var IrcMessage = require('./ircmessage');
1514
var Connection = require('./connection');
1615
var NetworkInfo = require('./networkinfo');
1716
var User = require('./user');
1817
var Channel = require('./channel');
18+
var { lineBreak } = require('./linebreak');
1919

2020
var default_transport = null;
2121

@@ -394,7 +394,7 @@ module.exports = class IrcClient extends EventEmitter {
394394
// Maximum length of target + message we can send to the IRC server is 500 characters
395395
// but we need to leave extra room for the sender prefix so the entire message can
396396
// be sent from the IRCd to the target without being truncated.
397-
var blocks = this.stringToBlocks(message, this.options.message_max_length);
397+
var blocks = [...lineBreak(message, { bytes: this.options.message_max_length, allowBreakingWords: true, allowBreakingGraphemes: true })];
398398

399399
blocks.forEach(function(block) {
400400
that.raw(commandName, target, block);
@@ -585,7 +585,7 @@ module.exports = class IrcClient extends EventEmitter {
585585

586586
var commandName = 'ACTION';
587587
var blockLength = this.options.message_max_length - (commandName.length + 3);
588-
var blocks = this.stringToBlocks(message, blockLength);
588+
var blocks = [...lineBreak(message, { bytes: blockLength, allowBreakingWords: true, allowBreakingGraphemes: true })];
589589

590590
blocks.forEach(function(block) {
591591
that.ctcpRequest(target, commandName, block);
@@ -757,54 +757,4 @@ module.exports = class IrcClient extends EventEmitter {
757757
matchAction(match_regex, cb) {
758758
return this.match(match_regex, cb, 'action');
759759
}
760-
761-
/**
762-
* Truncate a string into blocks of a set size
763-
*/
764-
stringToBlocks(str, block_size) {
765-
block_size = block_size || 350;
766-
767-
// Quickly return if input string fits in a single block
768-
if (str.length <= block_size) {
769-
return [str];
770-
}
771-
772-
var chars = runes(str);
773-
var blocks = [];
774-
var start_index = 0;
775-
var end_index = 0;
776-
var current_block_length = 0;
777-
var current_char_length = 0;
778-
779-
do {
780-
do {
781-
current_char_length = chars[end_index].length;
782-
current_block_length += current_char_length;
783-
784-
// If character does not fit in a single block, include it in current block anyway
785-
// and split it later on by falling back to simple substring
786-
if (current_char_length > block_size) {
787-
end_index++;
788-
}
789-
}
790-
while (current_block_length <= block_size && ++end_index < chars.length);
791-
792-
var block = chars.slice(start_index, end_index).join('');
793-
794-
// Fallback to plain substring if we are unable to fit unicode characters in a single block
795-
if (block.length > block_size) {
796-
for (current_char_length = 0; current_char_length < block.length; current_char_length += block_size) {
797-
blocks.push(block.substr(current_char_length, block_size));
798-
}
799-
} else {
800-
blocks.push(block);
801-
}
802-
803-
start_index = end_index;
804-
current_block_length = 0;
805-
}
806-
while (end_index < chars.length);
807-
808-
return blocks;
809-
}
810760
};

src/linebreak.js

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
const GraphemeSplitter = require('grapheme-splitter');
2+
const { encode: encodeUTF8 } = require('isomorphic-textencoder');
3+
4+
const graphemeSplitter = new GraphemeSplitter();
5+
6+
/**
 * Base class for the errors raised when a piece of text cannot be placed on a
 * single line. It carries the offending text and the options in effect so the
 * subclasses can build their `message` from them.
 */
/* abstract */ class SubstringTooLargeForLineError extends Error {
    /* substring: string */
    /* opts: Options */

    /**
     * @param {string} substring - the text that could not be fitted
     * @param {Options} opts - the options the caller supplied (notably `bytes`)
     */
    constructor(substring/* : string */, opts/* : Options */) {
        // No message is passed on purpose: subclasses expose `message` as a getter.
        super();

        // Maintains proper stack trace for where our error was thrown (only available on V8)
        // @ts-ignore
        if (Error.captureStackTrace) {
            // @ts-ignore
            Error.captureStackTrace(this, this.constructor);
        }

        // Custom debugging information
        this.substring = substring;
        this.opts = opts;
    }

    /** Reports the concrete subclass name rather than the generic "Error". */
    get name() {
        return this.constructor.name;
    }
}
29+
30+
/**
 * Thrown by `lineBreak` when a whole word is larger than `opts.bytes` and
 * `opts.allowBreakingWords` is not set.
 */
class WordTooLargeForLineError extends SubstringTooLargeForLineError {
    /** Computed lazily from the stored substring and options. */
    get message() {
        return `${size(this.substring)} byte word can't fit in a ${this.opts.bytes} byte block: ${this.substring}`;
    }
}
35+
36+
/**
 * Thrown by `lineBreak` when a single grapheme is larger than `opts.bytes` and
 * `opts.allowBreakingGraphemes` is not set.
 */
class GraphemeTooLargeForLineError extends SubstringTooLargeForLineError {
    /** Computed lazily from the stored substring and options. */
    get message() {
        return `${size(this.substring)} byte grapheme can't fit in a ${this.opts.bytes} byte block: ${this.substring}`;
    }
}
41+
42+
/**
 * Thrown by `lineBreak` when even a single codepoint is larger than
 * `opts.bytes`, so no further splitting is possible.
 */
class CodepointTooLargeForLineError extends SubstringTooLargeForLineError {
    /** Computed lazily from the stored substring and options. */
    get message() {
        return `${size(this.substring)} byte codepoint can't fit in a ${this.opts.bytes} byte block: ${this.substring}`;
    }
}
47+
48+
/**
 * Measure a string in bytes once encoded as UTF-8 (not in UTF-16 code units,
 * which is what `str.length` would report).
 *
 * @param {string} str
 * @returns {number} byte length of the UTF-8 encoding of `str`
 */
function size(str/* : string */)/* : number */ {
    return encodeUTF8(str).byteLength;
}
53+
54+
/* export interface Options {
55+
bytes: number,
56+
allowBreakingWords?: boolean,
57+
allowBreakingGraphemes?: boolean,
58+
} */
59+
60+
/**
 * Lazily split `str` into lines of at most `opts.bytes` UTF-8 bytes each.
 *
 * Lines are broken between words where possible; the whitespace at a break
 * point is dropped because the line break replaces it. A word that cannot fit
 * on a line by itself is split between graphemes, and a grapheme that cannot
 * fit is split between codepoints, when the corresponding option allows it.
 *
 * @param {string} str - text to split
 * @param {Options} opts - `bytes` (line size limit), `allowBreakingWords`,
 *     `allowBreakingGraphemes`
 * @yields {string} the next line
 * @throws {WordTooLargeForLineError} a word exceeds `opts.bytes` and
 *     `allowBreakingWords` is falsy
 * @throws {GraphemeTooLargeForLineError} a grapheme exceeds `opts.bytes` and
 *     `allowBreakingGraphemes` is falsy
 * @throws {CodepointTooLargeForLineError} a single codepoint exceeds `opts.bytes`
 */
function * lineBreak(str/* : string */, opts/* : Options */)/* : IterableIterator<string> */ {
    let line = '';
    // whitespace seen after the last appended word; only emitted if more text
    // ends up on the same line
    let previousWhitespace = '';

    for (const [word, trailingWhitespace] of wordBreak(str)) {
        // word fits in current line
        if (size(line) + size(previousWhitespace) + size(word) <= opts.bytes) {
            line += previousWhitespace + word;
            previousWhitespace = trailingWhitespace;
            continue;
        }

        // can fit word in a line by itself
        if (size(word) <= opts.bytes) {
            if (line) {
                yield line; // yield previously built up line
            }

            // previously buffered whitespace is discarded as it was replaced by a line break
            // store new whitespace for later
            previousWhitespace = trailingWhitespace;

            line = word; // next line starts with word
            continue;
        }

        // can't fit word into a line by itself
        if (!opts.allowBreakingWords) {
            throw new WordTooLargeForLineError(word, opts);
        }

        // try to fit part of word into current line
        // (remember this word's own trailing whitespace; it is restored once
        // the whole word has been emitted piecewise)
        const wordPreviousWhitespace = trailingWhitespace;
        for (const grapheme of graphemeSplitter.iterateGraphemes(word)) {
            // can fit next grapheme
            if (size(line) + size(previousWhitespace) + size(grapheme) <= opts.bytes) {
                line += previousWhitespace + grapheme;
                previousWhitespace = '';
                continue;
            }

            // can fit next grapheme into a line by itself
            if (size(grapheme) <= opts.bytes) {
                if (line) {
                    yield line;
                }
                previousWhitespace = '';
                line = grapheme;
                continue;
            }

            // grapheme can't fit in a single line
            if (!opts.allowBreakingGraphemes) {
                throw new GraphemeTooLargeForLineError(grapheme, opts);
            }

            // break grapheme into codepoints instead
            // (iterating a string yields whole codepoints, not UTF-16 units)
            for (const codepoint of grapheme) {
                // can fit codepoint into current line
                if (size(line) + size(previousWhitespace) + size(codepoint) <= opts.bytes) {
                    line += previousWhitespace + codepoint;
                    previousWhitespace = '';
                    continue;
                }

                // can fit codepoint into its own line
                if (size(codepoint) <= opts.bytes) {
                    if (line) {
                        yield line;
                    }
                    previousWhitespace = '';
                    line = codepoint;
                    continue;
                }

                // can't fit codepoint into its own line
                throw new CodepointTooLargeForLineError(codepoint, opts);
            } // end of codepoint loop
        } // end of grapheme loop
        previousWhitespace = wordPreviousWhitespace;
    } // end of [word, trailingWhitespace] loop

    // unyielded leftovers when we're done iterating over the input string
    if (previousWhitespace) {
        if (size(line) + size(previousWhitespace) <= opts.bytes) {
            line += previousWhitespace; // retain trailing whitespace on input line if possible
        }
    }
    if (line) {
        yield line;
    }
}
156+
157+
// yields [word, trailingWhitespace] tuples
158+
/**
 * Lazily split `str` into `[word, trailingWhitespace]` tuples.
 *
 * `word` is a run of non-whitespace graphemes and `trailingWhitespace` is the
 * whitespace that follows it (possibly empty for the last word). When the
 * input starts with whitespace, the first tuple has an empty `word`; this also
 * covers input that consists of whitespace only. Concatenating every tuple
 * reproduces `str`.
 *
 * @param {string} str - text to split
 * @yields {[string, string]} the next `[word, trailingWhitespace]` tuple
 */
function * wordBreak(str/* : string */)/* : IterableIterator<[string, string]> */ {
    let word = '';
    let trailingWhitespace = '';

    for (const grapheme of graphemeSplitter.iterateGraphemes(str)) {
        // grapheme is whitespace
        if (/^\s+$/.test(grapheme)) {
            // collect whitespace
            trailingWhitespace += grapheme;
            continue;
        }

        // grapheme is non-whitespace

        // start of new word
        if (trailingWhitespace) {
            yield [word, trailingWhitespace];
            word = grapheme;
            trailingWhitespace = '';
            continue;
        }

        // continuation of word
        word += grapheme;
    }

    // possible leftovers at end of input string; `word` is empty here when the
    // whole input was whitespace, which must still be reported rather than lost
    if (word || trailingWhitespace) {
        yield [word, trailingWhitespace];
    }
}
190+
191+
module.exports = {
192+
WordTooLargeForLineError,
193+
GraphemeTooLargeForLineError,
194+
CodepointTooLargeForLineError,
195+
lineBreak,
196+
wordBreak,
197+
};

0 commit comments

Comments
 (0)