Skip to content

Commit 1881ef0

Browse files
authored
fix: null terminated string support for utf16 (#52)
* fix: null terminated string support for utf16 * fix: zero length string is falsy so beware
1 parent b9b683c commit 1881ef0

File tree

3 files changed

+70
-14
lines changed

3 files changed

+70
-14
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,9 @@ var struct = new r.Struct({
151151

152152
### String
153153

154-
A `String` maps a JavaScript string to and from binary encodings. The length can be a constant, taken
155-
from a previous field in the parent structure, or encoded using a number type immediately before the string.
154+
A `String` maps a JavaScript string to and from binary encodings. The length, in bytes, can be a constant,
155+
taken from a previous field in the parent structure, or encoded using a number type immediately before the
156+
string.
156157

157158
Fully supported encodings include `'ascii'`, `'utf8'`, `'ucs2'`, `'utf16le'`, `'utf16be'`. Decoding is also possible
158159
with any encoding supported by [TextDecoder](https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings),
@@ -172,7 +173,7 @@ var struct = new r.Struct({
172173
});
173174

174175
// null-terminated string (also known as C string)
175-
var str = new r.String(null, 'utf8')
176+
var str = new r.String(null, 'utf8');
176177
```
177178

178179
### Array

src/String.js

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,36 +12,41 @@ class StringT extends Base {
1212
decode(stream, parent) {
1313
let length, pos;
1414

15+
let { encoding } = this;
16+
if (typeof encoding === 'function') {
17+
encoding = encoding.call(parent, parent) || 'ascii';
18+
}
19+
let width = encodingWidth(encoding);
20+
1521
if (this.length != null) {
1622
length = utils.resolveLength(this.length, stream, parent);
1723
} else {
1824
let buffer;
1925
({buffer, length, pos} = stream);
2026

21-
while ((pos < length) && (buffer[pos] !== 0x00)) {
22-
++pos;
27+
while ((pos < length - width + 1) &&
28+
(buffer[pos] !== 0x00 ||
29+
(width === 2 && buffer[pos+1] !== 0x00)
30+
)) {
31+
pos += width;
2332
}
2433

2534
length = pos - stream.pos;
2635
}
2736

28-
let { encoding } = this;
29-
if (typeof encoding === 'function') {
30-
encoding = encoding.call(parent, parent) || 'ascii';
31-
}
3237

3338
const string = stream.readString(length, encoding);
3439

3540
if ((this.length == null) && (stream.pos < stream.length)) {
36-
stream.pos++;
41+
stream.pos+=width;
3742
}
3843

3944
return string;
4045
}
4146

4247
size(val, parent) {
4348
// Use the defined value if no value was given
44-
if (!val) {
49+
if (val === undefined || val === null) {
4550
return utils.resolveLength(this.length, null, parent);
4651
}
4752

@@ -60,7 +65,7 @@ class StringT extends Base {
6065
}
6166

6267
if ((this.length == null)) {
63-
size++;
68+
size += encodingWidth(encoding);
6469
}
6570

6671
return size;
@@ -79,11 +84,29 @@ class StringT extends Base {
7984
stream.writeString(val, encoding);
8085

8186
if ((this.length == null)) {
82-
return stream.writeUInt8(0x00);
87+
return encodingWidth(encoding) == 2 ?
88+
stream.writeUInt16LE(0x0000) :
89+
stream.writeUInt8(0x00);
8390
}
8491
}
8592
}
8693

94+
function encodingWidth(encoding) {
95+
switch(encoding) {
96+
case 'ascii':
97+
case 'utf8': // utf8 is a byte-based encoding for zero-term string
98+
return 1;
99+
case 'utf16le':
100+
case 'utf16-le':
101+
case 'utf16be':
102+
case 'utf16-be':
103+
case 'ucs2':
104+
return 2;
105+
default:
106+
throw new Error('Unknown encoding ' + encoding);
107+
}
108+
}
109+
87110
function byteLength(string, encoding) {
88111
switch (encoding) {
89112
case 'ascii':

test/String.js

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import assert from 'assert';
2-
import {String as StringT, uint8, DecodeStream, EncodeStream} from 'restructure';
2+
import {String as StringT, uint16le, uint8, DecodeStream, Struct} from 'restructure';
33

44
describe('String', function() {
55
describe('decode', function() {
@@ -40,6 +40,18 @@ describe('String', function() {
4040
const string = new StringT(null, 'utf8');
4141
assert.equal(string.fromBuffer(Buffer.from('🍻')), '🍻');
4242
});
43+
44+
it('should decode two-byte null-terminated string for utf16le', function() {
45+
const stream = new DecodeStream(Buffer.from('🍻\x00', 'utf16le'));
46+
const string = new StringT(null, 'utf16le');
47+
assert.equal(string.decode(stream), '🍻');
48+
assert.equal(stream.pos, 6);
49+
});
50+
51+
it('should decode remainder of buffer when null-byte missing, utf16le', function() {
52+
const string = new StringT(null, 'utf16le');
53+
assert.equal(string.fromBuffer(Buffer.from('🍻', 'utf16le')), '🍻');
54+
});
4355
});
4456

4557
describe('size', function() {
@@ -73,6 +85,11 @@ describe('String', function() {
7385
assert.equal(string.size('🍻'), 5);
7486
});
7587

88+
it('should take null-byte into account, utf16le', function() {
89+
const string = new StringT(null, 'utf16le');
90+
assert.equal(string.size('🍻'), 6);
91+
});
92+
7693
it('should use defined length if no value given', function() {
7794
const array = new StringT(10);
7895
assert.equal(array.size(), 10);
@@ -109,5 +126,20 @@ describe('String', function() {
109126
const string = new StringT(null, 'utf8');
110127
assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00'));
111128
});
129+
130+
it('should encode using string length, utf16le', function() {
131+
const string = new StringT(16, 'utf16le');
132+
assert.deepEqual(string.toBuffer('testing'), Buffer.from('testing', 'utf16le'));
133+
});
134+
135+
it('should encode length as number before string utf16le', function() {
136+
const string = new StringT(uint16le, 'utf16le');
137+
assert.deepEqual(string.toBuffer('testing 😜'), Buffer.from('\u0014testing 😜', 'utf16le'));
138+
});
139+
140+
it('should encode two-byte null-terminated string for UTF-16', function() {
141+
const string = new StringT(null, 'utf16le');
142+
assert.deepEqual(string.toBuffer('🍻'), Buffer.from('🍻\x00', 'utf16le'));
143+
});
112144
});
113145
});

0 commit comments

Comments
(0)