Skip to content

Commit 6eaf64c

Browse files
committed
src: detect whether the string is one byte representation or not
References: #56090
1 parent c4aa34a commit 6eaf64c

File tree

5 files changed

+113
-0
lines changed

5 files changed

+113
-0
lines changed

doc/api/v8.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,37 @@ setTimeout(() => {
13041304
}, 1000);
13051305
```
13061306

1307+
## `v8.isStringOneByteRepresentation(content)`
1308+
1309+
<!-- YAML
1310+
added: REPLACEME
1311+
-->
1312+
1313+
* `content` {string}
1314+
* Returns: {boolean}
1315+
1316+
V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representation of a string.
1317+
If `content` uses `Latin-1/ISO-8859-1` as the underlying representation, this function returns `true`;
1318+
otherwise, it returns `false`.
1319+
1320+
If this method returns `false`, that does not mean that the string contains characters outside `Latin-1/ISO-8859-1`.
1321+
Sometimes a `Latin-1` string may also be represented as `UTF16`.
1322+
1323+
```js
1324+
const { isStringOneByteRepresentation } = require('node:v8');
1325+
const bf = Buffer.alloc(20);
1326+
1327+
function writeString(input) {
  // Choose the encoding that matches the string's underlying representation.
  const encoding = isStringOneByteRepresentation(input) ? 'latin1' : 'utf16le';
  bf.write(input, encoding);
}
1334+
writeString('hello');
1335+
writeString('你好');
1336+
```
1337+
13071338
[HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
13081339
[Hook Callbacks]: #hook-callbacks
13091340
[V8]: https://developers.google.com/v8/

lib/v8.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ const binding = internalBinding('v8');
104104
const {
105105
cachedDataVersionTag,
106106
setFlagsFromString: _setFlagsFromString,
107+
isStringOneByteRepresentation: _isStringOneByteRepresentation,
107108
updateHeapStatisticsBuffer,
108109
updateHeapSpaceStatisticsBuffer,
109110
updateHeapCodeStatisticsBuffer,
@@ -155,6 +156,17 @@ function setFlagsFromString(flags) {
155156
_setFlagsFromString(flags);
156157
}
157158

159+
/**
 * Reports whether the given string uses a one-byte underlying
 * representation. The argument is validated as a string before the
 * native binding is consulted.
 * @param {string} content The string to inspect.
 * @returns {boolean} The value reported by the native binding.
 */
function isStringOneByteRepresentation(content) {
  validateString(content, 'content');
  const usesOneByte = _isStringOneByteRepresentation(content);
  return usesOneByte;
}
168+
169+
158170
/**
159171
* Gets the current V8 heap statistics.
160172
* @returns {{
@@ -439,4 +451,5 @@ module.exports = {
439451
startupSnapshot,
440452
setHeapSnapshotNearHeapLimit,
441453
GCProfiler,
454+
isStringOneByteRepresentation,
442455
};

src/node_external_reference.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ namespace node {
1212

1313
using CFunctionCallbackWithOneByteString =
1414
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
15+
16+
// Function-pointer type for a callback that takes two v8::Local<v8::Value>
// arguments and returns a bool.
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
17+
v8::Local<v8::Value> receiver);
1518
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
1619
v8::Local<v8::Value> receiver);
1720
using CFunctionCallbackReturnDouble =
@@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
9093
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
9194
V(CFunctionCallback) \
9295
V(CFunctionCallbackWithOneByteString) \
96+
V(CFunctionCallbackReturnBool) \
9397
V(CFunctionCallbackReturnDouble) \
9498
V(CFunctionCallbackReturnInt32) \
9599
V(CFunctionCallbackValueReturnDouble) \

src/node_v8.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
namespace node {
3333
namespace v8_utils {
3434
using v8::Array;
35+
using v8::CFunction;
3536
using v8::Context;
3637
using v8::FunctionCallbackInfo;
3738
using v8::FunctionTemplate;
@@ -238,6 +239,23 @@ void SetFlagsFromString(const FunctionCallbackInfo<Value>& args) {
238239
V8::SetFlagsFromString(*flags, static_cast<size_t>(flags.length()));
239240
}
240241

242+
static void IsStringOneByteRepresentation(
243+
const FunctionCallbackInfo<Value>& args) {
244+
CHECK_EQ(args.Length(), 1);
245+
CHECK(args[0]->IsString());
246+
bool is_one_byte = args[0].As<String>()->IsOneByte();
247+
args.GetReturnValue().Set(is_one_byte);
248+
}
249+
250+
// Fast-call variant of the one-byte check: asserts that `target` is a string
// and returns the result of String::IsOneByte() on it. `receiver` is not
// used.
static bool FastIsStringOneByteRepresentation(Local<Value> receiver,
                                              const Local<Value> target) {
  CHECK(target->IsString());
  const bool uses_one_byte = target.As<String>()->IsOneByte();
  return uses_one_byte;
}
255+
256+
// CFunction built from FastIsStringOneByteRepresentation. It is handed to
// SetFastMethodNoSideEffect() when the binding is installed, and its type
// info is registered as an external reference.
CFunction fast_is_string_one_byte_representation_(
257+
CFunction::Make(FastIsStringOneByteRepresentation));
258+
241259
static const char* GetGCTypeName(v8::GCType gc_type) {
242260
switch (gc_type) {
243261
case v8::GCType::kGCTypeScavenge:
@@ -479,6 +497,13 @@ void Initialize(Local<Object> target,
479497
// Export symbols used by v8.setFlagsFromString()
480498
SetMethod(context, target, "setFlagsFromString", SetFlagsFromString);
481499

500+
// Export symbols used by v8.isStringOneByteRepresentation()
501+
SetFastMethodNoSideEffect(context,
502+
target,
503+
"isStringOneByteRepresentation",
504+
IsStringOneByteRepresentation,
505+
&fast_is_string_one_byte_representation_);
506+
482507
// GCProfiler
483508
Local<FunctionTemplate> t =
484509
NewFunctionTemplate(env->isolate(), GCProfiler::New);
@@ -498,6 +523,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
498523
registry->Register(GCProfiler::New);
499524
registry->Register(GCProfiler::Start);
500525
registry->Register(GCProfiler::Stop);
526+
registry->Register(IsStringOneByteRepresentation);
527+
registry->Register(FastIsStringOneByteRepresentation);
528+
registry->Register(fast_is_string_one_byte_representation_.GetTypeInfo());
501529
}
502530

503531
} // namespace v8_utils
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Flags: --expose-internals
2+
'use strict';
3+
require('../common');
4+
const assert = require('assert');
5+
const { isStringOneByteRepresentation } = require('v8');
6+
7+
// Every non-string argument must be rejected with a type error that names
// the "content" argument.
const invalidInputs = [
  undefined,
  null,
  false,
  5n,
  5,
  Symbol(),
  () => {},
  {},
];
for (const value of invalidInputs) {
  assert.throws(
    () => isStringOneByteRepresentation(value),
    /The "content" argument must be of type string/
  );
}
22+
23+
{
  // A string for which a one-byte representation is expected.
  const oneByteInput = 'hello world!';
  // Repeat the call many times to trigger the fast API.
  let remaining = 10_000;
  while (remaining-- > 0) {
    assert.strictEqual(isStringOneByteRepresentation(oneByteInput), true);
  }
}
30+
31+
{
  // A string of CJK characters and emoji, for which a non-one-byte
  // representation is expected.
  const twoByteInput = '你好😀😃';
  // Repeat the call many times to trigger the fast API.
  let remaining = 10_000;
  while (remaining-- > 0) {
    assert.strictEqual(isStringOneByteRepresentation(twoByteInput), false);
  }
}

0 commit comments

Comments
 (0)