Skip to content

Commit 8d4bf07

Browse files
authored
Add std.encodeUTF8, std.decodeUTF8 (#577)
2 parents 910d6ff + 478c853 commit 8d4bf07

8 files changed

+125
-3
lines changed

core/desugarer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct BuiltinDecl {
3434
std::vector<UString> params;
3535
};
3636

37-
static unsigned long max_builtin = 35;
37+
static unsigned long max_builtin = 37;
3838
BuiltinDecl jsonnet_builtin_decl(unsigned long builtin)
3939
{
4040
switch (builtin) {
@@ -74,6 +74,8 @@ BuiltinDecl jsonnet_builtin_decl(unsigned long builtin)
7474
case 33: return {U"asciiUpper", {U"str"}};
7575
case 34: return {U"join", {U"sep", U"arr"}};
7676
case 35: return {U"parseJson", {U"str"}};
77+
case 36: return {U"encodeUTF8", {U"str"}};
78+
case 37: return {U"decodeUTF8", {U"arr"}};
7779
default:
7880
std::cerr << "INTERNAL ERROR: Unrecognized builtin function: " << builtin << std::endl;
7981
std::abort();

core/vm.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ enum FrameKind {
7373
FRAME_UNARY, // e in -e
7474
FRAME_BUILTIN_JOIN_STRINGS, // When executing std.join over strings, used to hold intermediate state.
7575
FRAME_BUILTIN_JOIN_ARRAYS, // When executing std.join over arrays, used to hold intermediate state.
76+
FRAME_BUILTIN_DECODE_UTF8, // When executing std.decodeUTF8, used to hold intermediate state.
7677
};
7778

7879
/** A frame on the stack.
@@ -135,6 +136,9 @@ struct Frame {
135136
UString str;
136137
bool first;
137138

139+
/** Used for accumulating bytes */
140+
std::string bytes;
141+
138142
/** The context is used in error messages to attempt to find a reasonable name for the
139143
* object, function, or thunk value being executed. If it is a thunk, it is filled
140144
* with the value when the frame terminates.
@@ -875,6 +879,8 @@ class Interpreter {
875879
builtins["asciiUpper"] = &Interpreter::builtinAsciiUpper;
876880
builtins["join"] = &Interpreter::builtinJoin;
877881
builtins["parseJson"] = &Interpreter::builtinParseJson;
882+
builtins["encodeUTF8"] = &Interpreter::builtinEncodeUTF8;
883+
builtins["decodeUTF8"] = &Interpreter::builtinDecodeUTF8;
878884
}
879885

880886
/** Clean up the heap, stack, stash, and builtin function ASTs. */
@@ -1308,6 +1314,65 @@ class Interpreter {
13081314
return nullptr;
13091315
}
13101316

1317+
/** Implements std.encodeUTF8(str).
 *
 * Encodes the string argument with encode_utf8 and leaves in scratch an array that holds
 * one number per resulting byte, each in the range [0, 255].
 *
 * \param loc Location of the call, passed on to argument validation.
 * \param args The builtin's arguments; validated to be exactly one string.
 * \returns nullptr; the result is delivered through scratch.
 */
const AST *builtinEncodeUTF8(const LocationRange &loc, const std::vector<Value> &args)
{
    validateBuiltinArgs(loc, "encodeUTF8", args, {Value::STRING});

    const auto *input = static_cast<HeapString *>(args[0].v.h);
    const std::string encoded = encode_utf8(input->value);

    // Put the (still empty) array in scratch first, then append to it in place.
    scratch = makeArray({});
    auto &out = static_cast<HeapArray *>(scratch.v.h)->elements;
    for (const char ch : encoded) {
        auto *cell = makeHeap<HeapThunk>(idArrayElement, nullptr, 0, nullptr);
        out.push_back(cell);
        // Convert through uint8_t so bytes >= 0x80 become 128..255 instead of negative values
        // on platforms where char is signed.
        cell->fill(makeNumber(static_cast<uint8_t>(ch)));
    }
    return nullptr;
}
1332+
1333+
/** Worker for std.decodeUTF8, operating on the frame at the top of the stack.
 *
 * The frame's val is the array being decoded, elementId is the index of the next element to
 * inspect and bytes accumulates the bytes collected so far.  Elements are visited in order:
 * a filled thunk is validated and its value appended to bytes; an unfilled thunk causes a call
 * frame to be pushed for it and its body to be returned for evaluation.  The
 * FRAME_BUILTIN_DECODE_UTF8 case of the interpreter loop calls this function again
 * (presumably once that thunk has been evaluated), and the loop resumes at elementId.
 *
 * \returns The body of the next unevaluated element, or nullptr once every element has been
 *          consumed and scratch holds the decoded string.
 * \throws The error built by makeError if an element is not a number, or is not an integer
 *         in the range [0,255].
 */
const AST *decodeUTF8(void)
{
    Frame &f = stack.top();
    const auto &elements = static_cast<HeapArray *>(f.val.v.h)->elements;
    while (f.elementId < elements.size()) {
        auto *th = elements[f.elementId];
        if (!th->filled) {
            // Element not evaluated yet: hand its body back to the interpreter.  f is not
            // touched after newCall because the stack has just been modified.
            stack.newCall(f.location, th, th->self, th->offset, th->upValues);
            return th->body;
        }

        const auto &b = th->content;
        if (b.t != Value::NUMBER) {
            std::stringstream ss;
            ss << "Element " << f.elementId << " of the provided array was not a number";
            throw makeError(stack.top().location, ss.str());
        }

        const double d = b.v.d;
        // Expressed as a negated in-range test so that a NaN is rejected here and never
        // reaches the double -> int conversion below, which is undefined for values that
        // int cannot represent.
        if (!(d >= 0 && d <= 255) || d != static_cast<int>(d)) {
            std::stringstream ss;
            ss << "Element " << f.elementId << " of the provided array was not an integer in range [0,255]";
            throw makeError(stack.top().location, ss.str());
        }

        f.bytes.push_back(static_cast<uint8_t>(d));
        f.elementId++;
    }
    scratch = makeString(decode_utf8(f.bytes));
    return nullptr;
}
1363+
1364+
/** Implements std.decodeUTF8(arr).
 *
 * Validates that the single argument is an array, turns the top stack frame into a
 * FRAME_BUILTIN_DECODE_UTF8 frame with empty decoding state, and starts decoding.
 *
 * \param loc Location of the call, passed on to argument validation.
 * \param args The builtin's arguments; validated to be exactly one array.
 * \returns Whatever decodeUTF8() returns for the freshly initialised frame.
 */
const AST *builtinDecodeUTF8(const LocationRange &loc, const std::vector<Value> &args)
{
    validateBuiltinArgs(loc, "decodeUTF8", args, {Value::ARRAY});

    // Reset every piece of frame state that decodeUTF8() reads.
    Frame &frame = stack.top();
    frame.kind = FRAME_BUILTIN_DECODE_UTF8;
    frame.elementId = 0;
    frame.bytes.clear();
    frame.val = args[0];  // the array to decode
    return decodeUTF8();
}
1375+
13111376
const AST *builtinTrace(const LocationRange &loc, const std::vector<Value> &args)
13121377
{
13131378
if(args[0].t != Value::STRING) {
@@ -2855,6 +2920,14 @@ class Interpreter {
28552920
}
28562921
} break;
28572922

2923+
case FRAME_BUILTIN_DECODE_UTF8: {
2924+
auto *ast = decodeUTF8();
2925+
if (ast != nullptr) {
2926+
ast_ = ast;
2927+
goto recurse;
2928+
}
2929+
} break;
2930+
28582931
default:
28592932
std::cerr << "INTERNAL ERROR: Unknown FrameKind: " << f.kind << std::endl;
28602933
std::abort();

doc/ref/stdlib.html

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,11 @@ <h4 id="parseHex">std.parseHex(str)</h4>
863863
<div class="hgroup-inline">
864864
<div class="panel">
865865
<h4 id="parseJson">std.parseJson(str)</h4>
866+
<p>
867+
Parses a JSON string.
868+
</p>
869+
<em>Available in upcoming release.</em>
870+
<p>Example: <code>std.parseJson('{"foo": "bar"}')</code> yields <code>{"foo": "bar"}</code>.</p>
866871
</div>
867872
<div style="clear: both"></div>
868873
</div>
@@ -871,11 +876,32 @@ <h4 id="parseJson">std.parseJson(str)</h4>
871876
<div class="hgroup">
872877
<div class="hgroup-inline">
873878
<div class="panel">
879+
<h4 id="encodeUTF8">std.encodeUTF8(str)</h4>
880+
<em>Available in upcoming release.</em>
874881
<p>
875-
Parses a JSON string.
882+
Encodes a string using <a href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>. Returns an array of numbers representing the encoded bytes.
876883
</p>
884+
</div>
885+
<div style="clear: both"></div>
886+
</div>
887+
</div>
888+
889+
<div class="hgroup">
890+
<div class="hgroup-inline">
891+
<div class="panel">
892+
<h4 id="decodeUTF8">std.decodeUTF8(arr)</h4>
893+
</div>
894+
<div style="clear: both"></div>
895+
</div>
896+
</div>
897+
898+
<div class="hgroup">
899+
<div class="hgroup-inline">
900+
<div class="panel">
877901
<em>Available in upcoming release.</em>
878-
<p>Example: <code>std.parseJson('{"foo": "bar"}')</code> yields <code>{"foo": "bar"}</code>.</p>
902+
<p>
903+
Decodes an array of numbers representing bytes using <a href="https://en.wikipedia.org/wiki/UTF-8">UTF-8</a>. Each element must be an integer in the range [0, 255]. Returns a string.
904+
</p>
879905
</div>
880906
<div style="clear: both"></div>
881907
</div>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
std.decodeUTF8([17.5])
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
RUNTIME ERROR: Element 0 of the provided array was not an integer in range [0,255]
2+
error.decodeUTF8_float.jsonnet:1:1-23
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
std.decodeUTF8(['foo'])
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
RUNTIME ERROR: Element 0 of the provided array was not a number
2+
error.decodeUTF8_nan.jsonnet:1:1-24

test_suite/stdlib.jsonnet

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,4 +880,19 @@ std.assertEqual(std.find('a', ['a']), [0]) &&
880880
std.assertEqual(std.find('a', ['a', ['a'], 'b', 'a']), [0, 3]) &&
881881
std.assertEqual(std.find(['a'], [['a']]), [0]) &&
882882

883+
std.assertEqual(std.encodeUTF8(''), []) &&
884+
std.assertEqual(std.encodeUTF8('A'), [65]) &&
885+
std.assertEqual(std.encodeUTF8('AAA'), [65, 65, 65]) &&
886+
std.assertEqual(std.encodeUTF8('§'), [194, 167]) &&
887+
std.assertEqual(std.encodeUTF8('Zażółć gęślą jaźń'), [90, 97, 197, 188, 195, 179, 197, 130, 196, 135, 32, 103, 196, 153, 197, 155, 108, 196, 133, 32, 106, 97, 197, 186, 197, 132]) &&
888+
std.assertEqual(std.encodeUTF8('😃'), [240, 159, 152, 131]) &&
889+
890+
std.assertEqual(std.decodeUTF8([]), '') &&
891+
std.assertEqual(std.decodeUTF8([65]), 'A') &&
892+
std.assertEqual(std.decodeUTF8([65, 65, 65]), 'AAA') &&
893+
std.assertEqual(std.decodeUTF8([(function(x) 65)(42)]), 'A') &&
894+
std.assertEqual(std.decodeUTF8([65 + 1 - 1]), 'A') &&
895+
std.assertEqual(std.decodeUTF8([90, 97, 197, 188, 195, 179, 197, 130, 196, 135, 32, 103, 196, 153, 197, 155, 108, 196, 133, 32, 106, 97, 197, 186, 197, 132]), 'Zażółć gęślą jaźń') &&
896+
std.assertEqual(std.decodeUTF8([240, 159, 152, 131]), '😃') &&
897+
883898
true

0 commit comments

Comments
 (0)