Skip to content

Commit 7bae0a3

Browse files
committed
render: support text auras: tas, ta, t
Neither `@tas` nor `@ta` rendering explicitly sanitizes its output; both instead rely on the atom already being "sane" for those auras. Even in those cases we need to take special care: strings in atoms are encoded as utf-8, whereas js strings are utf-16, so we must decode them with a `TextDecoder`. For `@t` we do need to sanitize the string. We lift logic previously implemented in tloncorp/tlon-apps#3274 into the library, where it belongs.
1 parent b2e7b53 commit 7bae0a3

File tree

2 files changed

+104
-4
lines changed

2 files changed

+104
-4
lines changed

src/render.ts

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ export function rend(coin: coin): string {
117117
case 't':
118118
if (coin.aura[1] === 'a') {
119119
if (coin.aura[2] === 's') {
120-
return 'coin.atom'; //TODO fromCord
120+
return cordToString(coin.atom);
121121
} else {
122-
return '~.' + 'coin.atom'; //TODO fromCord
122+
return '~.' + cordToString(coin.atom);
123123
}
124124
} else {
125-
return '~~' + 'coin.atom'; //TODO fromCord(wood(coin.atom))
125+
return '~~' + encodeString(cordToString(coin.atom));
126126
}
127127
default:
128128
return zco(coin.atom);
@@ -158,6 +158,52 @@ function wack(str: string) {
158158
return str.replaceAll('~', '~~').replaceAll('_', '~-');
159159
}
160160

161+
/**
 * encodeString(): encode a string into the @ta-safe character set,
 * using the logic from +wood. The result is url-safe.
 *
 * Rules, applied per codepoint:
 * - `a-z`, `0-9` and `-` are copied through unchanged
 * - a space becomes `.`
 * - `.` becomes `~.`
 * - `~` becomes `~~`
 * - anything else becomes `~`, the codepoint in lowercase hex, then `.`
 *
 * For example, 'some Chars!' becomes 'some.~43.hars~21.'. Note that no
 * leading `~.` or `~~` is added here; callers prepend their own prefix.
 *
 * NOTE(review): a U+0000 character produces no output at all (the falsy
 * codepoint check skips it) — confirm that this is intended.
 *
 * @param string the text to encode
 * @returns the encoded text
 */
export function encodeString(string: string) {
  const pieces: string[] = [];
  let cursor = 0;
  while (cursor < string.length) {
    const unit = string[cursor];
    if (unit === ' ') {
      pieces.push('.');
    } else if (unit === '.') {
      pieces.push('~.');
    } else if (unit === '~') {
      pieces.push('~~');
    } else {
      const point = string.codePointAt(cursor);
      if (point) {
        // js strings are utf-16: a codepoint above 0xffff occupies two
        // string indices (a surrogate pair). codePointAt() already read
        // both halves, so step over the second half here.
        if (point > 0xffff) cursor += 1;
        const isLowercase = point >= 97 && point <= 122; // a-z
        const isDigit = point >= 48 && point <= 57; // 0-9
        if (isLowercase || isDigit || unit === '-') {
          pieces.push(unit);
        } else {
          pieces.push(`~${point.toString(16)}.`);
        }
      }
    }
    cursor += 1;
  }
  return pieces.join('');
}
206+
161207
const UW_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-~';
162208
function blend(bits: number, alphabet: string, atom: bigint): string {
163209
if (atom === 0n) return alphabet[0];
@@ -176,3 +222,22 @@ function split(str: string, group: number): string {
176222
return str.replace(new RegExp(`(?=(?:.{${group}})+$)(?!^)`, 'g'), '.');
177223
}
178224

225+
/**
 * cordToString(): decode the bytes of an atom as utf-8 text.
 *
 * atomToByteArray() yields the most significant byte first; the array is
 * reversed so that decoding starts from the least significant byte.
 *
 * @param atom the atom holding utf-8 encoded text
 * @returns the decoded js (utf-16) string
 */
function cordToString(atom: bigint): string {
  const bytes = atomToByteArray(atom);
  bytes.reverse();
  const decoder = new TextDecoder('utf-8');
  return decoder.decode(bytes);
}
228+
229+
//NOTE from nockjs' bigIntToByteArray
230+
//REVIEW original produced [0] for 0n... probably not correct in our contexts!
231+
/**
 * atomToByteArray(): split an atom into its bytes, most significant first.
 *
 * The atom is rendered as hex, left-padded to an even number of digits,
 * and each pair of digits becomes one byte.
 *
 * @param atom the atom to split
 * @returns the atom's bytes in big-endian order; empty for `0n`
 */
function atomToByteArray(atom: bigint): Uint8Array {
  if (atom === 0n) return new Uint8Array(0);
  const digits = atom.toString(16);
  const evenDigits = digits.length % 2 === 0 ? digits : '0' + digits;
  const bytes = new Uint8Array(evenDigits.length / 2);
  for (let index = 0; index < bytes.length; index += 1) {
    const offset = index * 2;
    // storing into a Uint8Array keeps only the low 8 bits of the value
    bytes[index] = parseInt(evenDigits.slice(offset, offset + 2), 16);
  }
  return bytes;
}

test/render.test.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,41 @@ const DATE_TESTS: {
305305
];
306306
testAuras('date', DATE_AURAS, DATE_TESTS);
307307

308+
const TEXT_AURAS: aura[] = [ 'tas', 'ta', 't' ];
309+
const TEXT_TESTS: {
310+
n: bigint,
311+
tas: string,
312+
ta: string,
313+
t: string
314+
}[] = [
315+
{ n: 0n,
316+
tas: '',
317+
ta: '~.',
318+
t: '~~'
319+
},
320+
{ n: 97n,
321+
tas: 'a',
322+
ta: '~.a',
323+
t: '~~a'
324+
},
325+
{ n: 121404708502375659064812904n,
326+
tas: 'hello-world',
327+
ta: '~.hello-world',
328+
t: '~~hello-world'
329+
},
330+
{ n: 10334410032597741434076685640n,
331+
tas: 'Hello World!',
332+
ta: '~.Hello World!',
333+
t: '~~~48.ello.~57.orld~21.'
334+
},
335+
{ n: 294301677938177654314463611973797746852183254758760570046179940746240825570n,
336+
tas: '★🤠yeehaw👨‍👧‍👦',
337+
ta: '~.★🤠yeehaw👨‍👧‍👦',
338+
t: '~~~2605.~1f920.yeehaw~1f468.~200d.~1f467.~200d.~1f466.'
339+
}
340+
];
341+
testAuras('text', TEXT_AURAS, TEXT_TESTS);
342+
308343
const MANY_COINS: {
309344
coin: coin,
310345
out: string
@@ -321,7 +356,7 @@ const MANY_COINS: {
321356
]
322357
describe('%many coin rendering', () => {
323358
MANY_COINS.map((test) => {
324-
describe(`case ${test.out}`, () => {
359+
describe(test.out, () => {
325360
it('renders', () => {
326361
const res = rend(test.coin);
327362
expect(res).toEqual(test.out);

0 commit comments

Comments
 (0)