Skip to content

Commit 95d03cb

Browse files
authored
feat(stdlib): Add Buffer.getChar (#2262)
1 parent e4caac2 commit 95d03cb

File tree

5 files changed

+92
-1
lines changed

5 files changed

+92
-1
lines changed

compiler/test/stdlib/buffer.test.gr

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,15 @@ let b = Buffer.make(0)
303303
Buffer.addString(str, b)
304304
assert Buffer.toBytes(a) == Buffer.toBytes(b)
305305

306+
// Buffer.getChar: indices below are byte offsets into the UTF-8 data, not character positions
307+
let buff = Buffer.make(32)
308+
Buffer.addString("ab©✨🍞", buff)
309+
assert Buffer.getChar(0, buff) == 'a' // 1-byte char at byte 0
310+
assert Buffer.getChar(1, buff) == 'b' // 1-byte char at byte 1
311+
assert Buffer.getChar(2, buff) == '©' // 2-byte char occupying bytes 2-3
312+
assert Buffer.getChar(4, buff) == '✨' // 3-byte char occupying bytes 4-6
313+
assert Buffer.getChar(7, buff) == '🍞' // 4-byte char occupying bytes 7-10
314+
306315
// addChar
307316
let char = 'a' // 1 byte
308317
let buf = Buffer.make(0)

stdlib/buffer.gr

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ from "char" include Char
2424
from "runtime/numbers" include Numbers
2525
use Numbers.{ coerceNumberToWasmI32 }
2626
from "runtime/utf8" include Utf8
27-
use Utf8.{ usvEncodeLength }
27+
use Utf8.{ usvEncodeLength, utf8ByteCount, exception MalformedUnicode }
2828
from "runtime/unsafe/offsets" include Offsets
2929
use Offsets.{ _BYTES_LEN_OFFSET, _BYTES_DATA_OFFSET }
3030

@@ -376,6 +376,41 @@ provide let addString = (string, buffer) => {
376376
buffer.len += bytelen
377377
}
378378

379+
/**
 * Gets the UTF-8 encoded character at the given byte index.
 *
 * @param index: The byte index to access
 * @param buffer: The buffer to access
 * @returns A character starting at the given index
 *
 * @throws IndexOutOfBounds: When `index` is negative
 * @throws IndexOutOfBounds: When `index + 1` is greater than the buffer size
 * @throws MalformedUnicode: When the bytes at the index are not a valid UTF-8 sequence, including when the character's encoded length would extend past the end of the buffer's contents
 *
 * @example
 * let buf = Buffer.make(32)
 * Buffer.addString("Hello World 🌾", buf)
 * assert Buffer.getChar(12, buf) == '🌾'
 *
 * @since v0.7.1
 */
@unsafe
provide let getChar = (index, buffer) => {
  // Only the WasmI32 operators actually used below are brought into scope
  use WasmI32.{ (+), (>) }
  // Ensures at least the first byte of the character can be read at `index`
  checkIsIndexInBounds(index, 1, buffer)
  // Note: We do a raw check as we need the byte length before reading the full char
  let bytes = buffer.data
  let ptr = WasmI32.fromGrain(bytes)
  let offset = coerceNumberToWasmI32(index)
  // Read the first byte of the character; its value determines the encoded length
  let byte = WasmI32.load8U(ptr + offset, _BYTES_DATA_OFFSET)
  let charSize = utf8ByteCount(byte)
  // Compare against `buffer.len` (the tracked content length) rather than the
  // size of `bytes`, so a character is never decoded from bytes past the
  // buffer's contents
  if (offset + charSize > coerceNumberToWasmI32(buffer.len)) {
    throw MalformedUnicode
  }
  // NOTE(review): presumably keeps `bytes` referenced until the raw pointer
  // read above is complete — confirm against the runtime's memory model
  ignore(bytes)
  Bytes.getChar(index, bytes)
}
413+
379414
/**
380415
* Appends the bytes of a character to a buffer.
381416
*

stdlib/buffer.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,51 @@ Buffer.addString("Hello", buf)
415415
assert Buffer.toString(buf) == "Hello"
416416
```
417417

418+
### Buffer.**getChar**
419+
420+
<details disabled>
421+
<summary tabindex="-1">Added in <code>next</code></summary>
422+
No other changes yet.
423+
</details>
424+
425+
```grain
426+
getChar: (index: Number, buffer: Buffer) => Char
427+
```
428+
429+
Gets the UTF-8 encoded character at the given byte index.
430+
431+
Parameters:
432+
433+
|param|type|description|
434+
|-----|----|-----------|
435+
|`index`|`Number`|The byte index to access|
436+
|`buffer`|`Buffer`|The buffer to access|
437+
438+
Returns:
439+
440+
|type|description|
441+
|----|-----------|
442+
|`Char`|A character starting at the given index|
443+
444+
Throws:
445+
446+
`IndexOutOfBounds`
447+
448+
* When `index` is negative
449+
* When `index + 1` is greater than the buffer size
450+
451+
`MalformedUnicode`
452+
453+
* When the bytes at the index are not a valid UTF-8 sequence, including when the character's encoded length would extend past the end of the buffer's contents
454+
455+
Examples:
456+
457+
```grain
458+
let buf = Buffer.make(32)
459+
Buffer.addString("Hello World 🌾", buf)
460+
assert Buffer.getChar(12, buf) == '🌾'
461+
```
462+
418463
### Buffer.**addChar**
419464

420465
<details disabled>

stdlib/bytes.gr

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ provide let clear = (bytes: Bytes) => {
416416
* @returns The character that starts at the given index
417417
*
418418
* @throws IndexOutOfBounds: When `index` is negative
419+
* @throws IndexOutOfBounds: When `index + 1` is greater than the bytes size
419420
* @throws MalformedUnicode: When the requested character is not a valid UTF-8 sequence
420421
*
421422
* @example

stdlib/bytes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,7 @@ Throws:
462462
`IndexOutOfBounds`
463463

464464
* When `index` is negative
465+
* When `index + 1` is greater than the bytes size
465466

466467
`MalformedUnicode`
467468

0 commit comments

Comments
 (0)