@@ -313,6 +313,19 @@ static inline bool bitstring_insert_integer(term dst_bin, size_t offset, avm_int
313313 */
314314bool bitstring_utf8_encode (avm_int_t c , uint8_t * buf , size_t * out_size );
315315
316+ /**
317+ * @brief Decode a character from UTF-8.
318+ *
319+ * @param buf the buffer from which to decode the character
320+ * @param len the length (in bytes) of the data available in buf
321+ * @param c on output, the decoded character as an integer value
322+ * @param out_size the size in bytes, on output (if not NULL)
323+ * @return \c true if decoding was successful, \c false if character starting at buf is not a valid
324+ * unicode character
325+ * @note (review) earlier text said buf may be NULL "to only compute the size" - confirm or drop.
326+ */
327+ bool bitstring_utf8_decode (const uint8_t * buf , size_t len , int32_t * c , size_t * out_size );
328+
316329/**
317330 * @brief Encode a character to UTF-16.
318331 *
@@ -326,6 +339,20 @@ bool bitstring_utf8_encode(avm_int_t c, uint8_t *buf, size_t *out_size);
326339 */
327340bool bitstring_utf16_encode (avm_int_t c , uint8_t * buf , enum BitstringFlags bs_flags , size_t * out_size );
328341
342+ /**
343+ * @brief Decode a character from UTF-16.
344+ *
345+ * @param buf the buffer from which to decode the character
346+ * @param len the length (in bytes) of the data available in buf
347+ * @param c on output, the decoded character as an integer value
348+ * @param out_size the size in bytes, on output (if not NULL)
349+ * @param bs_flags flags to decode the character (undefined/little/big/native)
350+ * @return \c true if decoding was successful, \c false if character starting at buf is not a valid
351+ * unicode character
352+ * @note (review) earlier text said buf may be NULL "to only compute the size" - confirm or drop.
353+ */
354+ bool bitstring_utf16_decode (const uint8_t * buf , size_t len , int32_t * c , size_t * out_size , enum BitstringFlags bs_flags );
355+
329356/**
330357 * @brief Encode a character to UTF-32.
331358 *
@@ -337,6 +364,19 @@ bool bitstring_utf16_encode(avm_int_t c, uint8_t *buf, enum BitstringFlags bs_fl
337364 */
338365bool bitstring_utf32_encode (avm_int_t c , uint8_t * buf , enum BitstringFlags bs_flags );
339366
367+ /**
368+ * @brief Decode a character from UTF-32.
369+ *
370+ * @param buf the buffer from which to decode the character
371+ * @param len the length (in bytes) of the data available in buf
372+ * @param c on output, the decoded character as an integer value
373+ * @param bs_flags flags to decode the character (undefined/little/big/native)
374+ * @return \c true if decoding was successful, \c false if character starting at buf is not a valid
375+ * unicode character
376+ * @note (review) earlier text allowed a NULL buf "to only compute the size"; no size output here - confirm.
377+ */
378+ bool bitstring_utf32_decode (const uint8_t * buf , size_t len , int32_t * c , enum BitstringFlags bs_flags );
379+
340380/**
341381 * @brief Compute the size of a character when UTF-8 encoded.
342382 *
@@ -379,6 +419,23 @@ static inline bool bitstring_insert_utf8(term dst_bin, size_t offset, avm_int_t
379419 return bitstring_utf8_encode (c , dst , out_size );
380420}
381421
422+ /**
423+ * @brief Match a character in UTF-8 format
424+ *
425+ * @param src_bin binary to match against
426+ * @param offset offset, in bits, to where to start to match the character
427+ * @param c int to decode to
428+ * @param out_size the size in bytes, on output
429+ * @return \c true if encoding was successful, \c false if src_bin at offset is not a valid
430+ * unicode character
431+ */
432+ static inline bool bitstring_match_utf8 (term src_bin , size_t offset , int32_t * c , size_t * out_size )
433+ {
434+ size_t byte_offset = offset >> 3 ; // divide by 8
435+ const uint8_t * src = (const uint8_t * ) term_binary_data (src_bin ) + byte_offset ;
436+ return bitstring_utf8_decode (src , term_binary_size (src_bin ) - byte_offset , c , out_size );
437+ }
438+
382439/**
383440 * @brief Insert a character in UTF-16 format
384441 *
@@ -397,6 +454,24 @@ static inline bool bitstring_insert_utf16(term dst_bin, size_t offset, avm_int_t
397454 return bitstring_utf16_encode (c , dst , bs_flags , out_size );
398455}
399456
457+ /**
458+ * @brief Match a character in UTF-16 format
459+ *
460+ * @param src_bin binary to match against
461+ * @param offset offset, in bits, to where to start to match the character
462+ * @param c int to decode to
463+ * @param bs_flags flags to decode the character (undefined/little/big/native)
464+ * @param out_size the size in bytes, on output
465+ * @return \c true if encoding was successful, \c false if src_bin at offset is not a valid
466+ * unicode character
467+ */
468+ static inline bool bitstring_match_utf16 (term src_bin , size_t offset , int32_t * c , size_t * out_size , enum BitstringFlags bs_flags )
469+ {
470+ size_t byte_offset = offset >> 3 ; // divide by 8
471+ const uint8_t * src = (const uint8_t * ) term_binary_data (src_bin ) + byte_offset ;
472+ return bitstring_utf16_decode (src , term_binary_size (src_bin ) - byte_offset , c , out_size , bs_flags );
473+ }
474+
400475/**
401476 * @brief Insert a character in UTF-32 format
402477 *
@@ -414,6 +489,23 @@ static inline bool bitstring_insert_utf32(term dst_bin, size_t offset, avm_int_t
414489 return bitstring_utf32_encode (c , dst , bs_flags );
415490}
416491
492+ /**
493+ * @brief Match a character in UTF-32 format
494+ *
495+ * @param src_bin binary to match against
496+ * @param offset offset, in bits, to where to start to match the character
497+ * @param c int to decode to
498+ * @param bs_flags flags to decode the character (undefined/little/big/native)
499+ * @return \c true if encoding was successful, \c false if src_bin at offset is not a valid
500+ * unicode character
501+ */
502+ static inline bool bitstring_match_utf32 (term src_bin , size_t offset , int32_t * c , enum BitstringFlags bs_flags )
503+ {
504+ size_t byte_offset = offset >> 3 ; // divide by 8
505+ const uint8_t * src = (const uint8_t * ) term_binary_data (src_bin ) + byte_offset ;
506+ return bitstring_utf32_decode (src , term_binary_size (src_bin ) - byte_offset , c , bs_flags );
507+ }
508+
417509#ifdef __cplusplus
418510}
419511#endif
0 commit comments