@@ -247,6 +247,157 @@ ly_getutf8(const char **input, uint32_t *utf8_char, size_t *bytes_read)
247247 return LY_SUCCESS ;
248248}
249249
/**
 * @brief Check whether the leading bytes of an UTF-8 string match a byte pattern after a bitwise and.
 *
 * For every byte position the test is (input[i] & mask_i) == value_i, in other words
 * (input & 0x[arg1][arg3][arg5]...) == 0x[arg2][arg4][arg6]...
 *
 * @param[in] input UTF-8 string, at least @p bytes bytes of it are read.
 * @param[in] bytes Number of bytes to compare. It is an int on purpose, the last named parameter
 * of a variadic function must not have a type changed by default argument promotions.
 * @param[in] ... 2x @p bytes int arguments, alternating the mask and the expected value of each byte.
 * @return 1 if all the compared bytes match (also when @p bytes is not positive), 0 otherwise.
 */
static int
ly_utf8_and_equal(const char *input, int bytes, ...)
{
    va_list ap;
    int i, mask, byte, match = 1;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        mask = va_arg(ap, int);
        byte = va_arg(ap, int);

        /* compare each byte */
        if (((uint8_t)input[i] & mask) != (uint8_t)byte) {
            match = 0;
            break;
        }
    }
    /* va_start() must always be paired with va_end(), even when a mismatch was found */
    va_end(ap);

    return match;
}
280+
/**
 * @brief Check whether an UTF-8 string is smaller than a hex string.
 *
 * input < 0x[arg1][arg2]...
 *
 * The bytes are compared as unsigned values from the first one, the first differing byte decides.
 *
 * @param[in] input UTF-8 string, at most @p bytes bytes of it are read.
 * @param[in] bytes Number of bytes to compare. It is an int on purpose, the last named parameter
 * of a variadic function must not have a type changed by default argument promotions.
 * @param[in] ... @p bytes int arguments, the bytes to compare with.
 * @return 1 if @p input is smaller, 0 if it is greater or equal.
 */
static int
ly_utf8_less(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, less = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            less = (uint8_t)input[i] < (uint8_t)byte;
            break;
        }
    }
    /* va_start() must always be paired with va_end(), even when the result is known early */
    va_end(ap);

    /* all the bytes being equal leaves the result 0 */
    return less;
}
313+
/**
 * @brief Check whether an UTF-8 string is greater than a hex string.
 *
 * input > 0x[arg1][arg2]...
 *
 * The bytes are compared as unsigned values from the first one, the first differing byte decides.
 *
 * @param[in] input UTF-8 string, at most @p bytes bytes of it are read.
 * @param[in] bytes Number of bytes to compare. It is an int on purpose, the last named parameter
 * of a variadic function must not have a type changed by default argument promotions.
 * @param[in] ... @p bytes int arguments, the bytes to compare with.
 * @return 1 if @p input is greater, 0 if it is smaller or equal.
 */
static int
ly_utf8_greater(const char *input, int bytes, ...)
{
    va_list ap;
    int i, byte, greater = 0;

    va_start(ap, bytes);
    for (i = 0; i < bytes; ++i) {
        byte = va_arg(ap, int);

        /* compare until bytes differ */
        if ((uint8_t)input[i] != (uint8_t)byte) {
            greater = (uint8_t)input[i] > (uint8_t)byte;
            break;
        }
    }
    /* va_start() must always be paired with va_end(), even when the result is known early */
    va_end(ap);

    /* all the bytes being equal leaves the result 0 */
    return greater;
}
346+
347+ LY_ERR
348+ ly_checkutf8 (const char * input , size_t in_len , size_t * utf8_len )
349+ {
350+ size_t len ;
351+
352+ if (!(input [0 ] & 0x80 )) {
353+ /* one byte character */
354+ len = 1 ;
355+
356+ if (ly_utf8_less (input , 1 , 0x20 ) && (input [0 ] != 0x9 ) && (input [0 ] != 0xa ) && (input [0 ] != 0xd )) {
357+ /* invalid control characters */
358+ return LY_EINVAL ;
359+ }
360+ } else if (((input [0 ] & 0xe0 ) == 0xc0 ) && (in_len > 1 )) {
361+ /* two bytes character */
362+ len = 2 ;
363+
364+ /* (input < 0xC280) || (input > 0xDFBF) || ((input & 0xE0C0) != 0xC080) */
365+ if (ly_utf8_less (input , 2 , 0xC2 , 0x80 ) || ly_utf8_greater (input , 2 , 0xDF , 0xBF ) ||
366+ !ly_utf8_and_equal (input , 2 , 0xE0 , 0xC0 , 0xC0 , 0x80 )) {
367+ return LY_EINVAL ;
368+ }
369+ } else if (((input [0 ] & 0xf0 ) == 0xe0 ) && (in_len > 2 )) {
370+ /* three bytes character */
371+ len = 3 ;
372+
373+ /* (input >= 0xEDA080) && (input <= 0xEDBFBF) */
374+ if (!ly_utf8_less (input , 3 , 0xED , 0xA0 , 0x80 ) && !ly_utf8_greater (input , 3 , 0xED , 0xBF , 0xBF )) {
375+ /* reject UTF-16 surrogates */
376+ return LY_EINVAL ;
377+ }
378+
379+ /* (input < 0xE0A080) || (input > 0xEFBFBF) || ((input & 0xF0C0C0) != 0xE08080) */
380+ if (ly_utf8_less (input , 3 , 0xE0 , 0xA0 , 0x80 ) || ly_utf8_greater (input , 3 , 0xEF , 0xBF , 0xBF ) ||
381+ !ly_utf8_and_equal (input , 3 , 0xF0 , 0xE0 , 0xC0 , 0x80 , 0xC0 , 0x80 )) {
382+ return LY_EINVAL ;
383+ }
384+ } else if (((input [0 ] & 0xf8 ) == 0xf0 ) && (in_len > 3 )) {
385+ /* four bytes character */
386+ len = 4 ;
387+
388+ /* (input < 0xF0908080) || (input > 0xF48FBFBF) || ((input & 0xF8C0C0C0) != 0xF0808080) */
389+ if (ly_utf8_less (input , 4 , 0xF0 , 0x90 , 0x80 , 0x80 ) || ly_utf8_greater (input , 4 , 0xF4 , 0x8F , 0xBF , 0xBF ) ||
390+ !ly_utf8_and_equal (input , 4 , 0xF8 , 0xF0 , 0xC0 , 0x80 , 0xC0 , 0x80 , 0xC0 , 0x80 )) {
391+ return LY_EINVAL ;
392+ }
393+ } else {
394+ return LY_EINVAL ;
395+ }
396+
397+ * utf8_len = len ;
398+ return LY_SUCCESS ;
399+ }
400+
250401LY_ERR
251402ly_pututf8 (char * dst , uint32_t value , size_t * bytes_written )
252403{
0 commit comments