11package io .kafbat .ui .util ;
22
33import java .nio .ByteBuffer ;
4+ import java .nio .CharBuffer ;
5+ import java .nio .charset .CharsetDecoder ;
46import java .nio .charset .StandardCharsets ;
7+ import java .util .List ;
58import java .util .regex .Pattern ;
69
710/**
1013public class ContentUtils {
// ASCII bytes of the sixteen upper-case hexadecimal digit characters, in value order.
1114 private static final byte [] HEX_ARRAY = "0123456789ABCDEF" .getBytes (StandardCharsets .US_ASCII );
1215
// Single UTF-8 decoder instance held in a static field.
// NOTE(review): java.nio.charset.CharsetDecoder is documented as not safe for use by multiple
// concurrent threads - confirm this instance is never used from more than one thread at a time.
16+ private static final CharsetDecoder UTF8_DECODER = StandardCharsets .UTF_8 .newDecoder ();
17+
// Private no-op constructor: code outside this class cannot instantiate it.
1318 private ContentUtils () {
1419 }
1520
@@ -23,38 +28,29 @@ public static boolean isValidUtf8(byte[] value) {
2328 if (value .length > 10_000 ) {
2429 return true ;
2530 }
26- int i = 0 ;
27- while (i < value .length ) {
28- int b = value [i ] & 0xFF ;
29- int numBytes ;
30- if ((b & 0x80 ) == 0 ) {
31- // 1-byte (ASCII)
32- numBytes = 1 ;
33- } else if ((b & 0xE0 ) == 0xC0 ) {
34- // 2-byte sequence
35- numBytes = 2 ;
36- } else if ((b & 0xF0 ) == 0xE0 ) {
37- // 3-byte sequence
38- numBytes = 3 ;
39- } else if ((b & 0xF8 ) == 0xF0 ) {
40- // 4-byte sequence
41- numBytes = 4 ;
42- } else {
43- // Invalid first byte
44- return false ;
45- }
46- if (i + numBytes > value .length ) {
47- return false ;
48- }
49- // Check continuation bytes
50- for (int j = 1 ; j < numBytes ; j ++) {
51- if ((value [i + j ] & 0xC0 ) != 0x80 ) {
52- return false ;
53- }
54- }
55- i += numBytes ;
31+ try {
32+ CharBuffer decode = UTF8_DECODER .decode (ByteBuffer .wrap (value ));
33+ return decode .chars ().allMatch (ContentUtils ::isValidUtf8 );
34+ } catch (Exception e ) {
35+ return false ;
36+ }
37+ }
38+
39+ public static boolean isValidUtf8 (int c ) {
40+ // SKIP NULL Symbols
41+ if (c == 0 ) {
42+ return false ;
43+ }
44+ // Well known symbols
45+ if (Character .isAlphabetic (c )
46+ || Character .isDigit (c )
47+ || Character .isWhitespace (c )
48+ || Character .isEmoji (c )
49+ ) {
50+ return true ;
5651 }
57- return true ;
52+ // We could read only whitespace controls like
53+ return !Character .isISOControl (c );
5854 }
5955
6056 /**
0 commit comments