4444
4545#include <stdio.h>
4646
47+ #ifdef HAVE_WCHAR_H
48+ #include <wchar.h>
49+ #include <wctype.h>
50+ #endif
51+
4752#include "netdissect-ctype.h"
4853
4954#include "netdissect.h"
5661#define HEXDUMP_HEXSTUFF_PER_LINE \
5762 (HEXDUMP_HEXSTUFF_PER_SHORT * HEXDUMP_SHORTS_PER_LINE)
5863
64+ #ifdef HAVE_WCHAR_H
65+
/*
 * The is_utf8_printable() function below is adapted from ngrep.
 *
 * Decide whether the multibyte character starting at s is printable.
 *
 * The bytes are decoded with mbrtowc(), which follows the LC_CTYPE category
 * of the current locale, so they are only interpreted as UTF-8 when the
 * program is running under a UTF-8 locale.  The decoded wide character is
 * then classified with iswprint() and measured with wcwidth().
 *
 * s         - first byte of the candidate character; may be NULL.
 * max_len   - number of bytes that may be read starting at s.
 * width_out - if not NULL, receives the display width in columns
 *             (0 for combining characters, 1 for ordinary characters,
 *             2 for wide characters such as CJK).  It is set to 0 whenever
 *             the function returns 0, so the caller never reads an
 *             indeterminate value.
 *
 * Returns the number of bytes making up the character if it is printable,
 * or 0 if s is NULL, max_len is 0, the sequence is invalid or incomplete,
 * the character is NUL, or the character is not printable.
 */
static int
is_utf8_printable(const unsigned char *s, size_t max_len, int *width_out)
{
	mbstate_t state = {0};	/* fresh conversion state for every call */
	wchar_t wc;
	size_t len;
	int is_printable;
	int w;

	/* Give the out-parameter a defined value on every return path. */
	if (width_out != NULL)
		*width_out = 0;

	if (s == NULL || max_len == 0)
		return 0;

	len = mbrtowc(&wc, (const char *)s, max_len, &state);

	/*
	 * (size_t)-1: encoding error.
	 * (size_t)-2: incomplete multibyte sequence (more bytes are needed).
	 * 0:          the NUL character.
	 */
	if (len == (size_t)-1 || len == (size_t)-2 || len == 0)
		return 0;

#if defined(_WIN32) || defined(_WIN64)
	{
		/*
		 * Windows iswprint() is too conservative, so also accept any
		 * character outside the surrogate range.  The comparisons are
		 * done on a wider unsigned copy because wchar_t is only 16
		 * bits wide on Windows.
		 */
		unsigned long code = (unsigned long)wc;

		is_printable = iswprint((wint_t)wc) ||
		    (code >= 0x80 && code < 0xD800) ||	/* most of the BMP */
		    (code >= 0xE000 && code < 0x110000);	/* private use + supplementary planes */

		/* ...but never the C0/C1 control characters or DEL. */
		if (code < 0x20 || (code >= 0x7F && code < 0xA0))
			is_printable = 0;
	}
#else
	is_printable = iswprint((wint_t)wc) != 0;
#endif

	if (!is_printable)
		return 0;

#if defined(_WIN32) || defined(_WIN64)
	/* The Windows C runtime has no wcwidth(); assume a single column. */
	w = 1;
#else
	/* wcwidth() yields 0 for combining characters and 2 for wide ones. */
	w = wcwidth(wc);
	if (w < 0)
		w = 1;	/* width unknown: fall back to one column */
#endif

	if (width_out != NULL)
		*width_out = w;
	return (int)len;
}
127+ #endif
128+
59129void
60130ascii_print (netdissect_options * ndo ,
61131 const u_char * cp , u_int length )
@@ -71,28 +141,51 @@ ascii_print(netdissect_options *ndo,
71141 truncated = TRUE;
72142 }
73143 ND_PRINT ("\n" );
74- while (length != 0 ) {
75- s = GET_U_1 (cp );
76- cp ++ ;
77- length -- ;
78- if (s == '\r' ) {
79- /*
80- * Don't print CRs at the end of the line; they
81- * don't belong at the ends of lines on UN*X,
82- * and the standard I/O library will give us one
83- * on Windows so we don't need to print one
84- * ourselves.
85- *
86- * In the middle of a line, just print a '.'.
87- */
88- if (length > 1 && GET_U_1 (cp ) != '\n' )
89- ND_PRINT ("." );
144+
145+ u_int remaining = length ;
146+
147+ while (length > 0 ) {
148+ int utf8_len , width ;
149+ int j ;
150+
151+ utf8_len = 0 ;
152+
153+ #ifdef HAVE_WCHAR_H
154+ if (ndo -> ndo_utf8 ) {
155+ utf8_len = is_utf8_printable (cp , remaining , & width );
156+ }
157+ #endif
158+
159+ if (utf8_len > 0 ) {
160+ /* Valid printable UTF-8 character */
161+ for (j = 0 ; j < utf8_len ; j ++ )
162+ ND_PRINT ("%c" , cp [j ]);
163+ cp += utf8_len ;
164+ length -= utf8_len ;
165+
90166 } else {
91- if (!ND_ASCII_ISGRAPH (s ) &&
92- (s != '\t' && s != ' ' && s != '\n' ))
93- ND_PRINT ("." );
94- else
95- ND_PRINT ("%c" , s );
167+ s = GET_U_1 (cp );
168+ cp ++ ;
169+ length -- ;
170+ if (s == '\r' ) {
171+ /*
172+ * Don't print CRs at the end of the line; they
173+ * don't belong at the ends of lines on UN*X,
174+ * and the standard I/O library will give us one
175+ * on Windows so we don't need to print one
176+ * ourselves.
177+ *
178+ * In the middle of a line, just print a '.'.
179+ */
180+ if (length > 1 && GET_U_1 (cp ) != '\n' )
181+ ND_PRINT ("." );
182+ } else {
183+ if (!ND_ASCII_ISGRAPH (s ) &&
184+ (s != '\t' && s != ' ' && s != '\n' ))
185+ ND_PRINT ("." );
186+ else
187+ ND_PRINT ("%c" , s );
188+ }
96189 }
97190 }
98191 if (truncated )
@@ -104,52 +197,69 @@ hex_and_ascii_print_with_offset(netdissect_options *ndo, const char *indent,
104197 const u_char * cp , u_int length , u_int offset )
105198{
106199 u_int caplength ;
107- u_int i ;
108- u_int s1 , s2 ;
109- u_int nshorts ;
200+ u_int nbytes_unprinted ;
201+ u_int s1 ;
110202 int truncated = FALSE;
111203 char hexstuff [HEXDUMP_SHORTS_PER_LINE * HEXDUMP_HEXSTUFF_PER_SHORT + 1 ], * hsp ;
112- char asciistuff [ASCII_LINELENGTH + 1 ], * asp ;
204+ char asciistuff [ASCII_LINELENGTH + 1 + 4 ], * asp ;
205+ u_int utf8_bytes_to_skip = 0 ;
113206
114207 caplength = ND_BYTES_AVAILABLE_AFTER (cp );
115208 if (length > caplength ) {
116209 length = caplength ;
117210 truncated = TRUE;
118211 }
119- nshorts = length / sizeof (u_short );
120- i = 0 ;
212+ nbytes_unprinted = 0 ;
121213 hsp = hexstuff ; asp = asciistuff ;
122- while (nshorts != 0 ) {
214+ while (length != 0 ) {
123215 s1 = GET_U_1 (cp );
216+
217+ // insert the leading space of short
218+ if ((hsp - hexstuff ) % HEXDUMP_HEXSTUFF_PER_SHORT == 0 ) {
219+ (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ), " " );
220+ hsp ++ ;
221+ }
222+
223+ // add the byte
224+ (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ), "%02x" , s1 );
225+ hsp += 2 ;
226+
227+ if (utf8_bytes_to_skip > 0 ) {
228+ // only pad the new line
229+ if (nbytes_unprinted == (asp - asciistuff )) {
230+ * (asp ++ ) = ' ' ;
231+ }
232+ utf8_bytes_to_skip -- ;
233+ } else {
234+ // try to add the display (utf8) chars
235+ #ifdef HAVE_WCHAR_H
236+ utf8_bytes_to_skip = ndo -> ndo_utf8 ? is_utf8_printable (cp , length , NULL ) : 0 ;
237+ #endif
238+ if (utf8_bytes_to_skip > 0 ) {
239+ u_int j ;
240+ for (j = 0 ; j < utf8_bytes_to_skip ; j ++ ) {
241+ * (asp ++ ) = (char )GET_U_1 (cp + j );
242+ }
243+ utf8_bytes_to_skip -- ;
244+ } else {
245+ * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
246+ }
247+ }
248+
124249 cp ++ ;
125- s2 = GET_U_1 (cp );
126- cp ++ ;
127- (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ),
128- " %02x%02x" , s1 , s2 );
129- hsp += HEXDUMP_HEXSTUFF_PER_SHORT ;
130- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
131- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s2 ) ? s2 : '.' );
132- i ++ ;
133- if (i >= HEXDUMP_SHORTS_PER_LINE ) {
250+ nbytes_unprinted ++ ;
251+ if (nbytes_unprinted >= (HEXDUMP_SHORTS_PER_LINE * sizeof (u_short ))) {
134252 * hsp = * asp = '\0' ;
135253 ND_PRINT ("%s0x%04x: %-*s %s" ,
136254 indent , offset , HEXDUMP_HEXSTUFF_PER_LINE ,
137255 hexstuff , asciistuff );
138- i = 0 ; hsp = hexstuff ; asp = asciistuff ;
256+ nbytes_unprinted = 0 ; hsp = hexstuff ; asp = asciistuff ;
139257 offset += HEXDUMP_BYTES_PER_LINE ;
140258 }
141- nshorts -- ;
142- }
143- if (length & 1 ) {
144- s1 = GET_U_1 (cp );
145- cp ++ ;
146- (void )snprintf (hsp , sizeof (hexstuff ) - (hsp - hexstuff ),
147- " %02x" , s1 );
148- hsp += 3 ;
149- * (asp ++ ) = (char )(ND_ASCII_ISGRAPH (s1 ) ? s1 : '.' );
150- ++ i ;
259+ length -- ;
151260 }
152- if (i > 0 ) {
261+
262+ if (nbytes_unprinted > 0 ) {
153263 * hsp = * asp = '\0' ;
154264 ND_PRINT ("%s0x%04x: %-*s %s" ,
155265 indent , offset , HEXDUMP_HEXSTUFF_PER_LINE ,
@@ -159,6 +269,7 @@ hex_and_ascii_print_with_offset(netdissect_options *ndo, const char *indent,
159269 nd_trunc_longjmp (ndo );
160270}
161271
272+
162273void
163274hex_and_ascii_print (netdissect_options * ndo , const char * indent ,
164275 const u_char * cp , u_int length )
0 commit comments