@@ -262,10 +262,8 @@ as a hex value when showing compiled patterns. We use it in cases when the
262262locale has not been explicitly changed, so as to get consistent output from
263263systems that differ in their output from isprint() even in the "C" locale. */
264264
/* After this change PRINTABLE(c) has two forms: when EBCDIC is defined it
calls the printable() function; otherwise a character is treated as printable
when its value lies in the range 32..126 inclusive. */
265- #if defined(EBCDIC ) && !EBCDIC_IO
266- #define PRINTABLE (c ) (ebcdic_to_ascii(c) >= 32 && ebcdic_to_ascii(c) < 127)
267- #elif defined(EBCDIC )
268- #define PRINTABLE (c ) ((c) >= 64 && (c) < 255)
265+ #if defined(EBCDIC )
266+ #define PRINTABLE (c ) printable(c)
269267#else
270268#define PRINTABLE (c ) ((c) >= 32 && (c) < 127)
271269#endif
@@ -275,11 +273,20 @@ format. The input character is encoded in PCRE2's native codepage (EBCDIC, if
275273enabled), but the output may differ in the case where pcre2test uses ASCII input
276274and output. */
/* The _HEX variants are used where a character is shown or entered by its
numeric value (for example in \x escapes). They differ from the plain macros
only when EBCDIC is defined and EBCDIC_IO is non-zero: in that configuration
CHAR_OUTPUT() and CHAR_INPUT() pass the value through unchanged, whereas
CHAR_OUTPUT_HEX() and CHAR_INPUT_HEX() still go through ebcdic_to_ascii() and
ascii_to_ebcdic(). In the other two configurations each _HEX macro simply
expands to its plain counterpart. */
277275#if defined(EBCDIC ) && !EBCDIC_IO
278- #define CHAR_OUTPUT (c ) ebcdic_to_ascii(c)
279- #define CHAR_INPUT (c ) ascii_to_ebcdic(c)
276+ #define CHAR_OUTPUT (c ) ebcdic_to_ascii(c)
277+ #define CHAR_OUTPUT_HEX (c ) CHAR_OUTPUT(c)
278+ #define CHAR_INPUT (c ) ascii_to_ebcdic(c)
279+ #define CHAR_INPUT_HEX (c ) CHAR_INPUT(c)
280+ #elif defined(EBCDIC )
281+ #define CHAR_OUTPUT (c ) (c)
282+ #define CHAR_OUTPUT_HEX (c ) ebcdic_to_ascii(c)
283+ #define CHAR_INPUT (c ) (c)
284+ #define CHAR_INPUT_HEX (c ) ascii_to_ebcdic(c)
280285#else
281- #define CHAR_OUTPUT (c ) (c)
282- #define CHAR_INPUT (c ) (c)
286+ #define CHAR_OUTPUT (c ) (c)
287+ #define CHAR_OUTPUT_HEX (c ) CHAR_OUTPUT(c)
288+ #define CHAR_INPUT (c ) (c)
289+ #define CHAR_INPUT_HEX (c ) CHAR_INPUT(c)
283290#endif
284291
285292/* We have to include some of the library source files because we need
@@ -313,10 +320,15 @@ previous definition of PRIV avoids name clashes. */
313320#include "pcre2_tables.c"
314321#include "pcre2_ucd.c"
315322
316- /* Forward-declarations for PRINTABLE(). */
323+ /* Forward-declarations for PRINTABLE(), etc . */
317324
/* printable() and the two single-character converters are declared for every
build in which EBCDIC is defined; the in-place string converter
ascii_to_ebcdic_str() is declared only when EBCDIC_IO is additionally zero. */
325+ #if defined(EBCDIC )
326+ static BOOL printable (uint32_t c );
327+ #endif
318328#if defined(EBCDIC ) && !EBCDIC_IO
319329static void ascii_to_ebcdic_str (uint8_t * buf , size_t len );
330+ #endif
331+ #if defined(EBCDIC )
320332static uint32_t ascii_to_ebcdic (uint32_t c );
321333static uint32_t ebcdic_to_ascii (uint32_t c );
322334#endif
@@ -3045,14 +3057,71 @@ return (PCRE2_JIT_STACK *)arg;
30453057* EBCDIC support functions *
30463058*************************************************/
30473059
#if defined(EBCDIC)
/* Decide whether a character value is one that PRINTABLE() accepts in an
EBCDIC build. The accepted set is: space, the punctuation characters listed in
the switch below, the decimal digits, and the upper and lower case letters.
Each alphabet is tested as three separate runs (a-i, j-r, s-z) rather than as a
single a-z range, presumably because EBCDIC does not encode the letters
contiguously. All comparisons use the CHAR_xxx constants, so the test follows
whatever values those constants have in this build.

Arguments:
  c         the character value to test

Returns:    TRUE if c is in the accepted set, FALSE otherwise
*/

static BOOL
printable(uint32_t c)
{
/* Space and punctuation are isolated values, so test them individually. */

switch (c)
  {
  case CHAR_SPACE:
  case CHAR_EXCLAMATION_MARK:
  case CHAR_QUOTATION_MARK:
  case CHAR_NUMBER_SIGN:
  case CHAR_DOLLAR_SIGN:
  case CHAR_PERCENT_SIGN:
  case CHAR_AMPERSAND:
  case CHAR_APOSTROPHE:
  case CHAR_LEFT_PARENTHESIS:
  case CHAR_RIGHT_PARENTHESIS:
  case CHAR_ASTERISK:
  case CHAR_PLUS:
  case CHAR_COMMA:
  case CHAR_MINUS:
  case CHAR_DOT:
  case CHAR_SLASH:
  case CHAR_COLON:
  case CHAR_SEMICOLON:
  case CHAR_LESS_THAN_SIGN:
  case CHAR_EQUALS_SIGN:
  case CHAR_GREATER_THAN_SIGN:
  case CHAR_QUESTION_MARK:
  case CHAR_COMMERCIAL_AT:
  case CHAR_LEFT_SQUARE_BRACKET:
  case CHAR_BACKSLASH:
  case CHAR_RIGHT_SQUARE_BRACKET:
  case CHAR_CIRCUMFLEX_ACCENT:
  case CHAR_UNDERSCORE:
  case CHAR_GRAVE_ACCENT:
  case CHAR_LEFT_CURLY_BRACKET:
  case CHAR_VERTICAL_LINE:
  case CHAR_RIGHT_CURLY_BRACKET:
  case CHAR_TILDE:
  return TRUE;

  default:
  break;
  }

/* Digits form one run; each alphabet is split into three runs. */

if (c >= CHAR_0 && c <= CHAR_9) return TRUE;

if ((c >= CHAR_A && c <= CHAR_I) ||
    (c >= CHAR_J && c <= CHAR_R) ||
    (c >= CHAR_S && c <= CHAR_Z))
  return TRUE;

if ((c >= CHAR_a && c <= CHAR_i) ||
    (c >= CHAR_j && c <= CHAR_r) ||
    (c >= CHAR_s && c <= CHAR_z))
  return TRUE;

return FALSE;
}
#endif
3114+
#if defined(EBCDIC) && !EBCDIC_IO
/* Translate a byte buffer in place, replacing every byte with the entry it
indexes in the ascii_to_ebcdic_1047 table.

Arguments:
  buf       the bytes to translate; overwritten with the result
  len       the number of bytes in buf

Returns:    nothing
*/

static void
ascii_to_ebcdic_str(uint8_t *buf, size_t len)
{
while (len-- > 0)
  {
  *buf = ascii_to_ebcdic_1047[*buf];
  buf++;
  }
}
#endif
30553123
3124+ #if defined(EBCDIC )
30563125static uint32_t
30573126ascii_to_ebcdic (uint32_t c )
30583127{
@@ -3161,7 +3230,7 @@ if (PRINTABLE(c))
31613230 return 1 ;
31623231 }
31633232
3164- c = CHAR_OUTPUT (c );
3233+ c = CHAR_OUTPUT_HEX (c );
31653234
31663235if (c < 0x100 )
31673236 {
@@ -4983,7 +5052,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
49835052 fprintf (outfile , "Starting code units:" );
49845053 for (input = 0 ; input < 256 ; input ++ )
49855054 {
4986- int i = CHAR_INPUT (input );
5055+ int i = CHAR_INPUT_HEX (input );
49875056 if ((start_bits [i /8 ] & (1u << (i & 7 ))) != 0 )
49885057 {
49895058 if (c > 75 )
@@ -4998,7 +5067,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
49985067 }
49995068 else
50005069 {
5001- fprintf (outfile , " \\x%02x" , CHAR_OUTPUT (i ));
5070+ fprintf (outfile , " \\x%02x" , CHAR_OUTPUT_HEX (i ));
50025071 c += 5 ;
50035072 }
50045073 }
@@ -5645,8 +5714,9 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
56455714 }
56465715 c = toupper (c );
56475716 d = toupper (d );
5648- * pt ++ = ((isdigit (c )? (c - '0' ) : (c - 'A' + 10 )) << 4 ) +
5649- (isdigit (d )? (d - '0' ) : (d - 'A' + 10 ));
5717+ c = isdigit (c )? (c - '0' ) : (c - 'A' + 10 );
5718+ d = isdigit (d )? (d - '0' ) : (d - 'A' + 10 );
5719+ * pt ++ = CHAR_OUTPUT (CHAR_INPUT_HEX ((c << 4 ) + d ));
56505720 }
56515721 }
56525722 * pt = 0 ;
@@ -7554,6 +7624,7 @@ while ((c = *p++) != 0)
75547624 c -= '0' ;
75557625 while (i ++ < 2 && isdigit (* p ) && * p < '8' )
75567626 c = c * 8 + (* p ++ - '0' );
7627+ c = CHAR_OUTPUT (CHAR_INPUT_HEX (c ));
75577628
75587629 encoding = (utf && c > 255 )? FORCE_UTF : FORCE_RAW ;
75597630 break ;
@@ -7572,6 +7643,7 @@ while ((c = *p++) != 0)
75727643 }
75737644 else c = c * 8 + (* pt - '0' );
75747645 }
7646+ c = CHAR_OUTPUT (CHAR_INPUT_HEX (c ));
75757647 if (i == 0 || * pt != '}' )
75767648 {
75777649 fprintf (outfile , "** Malformed \\o{ escape\n" );
@@ -7603,6 +7675,7 @@ while ((c = *p++) != 0)
76037675 }
76047676 else c = c * 16 + (tolower (* pt ) - (isdigit (* pt )? '0' : 'a' - 10 ));
76057677 }
7678+ c = CHAR_OUTPUT (CHAR_INPUT_HEX (c ));
76067679 if (i == 0 || * pt != '}' )
76077680 {
76087681 fprintf (outfile , "** Malformed \\x{ escape\n" );
@@ -7622,6 +7695,7 @@ while ((c = *p++) != 0)
76227695 c = c * 16 + (tolower (* p ) - (isdigit (* p )? '0' : 'a' - 10 ));
76237696 p ++ ;
76247697 }
7698+ c = CHAR_OUTPUT (CHAR_INPUT_HEX (c ));
76257699#if defined SUPPORT_PCRE2_8
76267700 if (utf && (test_mode == PCRE8_MODE )) encoding = FORCE_RAW ;
76277701#endif
0 commit comments