Skip to content

Commit 4b9f908

Browse files
committed
WIP for some real EBCDIC tests
1 parent 35eafc9 commit 4b9f908

29 files changed

+2108
-1131
lines changed

.github/workflows/dev.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,27 @@ jobs:
337337
- name: Test
338338
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all
339339

340+
ebcdic:
341+
# Tests the full support for EBCDIC on a non-EBCDIC platform, using a
342+
# hardcoded EBCDIC-1047 codepage.
343+
name: EBCDIC
344+
runs-on: ubuntu-24.04
345+
steps:
346+
- name: Checkout
347+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
348+
with:
349+
submodules: true
350+
351+
- name: Configure
352+
# TODO: Add the new CFLAGS_GCC when merging with the other PR
353+
run: cmake -DPCRE2_SUPPORT_JIT=OFF -DPCRE2_SUPPORT_UNICODE=OFF -DPCRE2_EBCDIC=ON -DPCRE2_EBCDIC_IGNORING_COMPILER=ON -DPCRE2_DEBUG=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build
354+
355+
- name: Build
356+
run: cd build && make -j3
357+
358+
- name: Test
359+
run: cd build && ../RunTest
360+
340361
heron:
341362
# Job to verify that the tasks performed by PrepareRelease have been done. It is
342363
# the committer's responsibility (currently) to run PrepareRelease themselves when

CMakeLists.txt

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ set(
265265

266266
set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
267267

268+
set(PCRE2_EBCDIC_IGNORING_COMPILER OFF CACHE BOOL "Force EBCDIC 1047 using numeric literals rather than C character literals; implies EBCDIC.")
269+
270+
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
271+
268272
set(
269273
PCRE2_LINK_SIZE
270274
"2"
@@ -572,13 +576,42 @@ if(NEWLINE_DEFAULT STREQUAL "")
572576
)
573577
endif()
574578

579+
set(REBUILD_CHARTABLES OFF)
580+
if(PCRE2_REBUILD_CHARTABLES)
581+
set(REBUILD_CHARTABLES ON)
582+
endif()
583+
584+
set(EBCDIC OFF)
575585
if(PCRE2_EBCDIC)
576-
set(EBCDIC 1)
586+
set(EBCDIC ON)
577587
endif()
578588

579589
if(PCRE2_EBCDIC_NL25)
580-
set(EBCDIC 1)
581-
set(EBCDIC_NL25 1)
590+
set(EBCDIC ON)
591+
set(EBCDIC_NL25 ON)
592+
endif()
593+
594+
if(PCRE2_EBCDIC_IGNORING_COMPILER)
595+
set(EBCDIC ON)
596+
set(EBCDIC_IGNORING_COMPILER ON)
597+
endif()
598+
599+
# Make sure that if EBCDIC is set (without EBCDIC_IGNORING_COMPILER), then
600+
# REBUILD_CHARTABLES is also enabled.
601+
# Also check that UTF support is not requested, because PCRE2 cannot handle
602+
# EBCDIC and UTF in the same build. To do so it would need to use different
603+
# character constants depending on the mode.
604+
# Also, EBCDIC cannot be used with 16-bit and 32-bit libraries.
605+
if(EBCDIC)
606+
if(NOT EBCDIC_IGNORING_COMPILER)
607+
set(REBUILD_CHARTABLES ON)
608+
endif()
609+
if(PCRE2_SUPPORT_UNICODE)
610+
message(FATAL_ERROR "Support for EBCDIC and Unicode cannot be enabled at the same time")
611+
endif()
612+
if(PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32)
613+
message(FATAL_ERROR "EBCDIC support is available only for the 8-bit library")
614+
endif()
582615
endif()
583616

584617
# Output files
@@ -652,8 +685,7 @@ endif()
652685

653686
# Character table generation
654687

655-
option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
656-
if(PCRE2_REBUILD_CHARTABLES)
688+
if(REBUILD_CHARTABLES)
657689
add_executable(pcre2_dftables src/pcre2_dftables.c)
658690
add_custom_command(
659691
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
@@ -663,8 +695,12 @@ if(PCRE2_REBUILD_CHARTABLES)
663695
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
664696
VERBATIM
665697
)
666-
else()
698+
# Test the derived EBCDIC flag rather than the PCRE2_EBCDIC cache option:
# PCRE2_EBCDIC_IGNORING_COMPILER implies EBCDIC on its own, and in that case
# the ASCII .dist tables must not be selected.
elseif(NOT EBCDIC)
667699
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
700+
elseif(PCRE2_EBCDIC_NL25)
701+
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl25 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
702+
else()
703+
configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl15 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
668704
endif()
669705

670706
# Source code
@@ -1342,9 +1378,19 @@ if(PCRE2_SHOW_REPORT)
13421378
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
13431379
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
13441380
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
1345-
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
1346-
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
1347-
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")
1381+
1382+
if(NOT EBCDIC)
1383+
set(EBCDIC_NL_CODE "n/a")
1384+
elseif(EBCDIC_NL25)
1385+
set(EBCDIC_NL_CODE "0x25")
1386+
else()
1387+
set(EBCDIC_NL_CODE "0x15")
1388+
endif()
1389+
message(STATUS " EBCDIC coding ..................... : ${EBCDIC}")
1390+
message(STATUS " EBCDIC code for NL ................ : ${EBCDIC_NL_CODE}")
1391+
message(STATUS " EBCDIC coding ignoring compiler ... : ${PCRE2_EBCDIC_IGNORING_COMPILER}")
1392+
message(STATUS " Rebuild char tables ............... : ${REBUILD_CHARTABLES}")
1393+
13481394
message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
13491395
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
13501396
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")

Makefile.am

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,21 @@ src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
362362
rm -f $@
363363
./pcre2_dftables$(EXEEXT) $@
364364
else
365+
if WITH_EBCDIC
366+
if WITH_EBCDIC_NL25
367+
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25
368+
rm -f $@
369+
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25 $(abs_builddir)/src/pcre2_chartables.c
370+
else # WITH_EBCDIC_NL25
371+
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15
372+
rm -f $@
373+
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15 $(abs_builddir)/src/pcre2_chartables.c
374+
endif # WITH_EBCDIC_NL25
375+
else # WITH_EBCDIC
365376
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
366377
rm -f $@
367378
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
379+
endif # WITH_EBCDIC
368380
endif # WITH_REBUILD_CHARTABLES
369381

370382
BUILT_SOURCES = src/pcre2_chartables.c
@@ -460,7 +472,10 @@ endif # WITH_PCRE2_32
460472
# The pcre2_chartables.c.dist file is the default version of
461473
# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified.
462474

463-
EXTRA_DIST += src/pcre2_chartables.c.dist
475+
EXTRA_DIST += \
476+
src/pcre2_chartables.c.dist \
477+
src/pcre2_chartables.c.ebcdic-1047-nl15 \
478+
src/pcre2_chartables.c.ebcdic-1047-nl25
464479
CLEANFILES += src/pcre2_chartables.c
465480

466481
# The JIT compiler lives in a separate directory, but its files are #included

README

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,22 @@ library. They are also documented in the pcre2build man page.
309309

310310
--enable-ebcdic --disable-unicode
311311

312-
This automatically implies --enable-rebuild-chartables (see above). However,
313-
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
314-
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
315-
which specifies that the code value for the EBCDIC NL character is 0x25
316-
instead of the default 0x15.
312+
This automatically implies --enable-rebuild-chartables (see above), in order
313+
to ensure that you have the correct default character tables for your system's
314+
codepage. There is an exception when you set --enable-ebcdic-ignoring-compiler
315+
(see below), which allows using a default set of EBCDIC 1047 character tables
316+
rather than forcing use of --enable-rebuild-chartables.
317+
318+
When PCRE2 is built with EBCDIC support, it always operates in EBCDIC. It
319+
cannot support EBCDIC together with ASCII or UTF-8/16/32 in the same build.
320+
321+
There is a second option, --enable-ebcdic-nl25, which specifies that the code
322+
value for the EBCDIC NL character is 0x25 instead of the default 0x15.
323+
324+
There is a third option, --enable-ebcdic-ignoring-compiler, which disregards
325+
the compiler's codepage for determining the numeric value of C character
326+
constants such as 'z', and instead forces PCRE2 to use numeric constants for
327+
the EBCDIC 1047 codepage.
317328

318329
. If you specify --enable-debug, additional debugging code is included in the
319330
build. This option is intended for use by the PCRE2 maintainers.
@@ -822,6 +833,10 @@ The distribution should contain the files listed below.
822833
src/pcre2_chartables.c.dist a default set of character tables that assume
823834
ASCII coding; unless --enable-rebuild-chartables is
824835
specified, used by copying to pcre2_chartables.c
836+
src/pcre2_chartables.c.ebcdic-1047-{nl15,nl25} a default set of character
837+
tables for EBCDIC 1047; used if
838+
--enable-ebcdic-ignoring-compiler is specified
839+
without --enable-rebuild-chartables
825840

826841
src/pcre2posix.c )
827842
src/pcre2_auto_possess.c )

RunTest

Lines changed: 79 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,9 @@
4545
# very much more stack than normal. In environments where the stack can be
4646
# set at runtime, -bigstack sets a gigantic stack.
4747
#
48-
# There are two special cases where only one argument is allowed:
49-
#
50-
# If the first and only argument is "ebcdic", the script runs the special
51-
# EBCDIC test that can be useful for checking certain EBCDIC features, even
52-
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
53-
# this test to be run.
54-
#
55-
# If the script is obeyed as "RunTest list", a list of available tests is
56-
# output, but none of them are run.
48+
# Special cases where only one argument is allowed:
49+
# - If the script is invoked as "RunTest list", a list of available tests is
50+
# output, but none of them are run.
5751
###############################################################################
5852

5953
# Define test titles in variables so that they can be output as a list. Some
@@ -92,6 +86,7 @@ title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
9286
title27="Test 27: Auto-generated unicode property tests"
9387
maxtest=27
9488
titleheap="Test 'heap': Environment-specific heap tests"
89+
titleEBC="Test 'ebcdic': EBCDIC-specific tests"
9590

9691
if [ $# -eq 1 -a "$1" = "list" ]; then
9792
echo $title0
@@ -124,6 +119,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
124119
echo $title27
125120
echo ""
126121
echo $titleheap
122+
echo $titleEBC
127123
echo ""
128124
echo "Numbered tests are automatically run if nothing selected."
129125
echo "Named tests must be explicitly selected."
@@ -357,6 +353,12 @@ support32=$?
357353
$sim $pcre2test -C backslash-C >/dev/null
358354
supportBSC=$?
359355

356+
# Check if compiled in EBCDIC mode, and whether we have EBCDIC I/O
357+
$sim $pcre2test -C ebcdic >/dev/null
358+
ebcdic=$?
359+
$sim $pcre2test -C ebcdic-io >/dev/null
360+
ebcdic_io=$?
361+
360362
# Initialize all bitsizes skipped
361363

362364
test8=skip
@@ -435,34 +437,38 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
435437
$do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
436438
$doheap = no -a $doebcdic = no \
437439
]; then
438-
do0=yes
439-
do1=yes
440-
do2=yes
441-
do3=yes
442-
do4=yes
443-
do5=yes
444-
do6=yes
445-
do7=yes
446-
do8=yes
447-
do9=yes
448-
do10=yes
449-
do11=yes
450-
do12=yes
451-
do13=yes
452-
do14=yes
453-
do15=yes
454-
do16=yes
455-
do17=yes
456-
do18=yes
457-
do19=yes
458-
do20=yes
459-
do21=yes
460-
do22=yes
461-
do23=yes
462-
do24=yes
463-
do25=yes
464-
do26=yes
465-
do27=yes
440+
if [ $ebcdic -eq 0 ] ; then
441+
do0=yes
442+
do1=yes
443+
do2=yes
444+
do3=yes
445+
do4=yes
446+
do5=yes
447+
do6=yes
448+
do7=yes
449+
do8=yes
450+
do9=yes
451+
do10=yes
452+
do11=yes
453+
do12=yes
454+
do13=yes
455+
do14=yes
456+
do15=yes
457+
do16=yes
458+
do17=yes
459+
do18=yes
460+
do19=yes
461+
do20=yes
462+
do21=yes
463+
do22=yes
464+
do23=yes
465+
do24=yes
466+
do25=yes
467+
do26=yes
468+
do27=yes
469+
else
470+
doebcdic=yes
471+
fi
466472
fi
467473

468474
# Handle any explicit skips at this stage, so that an argument list may consist
@@ -921,24 +927,44 @@ for bmode in "$test8" "$test16" "$test32"; do
921927
checkresult $? heap-$bits ""
922928
fi
923929

924-
# End of loop for 8/16/32-bit tests
925-
done
926-
927-
928-
# ------ Special EBCDIC Test -------
930+
# Special EBCDIC tests
929931

930-
if [ $doebcdic = yes ] ; then
931-
$sim $valgrind $pcre2test -C ebcdic >/dev/null
932-
ebcdic=$?
933-
if [ $ebcdic -ne 1 ] ; then
934-
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
935-
exit 1
932+
if [ $doebcdic = yes ] ; then
933+
echo $titleEBC
934+
if [ $ebcdic -ne 1 ] ; then
935+
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
936+
exit 1
937+
fi
938+
if [ $ebcdic_io -eq 0 ] ; then
939+
# Our testdata files are in ASCII, and the pcre2test program is using
940+
# ASCII input: all easy.
941+
for opt in "" "-dfa"; do
942+
$sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinputEBC >testtry
943+
checkresult $? EBC "$opt"
944+
done
945+
else
946+
echo "Cannot run EBCDIC tests:"
947+
echo " Ironically we do not support running these tests on an actual"
948+
echo " EBCDIC system. The testdata files shipped with PCRE2 are in ASCII."
949+
echo " You may be able to run the tests manually if you know which"
950+
echo " EBCDIC codepage you used when compiling PCRE2, and then convert"
951+
echo " the testdata to match. For example, if the C compiler used to build"
952+
echo " PCRE2 was using IBM-1047:"
953+
echo ""
954+
echo " iconv -f ISO8859-1 -t IBM-1047 <testdata/testinputEBC >testinputEBC-native"
955+
# $bmode is passed bare to pcre2test as an option elsewhere in this loop, so it
# already carries its leading dash; do not prepend another one in the hint.
echo " pcre2test -q $bmode testinputEBC-native >testoutputEBC-native"
956+
# Escape $? so the hint prints it literally; unescaped, the shell would
# substitute the exit status of the preceding echo into the message.
echo " [ \$? -eq 0 ] || echo 'pcre2test failed'"
957+
echo " iconv -f IBM-1047 -t ISO8859-1 <testoutputEBC-native >testoutputEBC-ascii"
958+
echo " $cf testdata/testoutputEBC testoutputEBC-ascii"
959+
echo ""
960+
echo "This is speculative. The PCRE2 maintainers do not have access to an"
961+
echo "EBCDIC system to test this. Please report back if you try it."
962+
exit 1
963+
fi
936964
fi
937-
for opt in "" "-dfa"; do
938-
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
939-
checkresult $? EBC "$opt"
940-
done
941-
fi
965+
966+
# End of loop for 8/16/32-bit tests
967+
done
942968

943969

944970
# Clean up local working files

config-cmake.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#cmakedefine BSR_ANYCRLF 1
3939
#cmakedefine EBCDIC 1
4040
#cmakedefine EBCDIC_NL25 1
41+
#cmakedefine EBCDIC_IGNORING_COMPILER 1
4142
#cmakedefine HEAP_MATCH_RECURSE 1
4243
#cmakedefine NEVER_BACKSLASH_C 1
4344

0 commit comments

Comments
 (0)