Skip to content

Commit 80f266d

Browse files
committed
utf8n_to_uvchr_msgs(): Macroize a common paradigm
Each of the cases in the loop had a somewhat obscure conditional that had a few variants. This commit creates a macro that puts the complication in one place.
1 parent 5f8a785 commit 80f266d

File tree

1 file changed

+59
-49
lines changed

1 file changed

+59
-49
lines changed

utf8.c

Lines changed: 59 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,6 +1673,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
16731673
*msgs = NULL;
16741674
}
16751675

1676+
1677+
/* Returns 0 if no message needs to be generated for this problem even
1678+
* if everything else says to. Otherwise returns the warning category
1679+
* to use for the message.
1680+
*
1681+
* No message need be generated if the UTF8_CHECK_ONLY flag has been
1682+
* set by the caller. Otherwise, a message should be generated if
1683+
* either:
1684+
* 1) the caller has furnished a structure into which messages should
1685+
* be returned to it (so it itself can decide what to do); or
1686+
* 2) warnings are enabled for either of the category parameters to the
1687+
* macro.
1688+
*
1689+
* The 'warning' parameter is the higher priority warning category to
1690+
* check. The macro calls ckWARN_d(warning), so warnings for it are
1691+
* considered to be on by default.
1692+
*
1693+
* The second, lower priority category is optional. To specify not to
1694+
* use one, call the macro
1695+
* like: NEED_MESSAGE(WARN_FOO,,)
1696+
* Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR)
1697+
*
1698+
* The second parameter could also have been ckWARN to specify that the
1699+
* second category isn't on by default.
1700+
*
1701+
* When called without a second category, the macro outputs a bunch of
1702+
* zeroes that the compiler should fold to nothing */
1703+
#define NEED_MESSAGE(warning, extra_ckWARN, extra_category) \
1704+
((flags & UTF8_CHECK_ONLY) ? 0 : \
1705+
((ckWARN_d(warning)) ? warning : \
1706+
((extra_ckWARN(extra_category +0)) ? extra_category +0 : \
1707+
((msgs) ? warning : 0))))
1708+
16761709
while (possible_problems) { /* Handle each possible problem */
16771710
U32 pack_warn = 0;
16781711
char * message = NULL;
@@ -1722,29 +1755,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17221755
|| (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
17231756
{
17241757

1725-
/* The warnings code explicitly says it doesn't handle the
1726-
* case of packWARN2 and two categories which have
1727-
* parent-child relationship. Even if it works now to
1728-
* raise the warning if either is enabled, it wouldn't
1729-
* necessarily do so in the future. We output (only) the
1730-
* most dire warning */
1731-
if (! (flags & UTF8_CHECK_ONLY)) {
1732-
if (ckWARN_d(WARN_UTF8)) {
1733-
pack_warn = packWARN(WARN_UTF8);
1734-
}
1735-
else if (ckWARN_d(WARN_NON_UNICODE)) {
1736-
pack_warn = packWARN(WARN_NON_UNICODE);
1737-
}
1738-
else if (msgs) {
1739-
pack_warn = packWARN(WARN_UTF8);
1740-
}
1741-
1742-
if (pack_warn) {
1743-
message = Perl_form(aTHX_ "%s: %s (overflows)",
1758+
/* Overflow is a hybrid. If the word size on this platform
1759+
* were wide enough for this to not overflow, a non-Unicode
1760+
* code point would have been generated. If the caller
1761+
* wanted warnings for such code points, the warning
1762+
* category would be WARN_NON_UNICODE, On the other hand,
1763+
* overflow is considered a malformation, which is serious,
1764+
* and the category would be just WARN_UTF8. We clearly
1765+
* should warn if either category is enabled, but which
1766+
* category to use? Historically, we've used 'utf8' if it
1767+
* is enabled; and that seems like the more severe
1768+
* category, more befitting a malformation. */
1769+
pack_warn = NEED_MESSAGE(WARN_UTF8,
1770+
ckWARN_d, WARN_NON_UNICODE);
1771+
if (pack_warn) {
1772+
message = Perl_form(aTHX_ "%s: %s (overflows)",
17441773
malformed_text,
17451774
_byte_dump_string(s0, curlen, 0));
1746-
this_flag_bit = UTF8_GOT_OVERFLOW;
1747-
}
1775+
this_flag_bit = UTF8_GOT_OVERFLOW;
17481776
}
17491777
}
17501778

@@ -1761,9 +1789,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17611789
assert(0);
17621790

17631791
disallowed = TRUE;
1764-
if ( (msgs
1765-
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
1766-
{
1792+
if (NEED_MESSAGE(WARN_UTF8,,)) {
17671793
pack_warn = packWARN(WARN_UTF8);
17681794
message = Perl_form(aTHX_ "%s (empty string)",
17691795
malformed_text);
@@ -1778,9 +1804,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17781804

17791805
if (! (flags & UTF8_ALLOW_CONTINUATION)) {
17801806
disallowed = TRUE;
1781-
if (( msgs
1782-
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
1783-
{
1807+
if (NEED_MESSAGE(WARN_UTF8,,)) {
17841808
pack_warn = packWARN(WARN_UTF8);
17851809
message = Perl_form(aTHX_
17861810
"%s: %s (unexpected continuation byte 0x%02x,"
@@ -1799,9 +1823,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17991823

18001824
if (! (flags & UTF8_ALLOW_SHORT)) {
18011825
disallowed = TRUE;
1802-
if (( msgs
1803-
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
1804-
{
1826+
if (NEED_MESSAGE(WARN_UTF8,,)) {
18051827
pack_warn = packWARN(WARN_UTF8);
18061828
message = Perl_form(aTHX_
18071829
"%s: %s (too short; %d byte%s available, need %d)",
@@ -1822,9 +1844,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18221844

18231845
if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
18241846
disallowed = TRUE;
1825-
if (( msgs
1826-
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
1827-
{
1847+
if (NEED_MESSAGE(WARN_UTF8,,)) {
18281848

18291849
/* If we don't know for sure that the input length is
18301850
* valid, avoid as much as possible reading past the
@@ -1849,9 +1869,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18491869
if (flags & UTF8_WARN_SURROGATE) {
18501870
*errors |= UTF8_GOT_SURROGATE;
18511871

1852-
if ( ! (flags & UTF8_CHECK_ONLY)
1853-
&& (msgs || ckWARN_d(WARN_SURROGATE)))
1854-
{
1872+
if (NEED_MESSAGE(WARN_SURROGATE,,)) {
18551873
pack_warn = packWARN(WARN_SURROGATE);
18561874

18571875
/* These are the only errors that can occur with a
@@ -1881,9 +1899,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18811899
if (flags & UTF8_WARN_SUPER) {
18821900
*errors |= UTF8_GOT_SUPER;
18831901

1884-
if ( ! (flags & UTF8_CHECK_ONLY)
1885-
&& (msgs || ckWARN_d(WARN_NON_UNICODE)))
1886-
{
1902+
if (NEED_MESSAGE(WARN_NON_UNICODE,,)) {
18871903
pack_warn = packWARN(WARN_NON_UNICODE);
18881904

18891905
if (orig_problems & UTF8_GOT_TOO_SHORT) {
@@ -1904,10 +1920,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19041920
* and before possibly bailing out, so that the more dire
19051921
* warning will override the regular one. */
19061922
if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) {
1907-
if ( ! (flags & UTF8_CHECK_ONLY)
1908-
&& (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
1909-
&& (msgs || ( ckWARN_d(WARN_NON_UNICODE)
1910-
|| ckWARN(WARN_PORTABLE))))
1923+
if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
1924+
&& NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE))
19111925
{
19121926
pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE);
19131927

@@ -1957,9 +1971,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19571971
if (flags & UTF8_WARN_NONCHAR) {
19581972
*errors |= UTF8_GOT_NONCHAR;
19591973

1960-
if ( ! (flags & UTF8_CHECK_ONLY)
1961-
&& (msgs || ckWARN_d(WARN_NONCHAR)))
1962-
{
1974+
if (NEED_MESSAGE(WARN_NONCHAR,,)) {
19631975
/* The code above should have guaranteed that we don't
19641976
* get here with errors other than overlong */
19651977
assert (! (orig_problems
@@ -1994,9 +2006,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
19942006
else {
19952007
disallowed = TRUE;
19962008

1997-
if (( msgs
1998-
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
1999-
{
2009+
if (NEED_MESSAGE(WARN_UTF8,,)) {
20002010
pack_warn = packWARN(WARN_UTF8);
20012011

20022012
/* These error types cause 'uv' to be something that

0 commit comments

Comments
 (0)