@@ -1673,6 +1673,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1673
1673
* msgs = NULL ;
1674
1674
}
1675
1675
1676
+
1677
+ /* Returns 0 if no message needs to be generated for this problem even
1678
+ * if everything else says to. Otherwise returns the warning category
1679
+ * to use for the message.
1680
+ *
1681
+ * No message need be generated if the UTF8_CHECK_ONLY flag has been
1682
+ * set by the caller. Otherwise, a message should be generated if
1683
+ * either:
1684
+ * 1) the caller has furnished a structure into which messages should
1685
+ * be returned to it (so it itself can decide what to do); or
1686
+ * 2) warnings are enabled for either of the category parameters to the
1687
+ * macro.
1688
+ *
1689
+ * The 'warning' parameter is the higher priority warning category to
1690
+ * check. The macro calls ckWARN_d(warning), so warnings for it are
1691
+ * considered to be on by default.
1692
+ *
1693
+ * The second, lower priority category is optional. To specify not to
1694
+ * use one, call the macro
1695
+ * like: NEED_MESSAGE(WARN_FOO,,)
1696
+ * Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR)
1697
+ *
1698
+ * The second parameter could also have been ckWARN to specify that the
1699
+ * second category isn't on by default.
1700
+ *
1701
+ * When called without a second category, the macro outputs a bunch of
1702
+ * zeroes that the compiler should fold to nothing */
1703
+ #define NEED_MESSAGE (warning , extra_ckWARN , extra_category ) \
1704
+ ((flags & UTF8_CHECK_ONLY) ? 0 : \
1705
+ ((ckWARN_d(warning)) ? warning : \
1706
+ ((extra_ckWARN(extra_category +0)) ? extra_category +0 : \
1707
+ ((msgs) ? warning : 0))))
1708
+
1676
1709
while (possible_problems ) { /* Handle each possible problem */
1677
1710
U32 pack_warn = 0 ;
1678
1711
char * message = NULL ;
@@ -1722,29 +1755,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1722
1755
|| (flags & (UTF8_WARN_SUPER |UTF8_WARN_PERL_EXTENDED )))
1723
1756
{
1724
1757
1725
- /* The warnings code explicitly says it doesn't handle the
1726
- * case of packWARN2 and two categories which have
1727
- * parent-child relationship. Even if it works now to
1728
- * raise the warning if either is enabled, it wouldn't
1729
- * necessarily do so in the future. We output (only) the
1730
- * most dire warning */
1731
- if (! (flags & UTF8_CHECK_ONLY )) {
1732
- if (ckWARN_d (WARN_UTF8 )) {
1733
- pack_warn = packWARN (WARN_UTF8 );
1734
- }
1735
- else if (ckWARN_d (WARN_NON_UNICODE )) {
1736
- pack_warn = packWARN (WARN_NON_UNICODE );
1737
- }
1738
- else if (msgs ) {
1739
- pack_warn = packWARN (WARN_UTF8 );
1740
- }
1741
-
1742
- if (pack_warn ) {
1743
- message = Perl_form (aTHX_ "%s: %s (overflows)" ,
1758
+ /* Overflow is a hybrid. If the word size on this platform
1759
+ * were wide enough for this to not overflow, a non-Unicode
1760
+ * code point would have been generated. If the caller
1761
+ * wanted warnings for such code points, the warning
1762
+ * category would be WARN_NON_UNICODE, On the other hand,
1763
+ * overflow is considered a malformation, which is serious,
1764
+ * and the category would be just WARN_UTF8. We clearly
1765
+ * should warn if either category is enabled, but which
1766
+ * category to use? Historically, we've used 'utf8' if it
1767
+ * is enabled; and that seems like the more severe
1768
+ * category, more befitting a malformation. */
1769
+ pack_warn = NEED_MESSAGE (WARN_UTF8 ,
1770
+ ckWARN_d , WARN_NON_UNICODE );
1771
+ if (pack_warn ) {
1772
+ message = Perl_form (aTHX_ "%s: %s (overflows)" ,
1744
1773
malformed_text ,
1745
1774
_byte_dump_string (s0 , curlen , 0 ));
1746
- this_flag_bit = UTF8_GOT_OVERFLOW ;
1747
- }
1775
+ this_flag_bit = UTF8_GOT_OVERFLOW ;
1748
1776
}
1749
1777
}
1750
1778
@@ -1761,9 +1789,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1761
1789
assert (0 );
1762
1790
1763
1791
disallowed = TRUE;
1764
- if ( (msgs
1765
- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1766
- {
1792
+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
1767
1793
pack_warn = packWARN (WARN_UTF8 );
1768
1794
message = Perl_form (aTHX_ "%s (empty string)" ,
1769
1795
malformed_text );
@@ -1778,9 +1804,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1778
1804
1779
1805
if (! (flags & UTF8_ALLOW_CONTINUATION )) {
1780
1806
disallowed = TRUE;
1781
- if (( msgs
1782
- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1783
- {
1807
+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
1784
1808
pack_warn = packWARN (WARN_UTF8 );
1785
1809
message = Perl_form (aTHX_
1786
1810
"%s: %s (unexpected continuation byte 0x%02x,"
@@ -1799,9 +1823,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1799
1823
1800
1824
if (! (flags & UTF8_ALLOW_SHORT )) {
1801
1825
disallowed = TRUE;
1802
- if (( msgs
1803
- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1804
- {
1826
+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
1805
1827
pack_warn = packWARN (WARN_UTF8 );
1806
1828
message = Perl_form (aTHX_
1807
1829
"%s: %s (too short; %d byte%s available, need %d)" ,
@@ -1822,9 +1844,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1822
1844
1823
1845
if (! (flags & UTF8_ALLOW_NON_CONTINUATION )) {
1824
1846
disallowed = TRUE;
1825
- if (( msgs
1826
- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1827
- {
1847
+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
1828
1848
1829
1849
/* If we don't know for sure that the input length is
1830
1850
* valid, avoid as much as possible reading past the
@@ -1849,9 +1869,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1849
1869
if (flags & UTF8_WARN_SURROGATE ) {
1850
1870
* errors |= UTF8_GOT_SURROGATE ;
1851
1871
1852
- if ( ! (flags & UTF8_CHECK_ONLY )
1853
- && (msgs || ckWARN_d (WARN_SURROGATE )))
1854
- {
1872
+ if (NEED_MESSAGE (WARN_SURROGATE ,,)) {
1855
1873
pack_warn = packWARN (WARN_SURROGATE );
1856
1874
1857
1875
/* These are the only errors that can occur with a
@@ -1881,9 +1899,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1881
1899
if (flags & UTF8_WARN_SUPER ) {
1882
1900
* errors |= UTF8_GOT_SUPER ;
1883
1901
1884
- if ( ! (flags & UTF8_CHECK_ONLY )
1885
- && (msgs || ckWARN_d (WARN_NON_UNICODE )))
1886
- {
1902
+ if (NEED_MESSAGE (WARN_NON_UNICODE ,,)) {
1887
1903
pack_warn = packWARN (WARN_NON_UNICODE );
1888
1904
1889
1905
if (orig_problems & UTF8_GOT_TOO_SHORT ) {
@@ -1904,10 +1920,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1904
1920
* and before possibly bailing out, so that the more dire
1905
1921
* warning will override the regular one. */
1906
1922
if (UNLIKELY (UTF8_IS_PERL_EXTENDED (s0 ))) {
1907
- if ( ! (flags & UTF8_CHECK_ONLY )
1908
- && (flags & (UTF8_WARN_PERL_EXTENDED |UTF8_WARN_SUPER ))
1909
- && (msgs || ( ckWARN_d (WARN_NON_UNICODE )
1910
- || ckWARN (WARN_PORTABLE ))))
1923
+ if ( (flags & (UTF8_WARN_PERL_EXTENDED |UTF8_WARN_SUPER ))
1924
+ && NEED_MESSAGE (WARN_NON_UNICODE , ckWARN , WARN_PORTABLE ))
1911
1925
{
1912
1926
pack_warn = packWARN2 (WARN_NON_UNICODE , WARN_PORTABLE );
1913
1927
@@ -1957,9 +1971,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1957
1971
if (flags & UTF8_WARN_NONCHAR ) {
1958
1972
* errors |= UTF8_GOT_NONCHAR ;
1959
1973
1960
- if ( ! (flags & UTF8_CHECK_ONLY )
1961
- && (msgs || ckWARN_d (WARN_NONCHAR )))
1962
- {
1974
+ if (NEED_MESSAGE (WARN_NONCHAR ,,)) {
1963
1975
/* The code above should have guaranteed that we don't
1964
1976
* get here with errors other than overlong */
1965
1977
assert (! (orig_problems
@@ -1994,9 +2006,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1994
2006
else {
1995
2007
disallowed = TRUE;
1996
2008
1997
- if (( msgs
1998
- || ckWARN_d (WARN_UTF8 )) && ! (flags & UTF8_CHECK_ONLY ))
1999
- {
2009
+ if (NEED_MESSAGE (WARN_UTF8 ,,)) {
2000
2010
pack_warn = packWARN (WARN_UTF8 );
2001
2011
2002
2012
/* These error types cause 'uv' to be something that
0 commit comments