Skip to content

pp_pack.c: Convert from using 'to_uvchr' functions to 'to_uv' #23564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: blead
Choose a base branch
from
155 changes: 98 additions & 57 deletions pp_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,12 +253,14 @@ utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
if (*s >= end) {
goto croak;
}
val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
if (retlen == (STRLEN) -1)
if (! utf8_to_uv_flags((U8 *) *s, (U8 *) end, &val, &retlen,
ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY))
{
croak:
croak("Malformed UTF-8 string in '%c' format in unpack",
(int) TYPE_NO_MODIFIERS(datumtype));
}

if (val >= 0x100) {
ck_warner(packWARN(WARN_UNPACK),
"Character in '%c' format wrapped in unpack",
Expand All @@ -279,49 +281,76 @@ S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t
UV val;
STRLEN retlen;
const char *from = *s;
int bad = 0;
const U32 flags = ckWARN(WARN_UTF8) ?
UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
bool bad = false;
const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
const bool needs_swap = NEEDS_SWAP(datumtype);

if (UNLIKELY(needs_swap))
buf += buf_len;

for (;buf_len > 0; buf_len--) {
AV * msgs = NULL;
for (; buf_len > 0; buf_len--) {
if (from >= end) return FALSE;
val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
if (retlen == (STRLEN) -1) {
from += UTF8_SAFE_SKIP(from, end);
bad |= 1;
} else from += retlen;
if (val >= 0x100) {
bad |= 2;
val = (U8) val;

AV * this_msgs = NULL;
if (utf8_to_uv_msgs((U8 *) from, (U8 *) end, &val, &retlen, flags,
NULL, &this_msgs))
{
if (val >= 0x100) {
bad = true;
val = (U8) val;
}
}

from += retlen;

/* Add any messages from this conversion to the list for later output. */
if (this_msgs) {
while (av_count(this_msgs) > 0) {
av_push(msgs, av_shift(this_msgs));
}

Safefree(this_msgs);
}

if (UNLIKELY(needs_swap))
*(U8 *)--buf = (U8)val;
else
*(U8 *)buf++ = (U8)val;
}

/* We have enough characters for the buffer. Did we have problems? */
if (bad) {
if (bad & 1) {
/* Rewalk the string fragment while warning */
const char *ptr;
const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
if (ptr >= end) break;
utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
}
if (from > end) from = end;
if (msgs) {
while (av_count(msgs) > 0) {
HV * msg_hash = (HV *) av_shift(msgs);
SV ** packed_categories_p = hv_fetchs(msg_hash, "warn_categories", 0);
if (packed_categories_p == NULL) {
continue;
}

UV packed_categories = SvUV(*packed_categories_p);
if (packed_categories == 0) {
continue;
}

SV ** warn_text_p = hv_fetchs(msg_hash, "text", 0);
if (warn_text_p) {
warner(packed_categories, "%s", SvPV_nolen(*warn_text_p));
}
}
if ((bad & 2))
ck_warner(packWARN(datumtype & TYPE_IS_PACK ?
WARN_PACK : WARN_UNPACK),
"Character(s) in '%c' format wrapped in %s",
(int) TYPE_NO_MODIFIERS(datumtype),
datumtype & TYPE_IS_PACK ? "pack" : "unpack");

Safefree(msgs);
}

if (bad) {
ck_warner(packWARN(datumtype & TYPE_IS_PACK ?
WARN_PACK : WARN_UNPACK),
"Character(s) in '%c' format wrapped in %s",
(int) TYPE_NO_MODIFIERS(datumtype),
datumtype & TYPE_IS_PACK ? "pack" : "unpack");
}

*s = from;
return TRUE;
}
Expand Down Expand Up @@ -408,16 +437,16 @@ STMT_START { \
} STMT_END

/* Only to be used inside a loop (see the break) */
#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \
STMT_START { \
STRLEN retlen; \
if (str >= end) break; \
val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \
if (retlen == (STRLEN) -1) { \
*cur = '\0'; \
croak("Malformed UTF-8 string in pack"); \
} \
str += retlen; \
/* NEXT_UNI_VAL(val, cur, str, end, utf8_flags)
 *
 * Fetches the next UTF-8 encoded value from the input at 'str' by calling
 * utf8_to_uv_flags() on the range ['str', 'end'), then advances 'str' by the
 * length that call reports.
 *
 *   val         lvalue whose address is passed to utf8_to_uv_flags() to
 *               receive the result (presumably the decoded code point)
 *   cur         pointer through which '\0' is stored just before croaking
 *               when utf8_to_uv_flags() returns false
 *   str         lvalue pointer to the current input position; updated
 *   end         pointer marking the end of the input
 *   utf8_flags  flags handed unchanged to utf8_to_uv_flags()
 *
 * If 'str' has already reached 'end', the macro executes 'break' instead of
 * decoding, which is why it may only be used inside a loop.
 * NOTE(review): that 'break' can only leave the caller's loop if
 * STMT_START/STMT_END do not themselves expand to a loop construct such as
 * do/while(0) -- confirm against their definitions.
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as 'p + 1' expand as intended, and the internal length variable carries a
 * macro-specific name so it cannot capture an identifier used in an
 * argument. */
#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)                        \
STMT_START {                                                                \
    STRLEN next_uni_val_len_;                                               \
    if ((str) >= (end)) break;                                              \
    if (! utf8_to_uv_flags((U8 *) (str), (U8 *) (end), &(val),              \
                           &next_uni_val_len_, (utf8_flags)))               \
    {                                                                       \
        *(cur) = '\0';                                                      \
        croak("Malformed UTF-8 string in pack");                            \
    }                                                                       \
    (str) += next_uni_val_len_;                                             \
} STMT_END

static const char *_action( const tempsym_t* symptr )
Expand Down Expand Up @@ -1230,17 +1259,28 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
case 'c':
while (len-- > 0 && s < strend) {
int aint;
if (utf8)
{
if (utf8) {
STRLEN retlen;
aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
if (retlen == (STRLEN) -1)
UV auv;
if (! utf8_to_uv_flags((U8 *) s, (U8 *) strend,
&auv, &retlen,
(ckWARN(WARN_UTF8))
? 0
: UTF8_ALLOW_ANY))
{
croak("Malformed UTF-8 string in unpack");
}

aint = auv;
if ( (UV) aint != auv) {
croak("Malformed UTF-8 string in unpack");
}

s += retlen;
}
else
aint = *(U8 *)(s)++;
}
else {
aint = *(U8 *)(s)++;
}
if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
aint -= 256;
if (!checksum)
Expand Down Expand Up @@ -1310,15 +1350,16 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
break;
len = UTF8SKIP(result);
if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
(char *) &result[1], len-1, 'U')) break;
auv = utf8n_to_uvchr(result, len, &retlen,
UTF8_ALLOW_DEFAULT);
(char *) &result[1], len - 1, 'U'))
{
break;
}

auv = utf8_to_uv_or_die(result, result + len, &retlen);
s = ptr;
} else {
auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
UTF8_ALLOW_DEFAULT);
if (retlen == (STRLEN) -1)
croak("Malformed UTF-8 string in unpack");
}
else {
(void) utf8_to_uv((U8 *) s, (U8 *) strend, &auv, &retlen);
s += retlen;
}
if (!checksum)
Expand Down
Loading