Skip to content

Commit 64b22a5

Browse files
committed
Merge branch 'js/format-2047'
Fixes many rfc2047 quoting issues in the output from format-patch.

* js/format-2047:
  - format-patch tests: check quoting/encoding in To: and Cc: headers
  - format-patch: fix rfc2047 address encoding with respect to rfc822 specials
  - format-patch: make rfc2047 encoding more strict
  - format-patch: introduce helper function last_line_length()
  - format-patch: do not wrap rfc2047 encoded headers too late
  - format-patch: do not wrap non-rfc2047 headers too early
  - utf8: fix off-by-one wrapping of text
2 parents 15ba878 + 25dc8da commit 64b22a5

File tree

5 files changed

+262
-126
lines changed

5 files changed

+262
-126
lines changed

git-compat-util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ extern const char tolower_trans_tbl[256];
506506
#undef isdigit
507507
#undef isalpha
508508
#undef isalnum
509+
#undef isprint
509510
#undef islower
510511
#undef isupper
511512
#undef tolower
@@ -523,6 +524,7 @@ extern unsigned char sane_ctype[256];
523524
#define isdigit(x) sane_istest(x,GIT_DIGIT)
524525
#define isalpha(x) sane_istest(x,GIT_ALPHA)
525526
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
527+
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
526528
#define islower(x) sane_iscase(x, 1)
527529
#define isupper(x) sane_iscase(x, 0)
528530
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)

pretty.c

Lines changed: 111 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch)
231231
}
232232
}
233233

234-
static int has_rfc822_specials(const char *s, int len)
234+
static int needs_rfc822_quoting(const char *s, int len)
235235
{
236236
int i;
237237
for (i = 0; i < len; i++)
@@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len)
240240
return 0;
241241
}
242242

243+
static int last_line_length(struct strbuf *sb)
244+
{
245+
int i;
246+
247+
/* How many bytes are already used on the last line? */
248+
for (i = sb->len - 1; i >= 0; i--)
249+
if (sb->buf[i] == '\n')
250+
break;
251+
return sb->len - (i + 1);
252+
}
253+
243254
static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
244255
{
245256
int i;
@@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
261272
strbuf_addch(out, '"');
262273
}
263274

264-
static int is_rfc2047_special(char ch)
275+
enum rfc2047_type {
276+
RFC2047_SUBJECT,
277+
RFC2047_ADDRESS,
278+
};
279+
280+
static int is_rfc2047_special(char ch, enum rfc2047_type type)
265281
{
266-
return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
282+
/*
283+
* rfc2047, section 4.2:
284+
*
285+
* 8-bit values which correspond to printable ASCII characters other
286+
* than "=", "?", and "_" (underscore), MAY be represented as those
287+
* characters. (But see section 5 for restrictions.) In
288+
* particular, SPACE and TAB MUST NOT be represented as themselves
289+
* within encoded words.
290+
*/
291+
292+
/*
293+
* rule out non-ASCII characters and non-printable characters (the
294+
* non-ASCII check should be redundant as isprint() is not localized
295+
* and only knows about ASCII, but be defensive about that)
296+
*/
297+
if (non_ascii(ch) || !isprint(ch))
298+
return 1;
299+
300+
/*
301+
* rule out special printable characters (' ' should be the only
302+
* whitespace character considered printable, but be defensive and use
303+
* isspace())
304+
*/
305+
if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
306+
return 1;
307+
308+
/*
309+
* rfc2047, section 5.3:
310+
*
311+
* As a replacement for a 'word' entity within a 'phrase', for example,
312+
* one that precedes an address in a From, To, or Cc header. The ABNF
313+
* definition for 'phrase' from RFC 822 thus becomes:
314+
*
315+
* phrase = 1*( encoded-word / word )
316+
*
317+
* In this case the set of characters that may be used in a "Q"-encoded
318+
* 'encoded-word' is restricted to: <upper and lower case ASCII
319+
* letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
320+
* (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
321+
* 'phrase' MUST be separated from any adjacent 'word', 'text' or
322+
* 'special' by 'linear-white-space'.
323+
*/
324+
325+
if (type != RFC2047_ADDRESS)
326+
return 0;
327+
328+
/* '=' and '_' are special cases and have been checked above */
329+
return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
267330
}
268331

269-
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
270-
const char *encoding)
332+
static int needs_rfc2047_encoding(const char *line, int len,
333+
enum rfc2047_type type)
271334
{
272-
static const int max_length = 78; /* per rfc2822 */
273335
int i;
274-
int line_len;
275-
276-
/* How many bytes are already used on the current line? */
277-
for (i = sb->len - 1; i >= 0; i--)
278-
if (sb->buf[i] == '\n')
279-
break;
280-
line_len = sb->len - (i+1);
281336

282337
for (i = 0; i < len; i++) {
283338
int ch = line[i];
284339
if (non_ascii(ch) || ch == '\n')
285-
goto needquote;
340+
return 1;
286341
if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
287-
goto needquote;
342+
return 1;
288343
}
289-
strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len);
290-
return;
291344

292-
needquote:
345+
return 0;
346+
}
347+
348+
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
349+
const char *encoding, enum rfc2047_type type)
350+
{
351+
static const int max_encoded_length = 76; /* per rfc2047 */
352+
int i;
353+
int line_len = last_line_length(sb);
354+
293355
strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
294356
strbuf_addf(sb, "=?%s?q?", encoding);
295357
line_len += strlen(encoding) + 5; /* 5 for =??q? */
296358
for (i = 0; i < len; i++) {
297359
unsigned ch = line[i] & 0xFF;
360+
int is_special = is_rfc2047_special(ch, type);
361+
362+
/*
363+
* According to RFC 2047, we could encode the special character
364+
* ' ' (space) with '_' (underscore) for readability. But many
365+
* programs do not understand this and just leave the
366+
* underscore in place. Thus, we do nothing special here, which
367+
* causes ' ' to be encoded as '=20', avoiding this problem.
368+
*/
298369

299-
if (line_len >= max_length - 2) {
370+
if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) {
300371
strbuf_addf(sb, "?=\n =?%s?q?", encoding);
301372
line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */
302373
}
303374

304-
/*
305-
* We encode ' ' using '=20' even though rfc2047
306-
* allows using '_' for readability. Unfortunately,
307-
* many programs do not understand this and just
308-
* leave the underscore in place.
309-
*/
310-
if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') {
375+
if (is_special) {
311376
strbuf_addf(sb, "=%02X", ch);
312377
line_len += 3;
313-
}
314-
else {
378+
} else {
315379
strbuf_addch(sb, ch);
316380
line_len++;
317381
}
@@ -323,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp,
323387
const char *what, struct strbuf *sb,
324388
const char *line, const char *encoding)
325389
{
390+
int max_length = 78; /* per rfc2822 */
326391
char *date;
327392
int namelen;
328393
unsigned long time;
@@ -340,25 +405,27 @@ void pp_user_info(const struct pretty_print_context *pp,
340405
if (pp->fmt == CMIT_FMT_EMAIL) {
341406
char *name_tail = strchr(line, '<');
342407
int display_name_length;
343-
int final_line;
344408
if (!name_tail)
345409
return;
346410
while (line < name_tail && isspace(name_tail[-1]))
347411
name_tail--;
348412
display_name_length = name_tail - line;
349413
strbuf_addstr(sb, "From: ");
350-
if (!has_rfc822_specials(line, display_name_length)) {
351-
add_rfc2047(sb, line, display_name_length, encoding);
352-
} else {
414+
if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) {
415+
add_rfc2047(sb, line, display_name_length,
416+
encoding, RFC2047_ADDRESS);
417+
max_length = 76; /* per rfc2047 */
418+
} else if (needs_rfc822_quoting(line, display_name_length)) {
353419
struct strbuf quoted = STRBUF_INIT;
354420
add_rfc822_quoted(&quoted, line, display_name_length);
355-
add_rfc2047(sb, quoted.buf, quoted.len, encoding);
421+
strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
422+
-6, 1, max_length);
356423
strbuf_release(&quoted);
424+
} else {
425+
strbuf_add_wrapped_bytes(sb, line, display_name_length,
426+
-6, 1, max_length);
357427
}
358-
for (final_line = 0; final_line < sb->len; final_line++)
359-
if (sb->buf[sb->len - final_line - 1] == '\n')
360-
break;
361-
if (namelen - display_name_length + final_line > 78) {
428+
if (namelen - display_name_length + last_line_length(sb) > max_length) {
362429
strbuf_addch(sb, '\n');
363430
if (!isspace(name_tail[0]))
364431
strbuf_addch(sb, ' ');
@@ -1278,6 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp,
12781345
const char *encoding,
12791346
int need_8bit_cte)
12801347
{
1348+
static const int max_length = 78; /* per rfc2822 */
12811349
struct strbuf title;
12821350

12831351
strbuf_init(&title, 80);
@@ -1287,7 +1355,12 @@ void pp_title_line(const struct pretty_print_context *pp,
12871355
strbuf_grow(sb, title.len + 1024);
12881356
if (pp->subject) {
12891357
strbuf_addstr(sb, pp->subject);
1290-
add_rfc2047(sb, title.buf, title.len, encoding);
1358+
if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT))
1359+
add_rfc2047(sb, title.buf, title.len,
1360+
encoding, RFC2047_SUBJECT);
1361+
else
1362+
strbuf_add_wrapped_bytes(sb, title.buf, title.len,
1363+
-last_line_length(sb), 1, max_length);
12911364
} else {
12921365
strbuf_addbuf(sb, &title);
12931366
}

0 commit comments

Comments
 (0)