Skip to content

Commit 0fcec2c

Browse files
Jan H. Schönherr and gitster
authored and committed
format-patch: make rfc2047 encoding more strict
RFC 2047 requires more characters to be encoded than is currently done. In particular, RFC 2047 distinguishes between the characters allowed to remain unencoded in encoded words in addresses (From, To, etc.) and in other headers, such as Subject. Make add_rfc2047() and is_rfc2047_special() location dependent and include all non-allowed characters to hopefully be RFC 2047 conformant. This especially fixes a problem where RFC 822 specials (e.g. ".") were left unencoded in addresses, which was solved with a non-standard-conforming workaround in the past (which is going to be removed in a follow-up patch). Signed-off-by: Jan H. Schönherr <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent f9b7204 commit 0fcec2c

File tree

3 files changed

+72
-12
lines changed

3 files changed

+72
-12
lines changed

git-compat-util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ extern const char tolower_trans_tbl[256];
466466
#undef isdigit
467467
#undef isalpha
468468
#undef isalnum
469+
#undef isprint
469470
#undef islower
470471
#undef isupper
471472
#undef tolower
@@ -483,6 +484,7 @@ extern unsigned char sane_ctype[256];
483484
#define isdigit(x) sane_istest(x,GIT_DIGIT)
484485
#define isalpha(x) sane_istest(x,GIT_ALPHA)
485486
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
487+
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
486488
#define islower(x) sane_iscase(x, 1)
487489
#define isupper(x) sane_iscase(x, 0)
488490
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)

pretty.c

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -272,16 +272,65 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
272272
strbuf_addch(out, '"');
273273
}
274274

275-
static int is_rfc2047_special(char ch)
275+
enum rfc2047_type {
276+
RFC2047_SUBJECT,
277+
RFC2047_ADDRESS,
278+
};
279+
280+
static int is_rfc2047_special(char ch, enum rfc2047_type type)
276281
{
277-
if (ch == ' ' || ch == '\n')
282+
/*
283+
* rfc2047, section 4.2:
284+
*
285+
* 8-bit values which correspond to printable ASCII characters other
286+
* than "=", "?", and "_" (underscore), MAY be represented as those
287+
* characters. (But see section 5 for restrictions.) In
288+
* particular, SPACE and TAB MUST NOT be represented as themselves
289+
* within encoded words.
290+
*/
291+
292+
/*
293+
* rule out non-ASCII characters and non-printable characters (the
294+
* non-ASCII check should be redundant as isprint() is not localized
295+
* and only knows about ASCII, but be defensive about that)
296+
*/
297+
if (non_ascii(ch) || !isprint(ch))
298+
return 1;
299+
300+
/*
301+
* rule out special printable characters (' ' should be the only
302+
* whitespace character considered printable, but be defensive and use
303+
* isspace())
304+
*/
305+
if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
278306
return 1;
279307

280-
return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
308+
/*
309+
* rfc2047, section 5.3:
310+
*
311+
* As a replacement for a 'word' entity within a 'phrase', for example,
312+
* one that precedes an address in a From, To, or Cc header. The ABNF
313+
* definition for 'phrase' from RFC 822 thus becomes:
314+
*
315+
* phrase = 1*( encoded-word / word )
316+
*
317+
* In this case the set of characters that may be used in a "Q"-encoded
318+
* 'encoded-word' is restricted to: <upper and lower case ASCII
319+
* letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
320+
* (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
321+
* 'phrase' MUST be separated from any adjacent 'word', 'text' or
322+
* 'special' by 'linear-white-space'.
323+
*/
324+
325+
if (type != RFC2047_ADDRESS)
326+
return 0;
327+
328+
/* '=' and '_' are special cases and have been checked above */
329+
return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
281330
}
282331

283332
static void add_rfc2047(struct strbuf *sb, const char *line, int len,
284-
const char *encoding)
333+
const char *encoding, enum rfc2047_type type)
285334
{
286335
static const int max_length = 78; /* per rfc2822 */
287336
static const int max_encoded_length = 76; /* per rfc2047 */
@@ -304,7 +353,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len,
304353
line_len += strlen(encoding) + 5; /* 5 for =??q? */
305354
for (i = 0; i < len; i++) {
306355
unsigned ch = line[i] & 0xFF;
307-
int is_special = is_rfc2047_special(ch);
356+
int is_special = is_rfc2047_special(ch, type);
308357

309358
/*
310359
* According to RFC 2047, we could encode the special character
@@ -358,11 +407,13 @@ void pp_user_info(const struct pretty_print_context *pp,
358407
display_name_length = name_tail - line;
359408
strbuf_addstr(sb, "From: ");
360409
if (!has_rfc822_specials(line, display_name_length)) {
361-
add_rfc2047(sb, line, display_name_length, encoding);
410+
add_rfc2047(sb, line, display_name_length,
411+
encoding, RFC2047_ADDRESS);
362412
} else {
363413
struct strbuf quoted = STRBUF_INIT;
364414
add_rfc822_quoted(&quoted, line, display_name_length);
365-
add_rfc2047(sb, quoted.buf, quoted.len, encoding);
415+
add_rfc2047(sb, quoted.buf, quoted.len,
416+
encoding, RFC2047_ADDRESS);
366417
strbuf_release(&quoted);
367418
}
368419
if (namelen - display_name_length + last_line_length(sb) > 78) {
@@ -1294,7 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp,
12941345
strbuf_grow(sb, title.len + 1024);
12951346
if (pp->subject) {
12961347
strbuf_addstr(sb, pp->subject);
1297-
add_rfc2047(sb, title.buf, title.len, encoding);
1348+
add_rfc2047(sb, title.buf, title.len, encoding, RFC2047_SUBJECT);
12981349
} else {
12991350
strbuf_addbuf(sb, &title);
13001351
}

t/t4014-format-patch.sh

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -818,21 +818,28 @@ check_author() {
818818
cat >expect <<'EOF'
819819
From: "Foo B. Bar" <[email protected]>
820820
EOF
821-
test_expect_success 'format-patch quotes dot in headers' '
821+
test_expect_success 'format-patch quotes dot in from-headers' '
822822
check_author "Foo B. Bar"
823823
'
824824

825825
cat >expect <<'EOF'
826826
From: "Foo \"The Baz\" Bar" <[email protected]>
827827
EOF
828-
test_expect_success 'format-patch quotes double-quote in headers' '
828+
test_expect_success 'format-patch quotes double-quote in from-headers' '
829829
check_author "Foo \"The Baz\" Bar"
830830
'
831831

832832
cat >expect <<'EOF'
833-
From: =?UTF-8?q?"F=C3=B6o=20B.=20Bar"?= <[email protected]>
833+
From: =?UTF-8?q?F=C3=B6o=20Bar?= <[email protected]>
834834
EOF
835-
test_expect_success 'rfc2047-encoded headers also double-quote 822 specials' '
835+
test_expect_success 'format-patch uses rfc2047-encoded from-headers when necessary' '
836+
check_author "Föo Bar"
837+
'
838+
839+
cat >expect <<'EOF'
840+
From: =?UTF-8?q?F=C3=B6o=20B=2E=20Bar?= <[email protected]>
841+
EOF
842+
test_expect_failure 'rfc2047-encoded from-headers leave no rfc822 specials' '
836843
check_author "Föo B. Bar"
837844
'
838845

0 commit comments

Comments
 (0)