@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch)
231
231
}
232
232
}
233
233
234
- static int has_rfc822_specials (const char * s , int len )
234
+ static int needs_rfc822_quoting (const char * s , int len )
235
235
{
236
236
int i ;
237
237
for (i = 0 ; i < len ; i ++ )
@@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len)
240
240
return 0 ;
241
241
}
242
242
243
+ static int last_line_length (struct strbuf * sb )
244
+ {
245
+ int i ;
246
+
247
+ /* How many bytes are already used on the last line? */
248
+ for (i = sb -> len - 1 ; i >= 0 ; i -- )
249
+ if (sb -> buf [i ] == '\n' )
250
+ break ;
251
+ return sb -> len - (i + 1 );
252
+ }
253
+
243
254
static void add_rfc822_quoted (struct strbuf * out , const char * s , int len )
244
255
{
245
256
int i ;
@@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
261
272
strbuf_addch (out , '"' );
262
273
}
263
274
264
- static int is_rfc2047_special (char ch )
275
/*
 * Context in which an RFC 2047 encoded-word is going to be emitted.
 * The context decides which characters may stay literal inside a
 * "Q"-encoded word (see is_rfc2047_special()).
 */
enum rfc2047_type {
	/* Free-form text such as the title line; only the basic rules apply. */
	RFC2047_SUBJECT,
	/* Display name in front of an address; the stricter 'phrase' rules apply. */
	RFC2047_ADDRESS,
};
279
+
280
+ static int is_rfc2047_special (char ch , enum rfc2047_type type )
265
281
{
266
- return (non_ascii (ch ) || (ch == '=' ) || (ch == '?' ) || (ch == '_' ));
282
+ /*
283
+ * rfc2047, section 4.2:
284
+ *
285
+ * 8-bit values which correspond to printable ASCII characters other
286
+ * than "=", "?", and "_" (underscore), MAY be represented as those
287
+ * characters. (But see section 5 for restrictions.) In
288
+ * particular, SPACE and TAB MUST NOT be represented as themselves
289
+ * within encoded words.
290
+ */
291
+
292
+ /*
293
+ * rule out non-ASCII characters and non-printable characters (the
294
+ * non-ASCII check should be redundant as isprint() is not localized
295
+ * and only knows about ASCII, but be defensive about that)
296
+ */
297
+ if (non_ascii (ch ) || !isprint (ch ))
298
+ return 1 ;
299
+
300
+ /*
301
+ * rule out special printable characters (' ' should be the only
302
+ * whitespace character considered printable, but be defensive and use
303
+ * isspace())
304
+ */
305
+ if (isspace (ch ) || ch == '=' || ch == '?' || ch == '_' )
306
+ return 1 ;
307
+
308
+ /*
309
+ * rfc2047, section 5.3:
310
+ *
311
+ * As a replacement for a 'word' entity within a 'phrase', for example,
312
+ * one that precedes an address in a From, To, or Cc header. The ABNF
313
+ * definition for 'phrase' from RFC 822 thus becomes:
314
+ *
315
+ * phrase = 1*( encoded-word / word )
316
+ *
317
+ * In this case the set of characters that may be used in a "Q"-encoded
318
+ * 'encoded-word' is restricted to: <upper and lower case ASCII
319
+ * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
320
+ * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
321
+ * 'phrase' MUST be separated from any adjacent 'word', 'text' or
322
+ * 'special' by 'linear-white-space'.
323
+ */
324
+
325
+ if (type != RFC2047_ADDRESS )
326
+ return 0 ;
327
+
328
+ /* '=' and '_' are special cases and have been checked above */
329
+ return !(isalnum (ch ) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/' );
267
330
}
268
331
269
- static void add_rfc2047 ( struct strbuf * sb , const char * line , int len ,
270
- const char * encoding )
332
+ static int needs_rfc2047_encoding ( const char * line , int len ,
333
+ enum rfc2047_type type )
271
334
{
272
- static const int max_length = 78 ; /* per rfc2822 */
273
335
int i ;
274
- int line_len ;
275
-
276
- /* How many bytes are already used on the current line? */
277
- for (i = sb -> len - 1 ; i >= 0 ; i -- )
278
- if (sb -> buf [i ] == '\n' )
279
- break ;
280
- line_len = sb -> len - (i + 1 );
281
336
282
337
for (i = 0 ; i < len ; i ++ ) {
283
338
int ch = line [i ];
284
339
if (non_ascii (ch ) || ch == '\n' )
285
- goto needquote ;
340
+ return 1 ;
286
341
if ((i + 1 < len ) && (ch == '=' && line [i + 1 ] == '?' ))
287
- goto needquote ;
342
+ return 1 ;
288
343
}
289
- strbuf_add_wrapped_bytes (sb , line , len , 0 , 1 , max_length - line_len );
290
- return ;
291
344
292
- needquote :
345
+ return 0 ;
346
+ }
347
+
348
+ static void add_rfc2047 (struct strbuf * sb , const char * line , int len ,
349
+ const char * encoding , enum rfc2047_type type )
350
+ {
351
+ static const int max_encoded_length = 76 ; /* per rfc2047 */
352
+ int i ;
353
+ int line_len = last_line_length (sb );
354
+
293
355
strbuf_grow (sb , len * 3 + strlen (encoding ) + 100 );
294
356
strbuf_addf (sb , "=?%s?q?" , encoding );
295
357
line_len += strlen (encoding ) + 5 ; /* 5 for =??q? */
296
358
for (i = 0 ; i < len ; i ++ ) {
297
359
unsigned ch = line [i ] & 0xFF ;
360
+ int is_special = is_rfc2047_special (ch , type );
361
+
362
+ /*
363
+ * According to RFC 2047, we could encode the special character
364
+ * ' ' (space) with '_' (underscore) for readability. But many
365
+ * programs do not understand this and just leave the
366
+ * underscore in place. Thus, we do nothing special here, which
367
+ * causes ' ' to be encoded as '=20', avoiding this problem.
368
+ */
298
369
299
- if (line_len >= max_length - 2 ) {
370
+ if (line_len + 2 + ( is_special ? 3 : 1 ) > max_encoded_length ) {
300
371
strbuf_addf (sb , "?=\n =?%s?q?" , encoding );
301
372
line_len = strlen (encoding ) + 5 + 1 ; /* =??q? plus SP */
302
373
}
303
374
304
- /*
305
- * We encode ' ' using '=20' even though rfc2047
306
- * allows using '_' for readability. Unfortunately,
307
- * many programs do not understand this and just
308
- * leave the underscore in place.
309
- */
310
- if (is_rfc2047_special (ch ) || ch == ' ' || ch == '\n' ) {
375
+ if (is_special ) {
311
376
strbuf_addf (sb , "=%02X" , ch );
312
377
line_len += 3 ;
313
- }
314
- else {
378
+ } else {
315
379
strbuf_addch (sb , ch );
316
380
line_len ++ ;
317
381
}
@@ -323,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp,
323
387
const char * what , struct strbuf * sb ,
324
388
const char * line , const char * encoding )
325
389
{
390
+ int max_length = 78 ; /* per rfc2822 */
326
391
char * date ;
327
392
int namelen ;
328
393
unsigned long time ;
@@ -340,25 +405,27 @@ void pp_user_info(const struct pretty_print_context *pp,
340
405
if (pp -> fmt == CMIT_FMT_EMAIL ) {
341
406
char * name_tail = strchr (line , '<' );
342
407
int display_name_length ;
343
- int final_line ;
344
408
if (!name_tail )
345
409
return ;
346
410
while (line < name_tail && isspace (name_tail [-1 ]))
347
411
name_tail -- ;
348
412
display_name_length = name_tail - line ;
349
413
strbuf_addstr (sb , "From: " );
350
- if (!has_rfc822_specials (line , display_name_length )) {
351
- add_rfc2047 (sb , line , display_name_length , encoding );
352
- } else {
414
+ if (needs_rfc2047_encoding (line , display_name_length , RFC2047_ADDRESS )) {
415
+ add_rfc2047 (sb , line , display_name_length ,
416
+ encoding , RFC2047_ADDRESS );
417
+ max_length = 76 ; /* per rfc2047 */
418
+ } else if (needs_rfc822_quoting (line , display_name_length )) {
353
419
struct strbuf quoted = STRBUF_INIT ;
354
420
add_rfc822_quoted (& quoted , line , display_name_length );
355
- add_rfc2047 (sb , quoted .buf , quoted .len , encoding );
421
+ strbuf_add_wrapped_bytes (sb , quoted .buf , quoted .len ,
422
+ -6 , 1 , max_length );
356
423
strbuf_release (& quoted );
424
+ } else {
425
+ strbuf_add_wrapped_bytes (sb , line , display_name_length ,
426
+ -6 , 1 , max_length );
357
427
}
358
- for (final_line = 0 ; final_line < sb -> len ; final_line ++ )
359
- if (sb -> buf [sb -> len - final_line - 1 ] == '\n' )
360
- break ;
361
- if (namelen - display_name_length + final_line > 78 ) {
428
+ if (namelen - display_name_length + last_line_length (sb ) > max_length ) {
362
429
strbuf_addch (sb , '\n' );
363
430
if (!isspace (name_tail [0 ]))
364
431
strbuf_addch (sb , ' ' );
@@ -1278,6 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp,
1278
1345
const char * encoding ,
1279
1346
int need_8bit_cte )
1280
1347
{
1348
+ static const int max_length = 78 ; /* per rfc2047 */
1281
1349
struct strbuf title ;
1282
1350
1283
1351
strbuf_init (& title , 80 );
@@ -1287,7 +1355,12 @@ void pp_title_line(const struct pretty_print_context *pp,
1287
1355
strbuf_grow (sb , title .len + 1024 );
1288
1356
if (pp -> subject ) {
1289
1357
strbuf_addstr (sb , pp -> subject );
1290
- add_rfc2047 (sb , title .buf , title .len , encoding );
1358
+ if (needs_rfc2047_encoding (title .buf , title .len , RFC2047_SUBJECT ))
1359
+ add_rfc2047 (sb , title .buf , title .len ,
1360
+ encoding , RFC2047_SUBJECT );
1361
+ else
1362
+ strbuf_add_wrapped_bytes (sb , title .buf , title .len ,
1363
+ - last_line_length (sb ), 1 , max_length );
1291
1364
} else {
1292
1365
strbuf_addbuf (sb , & title );
1293
1366
}
0 commit comments