Skip to content

Commit 23495a3

Browse files
newren authored and gitster committed
fast-import: support 'encoding' commit header
Since git supports commit messages with an encoding other than utf-8, allow fast-import to import such commits. This may be useful for folks who do not want to reencode commit messages from an external system, and may also be useful to achieve reversible history rewrites (e.g. sha1sum <-> sha256sum transitions or subtree work) with git repositories that have used specialized encodings in their commit history.

Signed-off-by: Elijah Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent cda3935 commit 23495a3

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

Documentation/git-fast-import.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ change to the project.
388388
original-oid?
389389
('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
390390
'committer' (SP <name>)? SP LT <email> GT SP <when> LF
391+
('encoding' SP <encoding> LF)?
391392
data
392393
('from' SP <commit-ish> LF)?
393394
('merge' SP <commit-ish> LF)?
@@ -455,6 +456,12 @@ that was selected by the --date-format=<fmt> command-line option.
455456
See ``Date Formats'' above for the set of supported formats, and
456457
their syntax.
457458

459+
`encoding`
460+
^^^^^^^^^^
461+
The optional `encoding` command indicates the encoding of the commit
462+
message. Most commits are UTF-8 and the encoding is omitted, but this
463+
allows importing commit messages into git without first reencoding them.
464+
458465
`from`
459466
^^^^^^
460467
The `from` command is used to specify the commit to initialize

fast-import.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2585,6 +2585,7 @@ static void parse_new_commit(const char *arg)
25852585
struct branch *b;
25862586
char *author = NULL;
25872587
char *committer = NULL;
2588+
const char *encoding = NULL;
25882589
struct hash_list *merge_list = NULL;
25892590
unsigned int merge_count;
25902591
unsigned char prev_fanout, new_fanout;
@@ -2607,6 +2608,8 @@ static void parse_new_commit(const char *arg)
26072608
}
26082609
if (!committer)
26092610
die("Expected committer but didn't get one");
2611+
if (skip_prefix(command_buf.buf, "encoding ", &encoding))
2612+
read_next_command();
26102613
parse_data(&msg, 0, NULL);
26112614
read_next_command();
26122615
parse_from(b);
@@ -2670,9 +2673,13 @@ static void parse_new_commit(const char *arg)
26702673
}
26712674
strbuf_addf(&new_data,
26722675
"author %s\n"
2673-
"committer %s\n"
2674-
"\n",
2676+
"committer %s\n",
26752677
author ? author : committer, committer);
2678+
if (encoding)
2679+
strbuf_addf(&new_data,
2680+
"encoding %s\n",
2681+
encoding);
2682+
strbuf_addch(&new_data, '\n');
26762683
strbuf_addbuf(&new_data, &msg);
26772684
free(author);
26782685
free(committer);

t/t9300-fast-import.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3299,4 +3299,24 @@ test_expect_success !MINGW 'W: get-mark & empty orphan commit with erroneous thi
32993299
sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
33003300
'
33013301

3302+
###
3303+
### series X (other new features)
3304+
###
3305+
3306+
test_expect_success 'X: handling encoding' '
3307+
test_tick &&
3308+
cat >input <<-INPUT_END &&
3309+
commit refs/heads/encoding
3310+
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
3311+
encoding iso-8859-7
3312+
data <<COMMIT
3313+
INPUT_END
3314+
3315+
printf "Pi: \360\nCOMMIT\n" >>input &&
3316+
3317+
git fast-import <input &&
3318+
git cat-file -p encoding | grep $(printf "\360") &&
3319+
git log -1 --format=%B encoding | grep $(printf "\317\200")
3320+
'
3321+
33023322
test_done

0 commit comments

Comments
 (0)