Skip to content

Commit a965bb3

Browse files
newren authored and gitster committed
fast-export: add a --show-original-ids option to show original names
Knowing the original names (hashes) of commits can sometimes enable post-filtering that would otherwise be difficult or impossible. In particular, the desire to rewrite commit messages which refer to other prior commits (on top of whatever other filtering is being done) is very difficult without knowing the original names of each commit. In addition, knowing the original names (hashes) of blobs can allow filtering by blob-id without requiring re-hashing the content of the blob, and is thus useful as a small optimization. Once we add original ids for both commits and blobs, we may as well add them for tags too for completeness. Perhaps someone will have a use for them. This commit teaches a new --show-original-ids option to fast-export which will make it add an 'original-oid <hash>' line to blobs, commits, and tags. It also teaches fast-import to parse (and ignore) such lines. Signed-off-by: Elijah Newren <[email protected]> Acked-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 25dd3e4 commit a965bb3

File tree

5 files changed

+67
-5
lines changed

5 files changed

+67
-5
lines changed

Documentation/git-fast-export.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,13 @@ marks the same across runs.
122122
repository which already contains the necessary parent
123123
commits.
124124

125+
--show-original-ids::
126+
Add an extra directive to the output for commits, blobs, and tags,
127+
`original-oid <SHA1SUM>`. While such directives will likely be
128+
ignored by importers such as git-fast-import, it may be useful
129+
for intermediary filters (e.g. for rewriting commit messages
130+
which refer to older commits, or for stripping blobs by id).
131+
125132
--refspec::
126133
Apply the specified refspec to each ref exported. Multiple of them can
127134
be specified.

Documentation/git-fast-import.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ change to the project.
385385
....
386386
'commit' SP <ref> LF
387387
mark?
388+
original-oid?
388389
('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
389390
'committer' (SP <name>)? SP LT <email> GT SP <when> LF
390391
data
@@ -741,6 +742,19 @@ New marks are created automatically. Existing marks can be moved
741742
to another object simply by reusing the same `<idnum>` in another
742743
`mark` command.
743744

745+
`original-oid`
746+
~~~~~~~~~~~~~~
747+
Provides the name of the object in the original source control system.
748+
fast-import will simply ignore this directive, but filter processes
749+
which operate on and modify the stream before feeding to fast-import
750+
may have uses for this information.
751+
752+
....
753+
'original-oid' SP <object-identifier> LF
754+
....
755+
756+
where `<object-identifier>` is any string not containing LF.
757+
744758
`tag`
745759
~~~~~
746760
Creates an annotated tag referring to a specific commit. To create
@@ -749,6 +763,7 @@ lightweight (non-annotated) tags see the `reset` command below.
749763
....
750764
'tag' SP <name> LF
751765
'from' SP <commit-ish> LF
766+
original-oid?
752767
'tagger' (SP <name>)? SP LT <email> GT SP <when> LF
753768
data
754769
....
@@ -823,6 +838,7 @@ assigned mark.
823838
....
824839
'blob' LF
825840
mark?
841+
original-oid?
826842
data
827843
....
828844

builtin/fast-export.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ static int use_done_feature;
3838
static int no_data;
3939
static int full_tree;
4040
static int reference_excluded_commits;
41+
static int show_original_ids;
4142
static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
4243
static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
4344
static struct refspec refspecs = REFSPEC_INIT_FETCH;
@@ -271,7 +272,10 @@ static void export_blob(const struct object_id *oid)
271272

272273
mark_next_object(object);
273274

274-
printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
275+
printf("blob\nmark :%"PRIu32"\n", last_idnum);
276+
if (show_original_ids)
277+
printf("original-oid %s\n", oid_to_hex(oid));
278+
printf("data %lu\n", size);
275279
if (size && fwrite(buf, size, 1, stdout) != 1)
276280
die_errno("could not write blob '%s'", oid_to_hex(oid));
277281
printf("\n");
@@ -635,8 +639,10 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
635639
reencoded = reencode_string(message, "UTF-8", encoding);
636640
if (!commit->parents)
637641
printf("reset %s\n", refname);
638-
printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
639-
refname, last_idnum,
642+
printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
643+
if (show_original_ids)
644+
printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
645+
printf("%.*s\n%.*s\ndata %u\n%s",
640646
(int)(author_end - author), author,
641647
(int)(committer_end - committer), committer,
642648
(unsigned)(reencoded
@@ -814,8 +820,10 @@ static void handle_tag(const char *name, struct tag *tag)
814820

815821
if (starts_with(name, "refs/tags/"))
816822
name += 10;
817-
printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
818-
name, tagged_mark,
823+
printf("tag %s\nfrom :%d\n", name, tagged_mark);
824+
if (show_original_ids)
825+
printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
826+
printf("%.*s%sdata %d\n%.*s\n",
819827
(int)(tagger_end - tagger), tagger,
820828
tagger == tagger_end ? "" : "\n",
821829
(int)message_size, (int)message_size, message ? message : "");
@@ -1096,6 +1104,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
10961104
OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
10971105
OPT_BOOL(0, "reference-excluded-parents",
10981106
&reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")),
1107+
OPT_BOOL(0, "show-original-ids", &show_original_ids,
1108+
N_("Show original object ids of blobs/commits")),
10991109

11001110
OPT_END()
11011111
};

fast-import.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,13 @@ static void parse_mark(void)
18141814
next_mark = 0;
18151815
}
18161816

1817+
/*
 * Consume an optional "original-oid <identifier>" line.
 *
 * If the current command line starts with "original-oid ", advance to
 * the next command; otherwise leave the current command in place for
 * the caller to handle. The identifier text (v) is never examined, so
 * the directive is parsed and ignored.
 */
static void parse_original_identifier(void)
1818+
{
1819+
const char *v;
1820+
if (skip_prefix(command_buf.buf, "original-oid ", &v))
1821+
read_next_command();
1822+
}
1823+
18171824
static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res)
18181825
{
18191826
const char *data;
@@ -1956,6 +1963,7 @@ static void parse_new_blob(void)
19561963
{
19571964
read_next_command();
19581965
parse_mark();
1966+
parse_original_identifier();
19591967
parse_and_store_blob(&last_blob, NULL, next_mark);
19601968
}
19611969

@@ -2579,6 +2587,7 @@ static void parse_new_commit(const char *arg)
25792587

25802588
read_next_command();
25812589
parse_mark();
2590+
parse_original_identifier();
25822591
if (skip_prefix(command_buf.buf, "author ", &v)) {
25832592
author = parse_ident(v);
25842593
read_next_command();
@@ -2711,6 +2720,9 @@ static void parse_new_tag(const char *arg)
27112720
die("Invalid ref name or SHA1 expression: %s", from);
27122721
read_next_command();
27132722

2723+
/* original-oid ... */
2724+
parse_original_identifier();
2725+
27142726
/* tagger ... */
27152727
if (skip_prefix(command_buf.buf, "tagger ", &v)) {
27162728
tagger = parse_ident(v);

t/t9350-fast-export.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,23 @@ test_expect_success 'fast-export --reference-excluded-parents master~2..master'
7777
test $MASTER = $(git rev-parse --verify refs/heads/rewrite))
7878
'
7979

80+
test_expect_success 'fast-export --show-original-ids' '
81+
82+
git fast-export --show-original-ids master >output &&
83+
grep ^original-oid output| sed -e s/^original-oid.// | sort >actual &&
84+
git rev-list --objects master muss >objects-and-names &&
85+
awk "{print \$1}" objects-and-names | sort >commits-trees-blobs &&
86+
comm -23 actual commits-trees-blobs >unfound &&
87+
test_must_be_empty unfound
88+
'
89+
90+
test_expect_success 'fast-export --show-original-ids | git fast-import' '
91+
92+
git fast-export --show-original-ids master muss | git fast-import --quiet &&
93+
test $MASTER = $(git rev-parse --verify refs/heads/master) &&
94+
test $MUSS = $(git rev-parse --verify refs/tags/muss)
95+
'
96+
8097
test_expect_success 'iso-8859-1' '
8198
8299
git config i18n.commitencoding ISO8859-1 &&

0 commit comments

Comments
 (0)