Skip to content

Commit 6d6a3a9

Browse files
pks-tgitster
authored andcommitted
refs: implement logic to migrate between ref storage formats
With the introduction of the new "reftable" backend, users may want to migrate repositories between the backends without having to recreate the whole repository. Add the logic to do so. The implementation is generic and works with arbitrary ref storage formats so that a backend does not need to implement any migration logic. It does have a few limitations though: - We do not migrate repositories with worktrees, because worktrees have separate ref storages. It makes the overall affair more complex if we have to migrate multiple storages at once. - We do not migrate reflogs, because we have no interfaces to write many reflog entries. - We do not lock the repository for concurrent access, and thus concurrent writes may end up with weird in-between states. There is no way to fully lock the "files" backend for writes due to its format, and thus we punt on this topic altogether and defer to the user to avoid those from happening. In other words, this version is a minimum viable product for migrating a repository's ref storage format. It works alright for bare repos, which often have neither worktrees nor reflogs. But it will not work for many other repositories without some preparations. These limitations are not set in stone though, and ideally we will eventually address them over time. The logic is not yet used by anything, and thus there are no tests for it. Those will be added in the next commit. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 64a6dd8 commit 6d6a3a9

File tree

2 files changed

+326
-0
lines changed

2 files changed

+326
-0
lines changed

refs.c

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2570,3 +2570,311 @@ int ref_update_check_old_target(const char *referent, struct ref_update *update,
25702570
referent, update->old_target);
25712571
return -1;
25722572
}
2573+
2574+
/*
 * Callback payload handed to `migrate_one_ref()` while iterating over the
 * refs of the old ref store: the store being read from, the transaction on
 * the new store that collects the queued updates, and the buffer that
 * receives error messages.
 */
struct migration_data {
2575+
struct ref_store *old_refs;
2576+
struct ref_transaction *transaction;
2577+
struct strbuf *errbuf;
2578+
};
2579+
2580+
static int migrate_one_ref(const char *refname, const struct object_id *oid,
2581+
int flags, void *cb_data)
2582+
{
2583+
struct migration_data *data = cb_data;
2584+
struct strbuf symref_target = STRBUF_INIT;
2585+
int ret;
2586+
2587+
if (flags & REF_ISSYMREF) {
2588+
ret = refs_read_symbolic_ref(data->old_refs, refname, &symref_target);
2589+
if (ret < 0)
2590+
goto done;
2591+
2592+
ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(),
2593+
symref_target.buf, NULL,
2594+
REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf);
2595+
if (ret < 0)
2596+
goto done;
2597+
} else {
2598+
ret = ref_transaction_create(data->transaction, refname, oid,
2599+
REF_SKIP_CREATE_REFLOG | REF_SKIP_OID_VERIFICATION,
2600+
NULL, data->errbuf);
2601+
if (ret < 0)
2602+
goto done;
2603+
}
2604+
2605+
done:
2606+
strbuf_release(&symref_target);
2607+
return ret;
2608+
}
2609+
2610+
static int move_files(const char *from_path, const char *to_path, struct strbuf *errbuf)
2611+
{
2612+
struct strbuf from_buf = STRBUF_INIT, to_buf = STRBUF_INIT;
2613+
size_t from_len, to_len;
2614+
DIR *from_dir;
2615+
int ret;
2616+
2617+
from_dir = opendir(from_path);
2618+
if (!from_dir) {
2619+
strbuf_addf(errbuf, "could not open source directory '%s': %s",
2620+
from_path, strerror(errno));
2621+
ret = -1;
2622+
goto done;
2623+
}
2624+
2625+
strbuf_addstr(&from_buf, from_path);
2626+
strbuf_complete(&from_buf, '/');
2627+
from_len = from_buf.len;
2628+
2629+
strbuf_addstr(&to_buf, to_path);
2630+
strbuf_complete(&to_buf, '/');
2631+
to_len = to_buf.len;
2632+
2633+
while (1) {
2634+
struct dirent *ent;
2635+
2636+
errno = 0;
2637+
ent = readdir(from_dir);
2638+
if (!ent)
2639+
break;
2640+
2641+
if (!strcmp(ent->d_name, ".") ||
2642+
!strcmp(ent->d_name, ".."))
2643+
continue;
2644+
2645+
strbuf_setlen(&from_buf, from_len);
2646+
strbuf_addstr(&from_buf, ent->d_name);
2647+
2648+
strbuf_setlen(&to_buf, to_len);
2649+
strbuf_addstr(&to_buf, ent->d_name);
2650+
2651+
ret = rename(from_buf.buf, to_buf.buf);
2652+
if (ret < 0) {
2653+
strbuf_addf(errbuf, "could not link file '%s' to '%s': %s",
2654+
from_buf.buf, to_buf.buf, strerror(errno));
2655+
goto done;
2656+
}
2657+
}
2658+
2659+
if (errno) {
2660+
strbuf_addf(errbuf, "could not read entry from directory '%s': %s",
2661+
from_path, strerror(errno));
2662+
ret = -1;
2663+
goto done;
2664+
}
2665+
2666+
ret = 0;
2667+
2668+
done:
2669+
strbuf_release(&from_buf);
2670+
strbuf_release(&to_buf);
2671+
if (from_dir)
2672+
closedir(from_dir);
2673+
return ret;
2674+
}
2675+
2676+
/*
 * Reflog iteration callback: `payload` points to a `size_t` counter that is
 * bumped once per invocation. The reflog name itself is ignored. Always
 * returns 0.
 */
static int count_reflogs(const char *reflog UNUSED, void *payload)
{
	size_t *counter = payload;

	*counter += 1;
	return 0;
}
2682+
2683+
/*
 * Returns 1 if `get_worktrees()` reports at least one worktree that is not
 * the main worktree, 0 otherwise.
 */
static int has_worktrees(void)
{
	struct worktree **list = get_worktrees();
	struct worktree **it;
	int found = 0;

	/* The list is NULL-terminated. */
	for (it = list; *it; it++) {
		if (!is_main_worktree(*it))
			found = 1;
	}

	free_worktrees(list);
	return found;
}
2698+
2699+
int repo_migrate_ref_storage_format(struct repository *repo,
2700+
enum ref_storage_format format,
2701+
unsigned int flags,
2702+
struct strbuf *errbuf)
2703+
{
2704+
struct ref_store *old_refs = NULL, *new_refs = NULL;
2705+
struct ref_transaction *transaction = NULL;
2706+
struct strbuf new_gitdir = STRBUF_INIT;
2707+
struct migration_data data;
2708+
size_t reflog_count = 0;
2709+
int did_migrate_refs = 0;
2710+
int ret;
2711+
2712+
if (repo->ref_storage_format == format) {
2713+
strbuf_addstr(errbuf, "current and new ref storage format are equal");
2714+
ret = -1;
2715+
goto done;
2716+
}
2717+
2718+
old_refs = get_main_ref_store(repo);
2719+
2720+
/*
2721+
* We do not have any interfaces that would allow us to write many
2722+
* reflog entries. Once we have them we can remove this restriction.
2723+
*/
2724+
if (refs_for_each_reflog(old_refs, count_reflogs, &reflog_count) < 0) {
2725+
strbuf_addstr(errbuf, "cannot count reflogs");
2726+
ret = -1;
2727+
goto done;
2728+
}
2729+
if (reflog_count) {
2730+
strbuf_addstr(errbuf, "migrating reflogs is not supported yet");
2731+
ret = -1;
2732+
goto done;
2733+
}
2734+
2735+
/*
2736+
* Worktrees complicate the migration because every worktree has a
2737+
* separate ref storage. While it should be feasible to implement, this
2738+
* is pushed out to a future iteration.
2739+
*
2740+
* TODO: we should really be passing the caller-provided repository to
2741+
* `has_worktrees()`, but our worktree subsystem doesn't yet support
2742+
* that.
2743+
*/
2744+
if (has_worktrees()) {
2745+
strbuf_addstr(errbuf, "migrating repositories with worktrees is not supported yet");
2746+
ret = -1;
2747+
goto done;
2748+
}
2749+
2750+
/*
2751+
* The overall logic looks like this:
2752+
*
2753+
* 1. Set up a new temporary directory and initialize it with the new
2754+
* format. This is where all refs will be migrated into.
2755+
*
2756+
* 2. Enumerate all refs and write them into the new ref storage.
2757+
* This operation is safe as we do not yet modify the main
2758+
* repository.
2759+
*
2760+
* 3. If we're in dry-run mode then we are done and can hand over the
2761+
* directory to the caller for inspection. If not, we now start
2762+
* with the destructive part.
2763+
*
2764+
* 4. Delete the old ref storage from disk. As we have a copy of refs
2765+
* in the new ref storage it's okay(ish) if we now get interrupted
2766+
* as there is an equivalent copy of all refs available.
2767+
*
2768+
* 5. Move the new ref storage files into place.
2769+
*
2770+
* 6. Change the repository format to the new ref format.
2771+
*/
2772+
strbuf_addf(&new_gitdir, "%s/%s", old_refs->gitdir, "ref_migration.XXXXXX");
2773+
if (!mkdtemp(new_gitdir.buf)) {
2774+
strbuf_addf(errbuf, "cannot create migration directory: %s",
2775+
strerror(errno));
2776+
ret = -1;
2777+
goto done;
2778+
}
2779+
2780+
new_refs = ref_store_init(repo, format, new_gitdir.buf,
2781+
REF_STORE_ALL_CAPS);
2782+
ret = ref_store_create_on_disk(new_refs, 0, errbuf);
2783+
if (ret < 0)
2784+
goto done;
2785+
2786+
transaction = ref_store_transaction_begin(new_refs, errbuf);
2787+
if (!transaction)
2788+
goto done;
2789+
2790+
data.old_refs = old_refs;
2791+
data.transaction = transaction;
2792+
data.errbuf = errbuf;
2793+
2794+
/*
2795+
* We need to use the internal `do_for_each_ref()` here so that we can
2796+
* also include broken refs and symrefs. These would otherwise be
2797+
* skipped silently.
2798+
*
2799+
* Ideally, we would do this call while locking the old ref storage
2800+
* such that there cannot be any concurrent modifications. We do not
2801+
* have the infra for that though, and the "files" backend does not
2802+
* allow for a central lock due to its design. It's thus on the user to
2803+
* ensure that there are no concurrent writes.
2804+
*/
2805+
ret = do_for_each_ref(old_refs, "", NULL, migrate_one_ref, 0,
2806+
DO_FOR_EACH_INCLUDE_ROOT_REFS | DO_FOR_EACH_INCLUDE_BROKEN,
2807+
&data);
2808+
if (ret < 0)
2809+
goto done;
2810+
2811+
/*
2812+
* TODO: we might want to migrate to `initial_ref_transaction_commit()`
2813+
* here, which is more efficient for the files backend because it would
2814+
* write new refs into the packed-refs file directly. At this point,
2815+
* the files backend doesn't handle pseudo-refs and symrefs correctly
2816+
* though, so this requires some more work.
2817+
*/
2818+
ret = ref_transaction_commit(transaction, errbuf);
2819+
if (ret < 0)
2820+
goto done;
2821+
did_migrate_refs = 1;
2822+
2823+
if (flags & REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN) {
2824+
printf(_("Finished dry-run migration of refs, "
2825+
"the result can be found at '%s'\n"), new_gitdir.buf);
2826+
ret = 0;
2827+
goto done;
2828+
}
2829+
2830+
/*
2831+
* Until now we were in the non-destructive phase, where we only
2832+
* populated the new ref store. From hereon though we are about
2833+
* to get hands by deleting the old ref store and then moving
2834+
* the new one into place.
2835+
*
2836+
* Assuming that there were no concurrent writes, the new ref
2837+
* store should have all information. So if we fail from hereon
2838+
* we may be in an in-between state, but it would still be able
2839+
* to recover by manually moving remaining files from the
2840+
* temporary migration directory into place.
2841+
*/
2842+
ret = ref_store_remove_on_disk(old_refs, errbuf);
2843+
if (ret < 0)
2844+
goto done;
2845+
2846+
ret = move_files(new_gitdir.buf, old_refs->gitdir, errbuf);
2847+
if (ret < 0)
2848+
goto done;
2849+
2850+
if (rmdir(new_gitdir.buf) < 0)
2851+
warning_errno(_("could not remove temporary migration directory '%s'"),
2852+
new_gitdir.buf);
2853+
2854+
/*
2855+
* We have migrated the repository, so we now need to adjust the
2856+
* repository format so that clients will use the new ref store.
2857+
* We also need to swap out the repository's main ref store.
2858+
*/
2859+
initialize_repository_version(hash_algo_by_ptr(repo->hash_algo), format, 1);
2860+
2861+
free(new_refs->gitdir);
2862+
new_refs->gitdir = xstrdup(old_refs->gitdir);
2863+
repo->refs_private = new_refs;
2864+
ref_store_release(old_refs);
2865+
2866+
ret = 0;
2867+
2868+
done:
2869+
if (ret && did_migrate_refs) {
2870+
strbuf_complete(errbuf, '\n');
2871+
strbuf_addf(errbuf, _("migrated refs can be found at '%s'"),
2872+
new_gitdir.buf);
2873+
}
2874+
2875+
if (ret && new_refs)
2876+
ref_store_release(new_refs);
2877+
ref_transaction_free(transaction);
2878+
strbuf_release(&new_gitdir);
2879+
return ret;
2880+
}

refs.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,24 @@ int is_root_ref(const char *refname);
10701070
*/
10711071
int is_pseudo_ref(const char *refname);
10721072

1073+
/*
 * The following flags can be passed to `repo_migrate_ref_storage_format()`:
 *
 *   - REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN: perform a dry-run migration
 *     without touching the main repository. The migrated refs are written
 *     into a temporary ref storage directory, which is left in place so
 *     that the result can be inspected.
 */
1080+
#define REPO_MIGRATE_REF_STORAGE_FORMAT_DRYRUN (1 << 0)
1081+
1082+
/*
 * Migrate the ref storage format used by the repository to the
 * specified one.
 *
 * `flags` is a bitwise OR of the REPO_MIGRATE_REF_STORAGE_FORMAT_* flags.
 *
 * The migration is refused when `format` equals the repository's current
 * format, when the repository has reflogs, or when it has worktrees other
 * than the main one.
 *
 * Returns 0 on success. Errors are described by a message appended to `err`.
 */
1086+
int repo_migrate_ref_storage_format(struct repository *repo,
1087+
enum ref_storage_format format,
1088+
unsigned int flags,
1089+
struct strbuf *err);
1090+
10731091
/*
10741092
* The following functions have been removed in Git v2.45 in favor of functions
10751093
* that receive a `ref_store` as parameter. The intent of this section is

0 commit comments

Comments
 (0)