Skip to content

Commit 7502fb9

Browse files
mjcheetham authored and dscho committed
maintenance: add cache-local-objects maintenance task
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache.

Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue.

Migration of packfiles involves the following steps for each pack:

1. Hardlink (or copy):
   a. the .pack file
   b. the .keep file
   c. the .rev file
2. Move (or copy + delete) the .idx file
3. Delete/unlink:
   a. the .pack file
   b. the .keep file
   c. the .rev file

Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) also exist in the cache directory.

Moving loose objects operates as a move, or copy + delete.

Signed-off-by: Matthew John Cheetham <[email protected]>
1 parent a29e87e commit 7502fb9

File tree

3 files changed

+326
-0
lines changed

3 files changed

+326
-0
lines changed

Documentation/git-maintenance.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -174,6 +175,13 @@ worktree-prune::
174175
The `worktree-prune` task deletes stale or broken worktrees. See
175176
linkgit:git-worktree[1] for more information.
176177

178+
cache-local-objects::
179+
The `cache-local-objects` task only operates on Scalar or VFS for Git
180+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
181+
have the `gvfs.sharedCache` configuration setting present. This task
182+
migrates pack files and loose objects from the repository's object
183+
directory into the shared volume cache.
184+
177185
OPTIONS
178186
-------
179187
--auto::

builtin/gc.c

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
19+
#include "copy.h"
1820
#include "date.h"
1921
#include "dir.h"
2022
#include "environment.h"
@@ -262,6 +264,7 @@ enum maintenance_task_label {
262264
TASK_REFLOG_EXPIRE,
263265
TASK_WORKTREE_PRUNE,
264266
TASK_RERERE_GC,
267+
TASK_CACHE_LOCAL_OBJS,
265268

266269
/* Leave as final value */
267270
TASK__COUNT
@@ -1575,6 +1578,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
15751578
return 0;
15761579
}
15771580

1581+
/*
 * Make 'dst' available with the contents of 'src' without removing 'src'.
 *
 * A hard link is attempted first. If that fails, the file is copied
 * instead; any link failure other than EXDEV is reported as a warning
 * before the copy is attempted. A failed copy is fatal.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst) == 0)
		return;

	/* EXDEV (different file systems) is the expected reason to copy. */
	if (errno != EXDEV)
		warning_errno(_("failed to link '%s' to '%s'"), src, dst);

	/* Note the argument order: destination first, then source. */
	if (copy_file(dst, src, 0444) != 0)
		die_errno(_("failed to copy '%s' to '%s'"), src, dst);
}
1593+
1594+
/*
 * Move 'src' to 'dst'.
 *
 * A rename is attempted first. If that fails, the file is copied to
 * 'dst' and 'src' is then unlinked; any rename failure other than EXDEV
 * is reported as a warning before the copy is attempted. A failed copy
 * or a failed unlink of 'src' is fatal.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst) == 0)
		return;

	/* EXDEV (different file systems) is the expected reason to copy. */
	if (errno != EXDEV)
		warning_errno(_("failed to move '%s' to '%s'"), src, dst);

	/* Note the argument order: destination first, then source. */
	if (copy_file(dst, src, 0444) != 0)
		die_errno(_("failed to copy '%s' to '%s'"), src, dst);

	/* The copy succeeded, so the original can now go away. */
	if (unlink(src) != 0)
		die_errno(_("failed to delete '%s'"), src);
}
1609+
1610+
static void migrate_pack(const char *srcdir, const char *dstdir,
1611+
const char *pack_filename)
1612+
{
1613+
size_t basenamelen, srclen, dstlen;
1614+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1615+
struct {
1616+
const char *ext;
1617+
unsigned move:1;
1618+
} files[] = {
1619+
{".pack", 0},
1620+
{".keep", 0},
1621+
{".rev", 0},
1622+
{".idx", 1}, /* The index file must be atomically moved last. */
1623+
};
1624+
1625+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1626+
1627+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1628+
strbuf_addstr(&src, srcdir);
1629+
strbuf_addch(&src, '/');
1630+
strbuf_add(&src, pack_filename, basenamelen);
1631+
strbuf_addstr(&src, ".idx");
1632+
1633+
/* A pack without an index file is not yet ready to be migrated. */
1634+
if (!file_exists(src.buf))
1635+
goto cleanup;
1636+
1637+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1638+
strbuf_addstr(&dst, dstdir);
1639+
strbuf_addch(&dst, '/');
1640+
strbuf_add(&dst, pack_filename, basenamelen);
1641+
1642+
srclen = src.len;
1643+
dstlen = dst.len;
1644+
1645+
/* Move or copy files from the source directory to the destination. */
1646+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1647+
strbuf_setlen(&src, srclen);
1648+
strbuf_addstr(&src, files[i].ext);
1649+
1650+
if (!file_exists(src.buf))
1651+
continue;
1652+
1653+
strbuf_setlen(&dst, dstlen);
1654+
strbuf_addstr(&dst, files[i].ext);
1655+
1656+
if (files[i].move)
1657+
rename_or_copy_or_die(src.buf, dst.buf);
1658+
else
1659+
link_or_copy_or_die(src.buf, dst.buf);
1660+
}
1661+
1662+
/*
1663+
* Now the pack and all associated files exist at the destination we can
1664+
* now clean up the files in the source directory.
1665+
*/
1666+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1667+
/* Files that were moved rather than copied have no clean up. */
1668+
if (files[i].move)
1669+
continue;
1670+
1671+
strbuf_setlen(&src, srclen);
1672+
strbuf_addstr(&src, files[i].ext);
1673+
1674+
/* Files that never existed in originally have no clean up.*/
1675+
if (!file_exists(src.buf))
1676+
continue;
1677+
1678+
if (unlink(src.buf))
1679+
warning_errno(_("failed to delete '%s'"), src.buf);
1680+
}
1681+
1682+
cleanup:
1683+
strbuf_release(&src);
1684+
strbuf_release(&dst);
1685+
1686+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1687+
}
1688+
1689+
/*
 * Per-file callback handed to for_each_file_in_pack_dir().
 *
 * 'data' carries the destination pack directory as a C string. Every
 * file whose name ends in ".pack" is passed on to migrate_pack(); all
 * other files are ignored here.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = data;
	size_t srcdir_len;
	char *srcdir;

	/*
	 * We only care about the actual pack files here.
	 * The associated .idx, .keep and .rev files are handled in tandem
	 * with the pack file, with the index file being moved last.
	 * The original locations of the non-index files will only be
	 * deleted once all other files have been copied/moved.
	 */
	if (!ends_with(file_name, ".pack"))
		return;

	/*
	 * Strip "/<file_name>" off the end to obtain the directory; this
	 * assumes 'full_path' ends with a separator followed by 'file_name'.
	 */
	srcdir_len = full_path_len - strlen(file_name) - 1;
	srcdir = xstrndup(full_path, srcdir_len);

	migrate_pack(srcdir, dstdir, file_name);

	free(srcdir);
}
1710+
1711+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1712+
const char *path,
1713+
UNUSED void *data)
1714+
{
1715+
struct stat st;
1716+
struct strbuf dst = STRBUF_INIT;
1717+
char *hex = oid_to_hex(oid);
1718+
1719+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1720+
1721+
if (stat(dst.buf, &st)) {
1722+
if (mkdir(dst.buf, 0777))
1723+
die_errno(_("failed to create directory '%s'"), dst.buf);
1724+
} else if (!S_ISDIR(st.st_mode))
1725+
die(_("expected '%s' to be a directory"), dst.buf);
1726+
1727+
strbuf_addstr(&dst, hex+2);
1728+
rename_or_copy_or_die(path, dst.buf);
1729+
1730+
strbuf_release(&dst);
1731+
return 0;
1732+
}
1733+
1734+
/*
 * The `cache-local-objects` maintenance task.
 *
 * Does nothing unless 'shared_object_dir' is set and differs (per
 * fspathcmp()) from the path of the repository's first object source.
 * Otherwise it runs move_pack_to_shared_cache() over the files in the
 * local pack directory, targeting "<shared_object_dir>/pack", and then
 * runs move_loose_object_to_shared_cache() over the local loose objects.
 *
 * Both parameters are unused. Always returns 0.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *repo = the_repository;
	struct strbuf pack_dst = STRBUF_INIT;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	/* Only migrate when the destination is not the local odb itself. */
	if (fspathcmp(repo->objects->sources->path, shared_object_dir)) {
		strbuf_addf(&pack_dst, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(repo->objects->sources->path,
					  move_pack_to_shared_cache,
					  pack_dst.buf);

		for_each_loose_object(repo->objects,
				      move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&pack_dst);
	return 0;
}
1760+
15781761
typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts,
15791762
struct gc_config *cfg);
15801763
typedef int (*maintenance_auto_fn)(struct gc_config *cfg);
@@ -1648,6 +1831,10 @@ static const struct maintenance_task tasks[] = {
16481831
.background = maintenance_task_rerere_gc,
16491832
.auto_condition = rerere_gc_condition,
16501833
},
1834+
[TASK_CACHE_LOCAL_OBJS] = {
1835+
"cache-local-objects",
1836+
maintenance_task_cache_local_objs,
1837+
},
16511838
};
16521839

16531840
enum task_phase {
@@ -1752,6 +1939,8 @@ static const struct maintenance_strategy incremental_strategy = {
17521939
[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY,
17531940
[TASK_PACK_REFS].enabled = 1,
17541941
[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY,
1942+
[TASK_CACHE_LOCAL_OBJS].enabled = 1,
1943+
[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY,
17551944
},
17561945
};
17571946

t/t7900-maintenance.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,25 @@ test_systemd_analyze_verify () {
3131
fi
3232
}
3333

34+
# Stream five small blobs (contents "1" through "5") into `git fast-import`
# in the current repository. fastimport.unpackLimit is set to 0, presumably
# so that the imported objects stay in a packfile instead of being unpacked
# into loose objects.
test_import_packfile () {
	printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 |
	git -c fastimport.unpackLimit=0 fast-import
}
38+
39+
# List regular files under .git/objects/pack, relative to the current
# directory.
#
# With no arguments every file is listed. Otherwise each argument is used
# as a `find -name` pattern (e.g. "*.pack") and the matches for each
# pattern are listed in turn.
test_get_packdir_files () {
	if test "$#" -eq 0
	then
		find .git/objects/pack -type f
	else
		for arg in "$@"
		do
			# Quote the pattern so that it reaches find intact
			# instead of being glob-expanded by the shell against
			# the current directory.
			find .git/objects/pack -type f -name "$arg"
		done
	fi
}
48+
49+
# List regular files that sit below a two-character subdirectory of
# .git/objects (the loose-object fan-out directories), relative to the
# current directory.
test_get_loose_object_files () {
	find .git/objects \
		-type f \
		-path '.git/objects/??/*'
}
52+
3453
test_expect_success 'help text' '
3554
test_expect_code 129 git maintenance -h >actual &&
3655
test_grep "usage: git maintenance <subcommand>" actual &&
@@ -1191,4 +1210,114 @@ test_expect_success 'maintenance aborts with existing lock file' '
11911210
test_grep "Another scheduled git-maintenance(1) process seems to be running" err
11921211
'
11931212

1213+
test_expect_success 'cache-local-objects task with no shared cache no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		# No gvfs.sharedcache is configured in this repository.
		test_commit something &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		# Record every pack file and loose object present before the run.
		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" >files.txt &&
		test_get_loose_object_files >>files.txt &&

		# Everything recorded above must still be in place afterwards.
		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists $f || exit 1
		done <files.txt
	)
'
1235+
1236+
test_expect_success 'cache-local-objects task cache path same as local odb no op' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	(
		cd repo &&

		# Point the shared cache at the local object directory itself.
		test_commit something &&
		git config set gvfs.sharedcache .git/objects &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		# Record every pack file and loose object present before the run.
		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" >files.txt &&
		test_get_loose_object_files >>files.txt &&

		# Everything recorded above must still be in place afterwards.
		git maintenance run &&
		while IFS= read -r f
		do
			test_path_exists $f || exit 1
		done <files.txt
	)
'
1259+
1260+
test_expect_success 'cache-local-objects task no .rev or .keep' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		# Use a shared cache directory that lives next to the repository.
		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		# Only the .pack and .idx files are tracked in this scenario.
		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" >src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Drop the optional companion files before migrating.
		rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&

		# Derive the expected location of each file inside the cache.
		sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&

		git maintenance run &&

		# The originals must be gone ...
		while IFS= read -r f
		do
			test_path_is_missing $f || exit 1
		done <src.txt &&

		# ... and must have shown up in the shared cache.
		while IFS= read -r f
		do
			test_path_exists $f || exit 1
		done <dst.txt
	)
'
1291+
1292+
test_expect_success 'cache-local-objects task success' '
	test_when_finished "rm -rf repo cache" &&
	mkdir -p cache/pack &&
	git init repo &&
	(
		cd repo &&

		# Use a shared cache directory that lives next to the repository.
		test_commit something &&
		git config set gvfs.sharedcache ../cache &&
		git config set maintenance.gc.enabled false &&
		git config set maintenance.cache-local-objects.enabled true &&
		git config set maintenance.cache-local-objects.auto 1 &&

		# Record every pack file and loose object present before the run.
		test_import_packfile &&
		test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" >src.txt &&
		test_get_loose_object_files >>src.txt &&

		# Derive the expected location of each file inside the cache.
		sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt &&

		git maintenance run &&

		# The originals must be gone ...
		while IFS= read -r f
		do
			test_path_is_missing $f || exit 1
		done <src.txt &&

		# ... and must have shown up in the shared cache.
		while IFS= read -r f
		do
			test_path_exists $f || exit 1
		done <dst.txt
	)
'
1322+
11941323
test_done

0 commit comments

Comments
 (0)