|
| 1 | +commit 9bacade4a3ef4b6b26e2c02f549eef0e9eb9eaa2 |
| 2 | +Author: Robert Hensing < [email protected]> |
| 3 | +Date: Sun Aug 18 20:20:36 2024 +0200 |
| 4 | + |
| 5 | + Add unoptimized git_mempack_write_thin_pack |
| 6 | + |
| 7 | +diff --git a/include/git2/sys/mempack.h b/include/git2/sys/mempack.h |
| 8 | +index 17da590a3..3688bdd50 100644 |
| 9 | +--- a/include/git2/sys/mempack.h |
| 10 | ++++ b/include/git2/sys/mempack.h |
| 11 | +@@ -44,6 +44,29 @@ GIT_BEGIN_DECL |
| 12 | + */ |
| 13 | + GIT_EXTERN(int) git_mempack_new(git_odb_backend **out); |
| 14 | + |
| 15 | ++/** |
| 16 | ++ * Write a thin packfile with the objects in the memory store. |
| 17 | ++ * |
| 18 | ++ * A thin packfile is a packfile that does not contain its transitive closure of |
| 19 | ++ * references. This is useful for efficiently distributing additions to a |
| 20 | ++ * repository over the network, but also finds use in the efficient bulk |
| 21 | ++ * addition of objects to a repository, locally. |
| 22 | ++ * |
| 23 | ++ * This operation performs the (shallow) insert operations into the |
| 24 | ++ * `git_packbuilder`, but does not write the packfile to disk; |
| 25 | ++ * see `git_packbuilder_write_buf`. |
| 26 | ++ * |
| 27 | ++ * It also does not reset the memory store; see `git_mempack_reset`. |
| 28 | ++ * |
| 29 | ++ * @note This function may or may not write trees and blobs that are not |
| 30 | ++ * referenced by commits. Currently everything is written, but this |
| 31 | ++ * behavior may change in the future as the packer is optimized. |
| 32 | ++ * |
| 33 | ++ * @param backend The mempack backend |
| 34 | ++ * @param pb The packbuilder to use to write the packfile |
| 35 | ++ */ |
| 36 | ++GIT_EXTERN(int) git_mempack_write_thin_pack(git_odb_backend *backend, git_packbuilder *pb); |
| 37 | ++ |
| 38 | + /** |
| 39 | + * Dump all the queued in-memory writes to a packfile. |
| 40 | + * |
| 41 | +diff --git a/src/libgit2/odb_mempack.c b/src/libgit2/odb_mempack.c |
| 42 | +index 6f27f45f8..0b61e2b66 100644 |
| 43 | +--- a/src/libgit2/odb_mempack.c |
| 44 | ++++ b/src/libgit2/odb_mempack.c |
| 45 | +@@ -132,6 +132,35 @@ cleanup: |
| 46 | + return err; |
| 47 | + } |
| 48 | + |
| 49 | +int git_mempack_write_thin_pack(git_odb_backend *backend, git_packbuilder *pb) |
| 50 | +{ |
| 51 | +	struct memory_packer_db *mempack = (struct memory_packer_db *)backend; |
| 52 | +	const git_oid *id; |
| 53 | +	size_t pos = 0; |
| 54 | +	int error; |
| 55 | + |
| 56 | +	/* |
| 57 | +	 * Unoptimized: every object currently held in the memory store is |
| 58 | +	 * inserted, in the order the oidmap yields it. |
| 59 | +	 * |
| 60 | +	 * TODO: implement the recency heuristics described in |
| 61 | +	 * https://git-scm.com/docs/pack-heuristics/en. Writing only what is |
| 62 | +	 * referenced through commits probably makes sense for that; the |
| 63 | +	 * public documentation already allows for it. |
| 64 | +	 */ |
| 65 | +	for (;;) { |
| 66 | +		error = git_oidmap_iterate(NULL, mempack->objects, &pos, &id); |
| 67 | +		if (error == GIT_ITEROVER) |
| 68 | +			return 0; /* map exhausted: everything was inserted */ |
| 69 | +		if (error != 0) |
| 70 | +			return error; |
| 71 | + |
| 72 | +		error = git_packbuilder_insert(pb, id, NULL); |
| 73 | +		if (error != 0) |
| 74 | +			return error; |
| 75 | +	} |
| 76 | +} |
| 77 | ++ |
| 78 | + int git_mempack_dump( |
| 79 | + git_buf *pack, |
| 80 | + git_repository *repo, |
| 81 | +diff --git a/tests/libgit2/mempack/thinpack.c b/tests/libgit2/mempack/thinpack.c |
| 82 | +new file mode 100644 |
| 83 | +index 000000000..604a4dda2 |
| 84 | +--- /dev/null |
| 85 | ++++ b/tests/libgit2/mempack/thinpack.c |
| 86 | +@@ -0,0 +1,196 @@ |
| 87 | ++#include "clar_libgit2.h" |
| 88 | ++#include "git2/indexer.h" |
| 89 | ++#include "git2/odb_backend.h" |
| 90 | ++#include "git2/tree.h" |
| 91 | ++#include "git2/types.h" |
| 92 | ++#include "git2/sys/mempack.h" |
| 93 | ++#include "git2/sys/odb_backend.h" |
| 94 | ++#include "util.h" |
| 95 | ++ |
| 96 | ++static git_repository *_repo; |
| 97 | ++static git_odb_backend * _mempack_backend; |
| 98 | ++ |
| 99 | +void test_mempack_thinpack__initialize(void) |
| 100 | +{ |
| 101 | +	git_odb *repo_odb; |
| 102 | + |
| 103 | +	_repo = cl_git_sandbox_init_new("mempack_thinpack_repo"); |
| 104 | +	/* Create the mempack backend and attach it to the sandbox repository's odb. */ |
| 105 | +	cl_git_pass(git_mempack_new(&_mempack_backend)); |
| 106 | +	cl_git_pass(git_repository_odb(&repo_odb, _repo)); |
| 107 | +	cl_git_pass(git_odb_add_backend(repo_odb, _mempack_backend, 999)); |
| 108 | +	git_odb_free(repo_odb); |
| 109 | +} |
| 110 | ++ |
| 111 | +void test_mempack_thinpack__cleanup(void) |
| 112 | +{ |
| 113 | +	cl_git_sandbox_cleanup(); /* per-test teardown of the sandbox created in initialize */ |
| 114 | +} |
| 115 | ++ |
| 116 | ++/* |
| 117 | ++ Generating a packfile for an unchanged repo works and produces an empty packfile. |
| 118 | ++ Even if we allow this scenario to be detected, it shouldn't misbehave if the |
| 119 | ++ application is unaware of it. |
| 120 | ++*/ |
| 121 | ++void test_mempack_thinpack__empty(void) |
| 122 | ++{ |
| 123 | ++ git_packbuilder *pb; |
| 124 | ++ int version; |
| 125 | ++ int n; |
| 126 | ++ git_buf buf = GIT_BUF_INIT; |
| 127 | ++ |
| 128 | ++ git_packbuilder_new(&pb, _repo); |
| 129 | ++ |
| 130 | ++ cl_git_pass(git_mempack_write_thin_pack(_mempack_backend, pb)); |
| 131 | ++ cl_git_pass(git_packbuilder_write_buf(&buf, pb)); |
| 132 | ++ cl_assert_in_range(12, buf.size, 1024 /* empty packfile is >0 bytes, but certainly not that big */); |
| 133 | ++ cl_assert(buf.ptr[0] == 'P'); |
| 134 | ++ cl_assert(buf.ptr[1] == 'A'); |
| 135 | ++ cl_assert(buf.ptr[2] == 'C'); |
| 136 | ++ cl_assert(buf.ptr[3] == 'K'); |
| 137 | ++ version = (buf.ptr[4] << 24) | (buf.ptr[5] << 16) | (buf.ptr[6] << 8) | buf.ptr[7]; |
| 138 | ++ /* Subject to change. https://git-scm.com/docs/pack-format: Git currently accepts version number 2 or 3 but generates version 2 only.*/ |
| 139 | ++ cl_assert_equal_i(2, version); |
| 140 | ++ n = (buf.ptr[8] << 24) | (buf.ptr[9] << 16) | (buf.ptr[10] << 8) | buf.ptr[11]; |
| 141 | ++ cl_assert_equal_i(0, n); |
| 142 | ++ git_buf_dispose(&buf); |
| 143 | ++ |
| 144 | ++ git_packbuilder_free(pb); |
| 145 | ++} |
| 146 | ++ |
| 147 | +#define LIT_LEN(x) x, sizeof(x) - 1 /* expands to two arguments: the string literal and its length without the NUL */ |
| 148 | ++ |
| 149 | ++/* |
| 150 | ++ Check that git_mempack_write_thin_pack produces a thin packfile. |
| 151 | ++*/ |
| 152 | ++void test_mempack_thinpack__thin(void) |
| 153 | ++{ |
| 154 | ++ /* Outline: |
| 155 | ++ - Create tree 1 |
| 156 | ++ - Flush to packfile A |
| 157 | ++ - Create tree 2 |
| 158 | ++ - Flush to packfile B |
| 159 | ++ |
| 160 | ++ Tree 2 has a new blob and a reference to a blob from tree 1. |
| 161 | ++ |
| 162 | ++ Expectation: |
| 163 | ++ - Packfile B is thin and does not contain the objects from packfile A |
| 164 | ++ */ |
| 165 | ++ |
| 166 | ++ |
| 167 | ++ git_oid oid_blob_1; |
| 168 | ++ git_oid oid_blob_2; |
| 169 | ++ git_oid oid_blob_3; |
| 170 | ++ git_oid oid_tree_1; |
| 171 | ++ git_oid oid_tree_2; |
| 172 | ++ git_treebuilder *tb; |
| 173 | ++ |
| 174 | ++ git_packbuilder *pb; |
| 175 | ++ git_buf buf = GIT_BUF_INIT; |
| 176 | ++ git_indexer *indexer; |
| 177 | ++ git_indexer_progress stats; |
| 178 | ++ char pack_dir_path[1024]; |
| 179 | ++ |
| 180 | ++ char sbuf[1024]; |
| 181 | ++ const char * repo_path; |
| 182 | ++ const char * pack_name_1; |
| 183 | ++ const char * pack_name_2; |
| 184 | ++ git_str pack_path_1 = GIT_STR_INIT; |
| 185 | ++ git_str pack_path_2 = GIT_STR_INIT; |
| 186 | ++ git_odb_backend * pack_odb_backend_1; |
| 187 | ++ git_odb_backend * pack_odb_backend_2; |
| 188 | ++ |
| 189 | ++ |
| 190 | ++ cl_assert_in_range(0, snprintf(pack_dir_path, sizeof(pack_dir_path), "%s/objects/pack", git_repository_path(_repo)), sizeof(pack_dir_path)); |
| 191 | ++ |
| 192 | ++ /* Create tree 1 */ |
| 193 | ++ |
| 194 | ++ cl_git_pass(git_blob_create_from_buffer(&oid_blob_1, _repo, LIT_LEN("thinpack blob 1"))); |
| 195 | ++ cl_git_pass(git_blob_create_from_buffer(&oid_blob_2, _repo, LIT_LEN("thinpack blob 2"))); |
| 196 | ++ |
| 197 | ++ |
| 198 | ++ cl_git_pass(git_treebuilder_new(&tb, _repo, NULL)); |
| 199 | ++ cl_git_pass(git_treebuilder_insert(NULL, tb, "blob1", &oid_blob_1, GIT_FILEMODE_BLOB)); |
| 200 | ++ cl_git_pass(git_treebuilder_insert(NULL, tb, "blob2", &oid_blob_2, GIT_FILEMODE_BLOB)); |
| 201 | ++ cl_git_pass(git_treebuilder_write(&oid_tree_1, tb)); |
| 202 | ++ |
| 203 | ++ /* Flush */ |
| 204 | ++ |
| 205 | ++ cl_git_pass(git_packbuilder_new(&pb, _repo)); |
| 206 | ++ cl_git_pass(git_mempack_write_thin_pack(_mempack_backend, pb)); |
| 207 | ++ cl_git_pass(git_packbuilder_write_buf(&buf, pb)); |
| 208 | ++ cl_git_pass(git_indexer_new(&indexer, pack_dir_path, 0, NULL, NULL)); |
| 209 | ++ cl_git_pass(git_indexer_append(indexer, buf.ptr, buf.size, &stats)); |
| 210 | ++ cl_git_pass(git_indexer_commit(indexer, &stats)); |
| 211 | ++ pack_name_1 = strdup(git_indexer_name(indexer)); |
| 212 | ++ cl_assert(pack_name_1); |
| 213 | ++ git_buf_dispose(&buf); |
| 214 | ++ git_mempack_reset(_mempack_backend); |
| 215 | ++ git_indexer_free(indexer); |
| 216 | ++ git_packbuilder_free(pb); |
| 217 | ++ |
| 218 | ++ /* Create tree 2 */ |
| 219 | ++ |
| 220 | ++ cl_git_pass(git_treebuilder_clear(tb)); |
| 221 | ++ /* blob 1 won't be used, but we add it anyway to test that just "declaring" an object doesn't |
| 222 | ++ necessarily cause its inclusion in the next thin packfile. It must only be included if new. */ |
| 223 | ++ cl_git_pass(git_blob_create_from_buffer(&oid_blob_1, _repo, LIT_LEN("thinpack blob 1"))); |
| 224 | ++ cl_git_pass(git_blob_create_from_buffer(&oid_blob_3, _repo, LIT_LEN("thinpack blob 3"))); |
| 225 | ++ cl_git_pass(git_treebuilder_insert(NULL, tb, "blob1", &oid_blob_1, GIT_FILEMODE_BLOB)); |
| 226 | ++ cl_git_pass(git_treebuilder_insert(NULL, tb, "blob3", &oid_blob_3, GIT_FILEMODE_BLOB)); |
| 227 | ++ cl_git_pass(git_treebuilder_write(&oid_tree_2, tb)); |
| 228 | ++ |
| 229 | ++ /* Flush */ |
| 230 | ++ |
| 231 | ++ cl_git_pass(git_packbuilder_new(&pb, _repo)); |
| 232 | ++ cl_git_pass(git_mempack_write_thin_pack(_mempack_backend, pb)); |
| 233 | ++ cl_git_pass(git_packbuilder_write_buf(&buf, pb)); |
| 234 | ++ cl_git_pass(git_indexer_new(&indexer, pack_dir_path, 0, NULL, NULL)); |
| 235 | ++ cl_git_pass(git_indexer_append(indexer, buf.ptr, buf.size, &stats)); |
| 236 | ++ cl_git_pass(git_indexer_commit(indexer, &stats)); |
| 237 | ++ pack_name_2 = strdup(git_indexer_name(indexer)); |
| 238 | ++ cl_assert(pack_name_2); |
| 239 | ++ git_buf_dispose(&buf); |
| 240 | ++ git_mempack_reset(_mempack_backend); |
| 241 | ++ git_indexer_free(indexer); |
| 242 | ++ git_packbuilder_free(pb); |
| 243 | ++ git_treebuilder_free(tb); |
| 244 | ++ |
| 245 | ++ /* Assertions */ |
| 246 | ++ |
| 247 | ++ assert(pack_name_1); |
| 248 | ++ assert(pack_name_2); |
| 249 | ++ |
| 250 | ++ repo_path = git_repository_path(_repo); |
| 251 | ++ |
| 252 | ++ snprintf(sbuf, sizeof(sbuf), "objects/pack/pack-%s.pack", pack_name_1); |
| 253 | ++ git_str_joinpath(&pack_path_1, repo_path, sbuf); |
| 254 | ++ snprintf(sbuf, sizeof(sbuf), "objects/pack/pack-%s.pack", pack_name_2); |
| 255 | ++ git_str_joinpath(&pack_path_2, repo_path, sbuf); |
| 256 | ++ |
| 257 | ++ /* If they're the same, something definitely went wrong. */ |
| 258 | ++ cl_assert(strcmp(pack_name_1, pack_name_2) != 0); |
| 259 | ++ |
| 260 | ++ cl_git_pass(git_odb_backend_one_pack(&pack_odb_backend_1, pack_path_1.ptr)); |
| 261 | ++ cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_1)); |
| 262 | ++ cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_2)); |
| 263 | ++ cl_assert(!pack_odb_backend_1->exists(pack_odb_backend_1, &oid_blob_3)); |
| 264 | ++ cl_assert(pack_odb_backend_1->exists(pack_odb_backend_1, &oid_tree_1)); |
| 265 | ++ cl_assert(!pack_odb_backend_1->exists(pack_odb_backend_1, &oid_tree_2)); |
| 266 | ++ |
| 267 | ++ cl_git_pass(git_odb_backend_one_pack(&pack_odb_backend_2, pack_path_2.ptr)); |
| 268 | ++ /* blob 1 is already in the packfile 1, so packfile 2 must not include it, in order to be _thin_. */ |
| 269 | ++ cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_1)); |
| 270 | ++ cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_2)); |
| 271 | ++ cl_assert(pack_odb_backend_2->exists(pack_odb_backend_2, &oid_blob_3)); |
| 272 | ++ cl_assert(!pack_odb_backend_2->exists(pack_odb_backend_2, &oid_tree_1)); |
| 273 | ++ cl_assert(pack_odb_backend_2->exists(pack_odb_backend_2, &oid_tree_2)); |
| 274 | ++ |
| 275 | ++ pack_odb_backend_1->free(pack_odb_backend_1); |
| 276 | ++ pack_odb_backend_2->free(pack_odb_backend_2); |
| 277 | ++ free((void *)pack_name_1); |
| 278 | ++ free((void *)pack_name_2); |
| 279 | ++ git_str_dispose(&pack_path_1); |
| 280 | ++ git_str_dispose(&pack_path_2); |
| 281 | ++ |
| 282 | ++} |
0 commit comments