|
| 1 | +/* |
| 2 | + * path-walk.c: implementation for path-based walks of the object graph. |
| 3 | + */ |
| 4 | +#include "git-compat-util.h" |
| 5 | +#include "path-walk.h" |
| 6 | +#include "blob.h" |
| 7 | +#include "commit.h" |
| 8 | +#include "dir.h" |
| 9 | +#include "hashmap.h" |
| 10 | +#include "hex.h" |
| 11 | +#include "object.h" |
| 12 | +#include "oid-array.h" |
| 13 | +#include "revision.h" |
| 14 | +#include "string-list.h" |
| 15 | +#include "strmap.h" |
| 16 | +#include "trace2.h" |
| 17 | +#include "tree.h" |
| 18 | +#include "tree-walk.h" |
| 19 | + |
| 20 | +struct type_and_oid_list { |
| 21 | + enum object_type type; |
| 22 | + struct oid_array oids; |
| 23 | +}; |
| 24 | + |
/* Static initializer for a 'struct type_and_oid_list' with no type and no OIDs. */
#define TYPE_AND_OID_LIST_INIT { \
	.type = OBJ_NONE, \
	.oids = OID_ARRAY_INIT, \
}
| 29 | + |
| 30 | +struct path_walk_context { |
| 31 | + /** |
| 32 | + * Repeats of data in 'struct path_walk_info' for |
| 33 | + * access with fewer characters. |
| 34 | + */ |
| 35 | + struct repository *repo; |
| 36 | + struct rev_info *revs; |
| 37 | + struct path_walk_info *info; |
| 38 | + |
| 39 | + /** |
| 40 | + * Map a path to a 'struct type_and_oid_list' |
| 41 | + * containing the objects discovered at that |
| 42 | + * path. |
| 43 | + */ |
| 44 | + struct strmap paths_to_lists; |
| 45 | + |
| 46 | + /** |
| 47 | + * Store the current list of paths in a stack, to |
| 48 | + * facilitate depth-first-search without recursion. |
| 49 | + * |
| 50 | + * Use path_stack_pushed to indicate whether a path |
| 51 | + * was previously added to path_stack. |
| 52 | + */ |
| 53 | + struct string_list path_stack; |
| 54 | + struct strset path_stack_pushed; |
| 55 | +}; |
| 56 | + |
| 57 | +static void push_to_stack(struct path_walk_context *ctx, |
| 58 | + const char *path) |
| 59 | +{ |
| 60 | + if (strset_contains(&ctx->path_stack_pushed, path)) |
| 61 | + return; |
| 62 | + |
| 63 | + strset_add(&ctx->path_stack_pushed, path); |
| 64 | + string_list_append(&ctx->path_stack, path); |
| 65 | +} |
| 66 | + |
| 67 | +static int add_tree_entries(struct path_walk_context *ctx, |
| 68 | + const char *base_path, |
| 69 | + struct object_id *oid) |
| 70 | +{ |
| 71 | + struct tree_desc desc; |
| 72 | + struct name_entry entry; |
| 73 | + struct strbuf path = STRBUF_INIT; |
| 74 | + size_t base_len; |
| 75 | + struct tree *tree = lookup_tree(ctx->repo, oid); |
| 76 | + |
| 77 | + if (!tree) { |
| 78 | + error(_("failed to walk children of tree %s: not found"), |
| 79 | + oid_to_hex(oid)); |
| 80 | + return -1; |
| 81 | + } else if (parse_tree_gently(tree, 1)) { |
| 82 | + error("bad tree object %s", oid_to_hex(oid)); |
| 83 | + return -1; |
| 84 | + } |
| 85 | + |
| 86 | + strbuf_addstr(&path, base_path); |
| 87 | + base_len = path.len; |
| 88 | + |
| 89 | + parse_tree(tree); |
| 90 | + init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); |
| 91 | + while (tree_entry(&desc, &entry)) { |
| 92 | + struct type_and_oid_list *list; |
| 93 | + struct object *o; |
| 94 | + /* Not actually true, but we will ignore submodules later. */ |
| 95 | + enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; |
| 96 | + |
| 97 | + /* Skip submodules. */ |
| 98 | + if (S_ISGITLINK(entry.mode)) |
| 99 | + continue; |
| 100 | + |
| 101 | + if (type == OBJ_TREE) { |
| 102 | + struct tree *child = lookup_tree(ctx->repo, &entry.oid); |
| 103 | + o = child ? &child->object : NULL; |
| 104 | + } else if (type == OBJ_BLOB) { |
| 105 | + struct blob *child = lookup_blob(ctx->repo, &entry.oid); |
| 106 | + o = child ? &child->object : NULL; |
| 107 | + } else { |
| 108 | + BUG("invalid type for tree entry: %d", type); |
| 109 | + } |
| 110 | + |
| 111 | + if (!o) { |
| 112 | + error(_("failed to find object %s"), |
| 113 | + oid_to_hex(&o->oid)); |
| 114 | + return -1; |
| 115 | + } |
| 116 | + |
| 117 | + /* Skip this object if already seen. */ |
| 118 | + if (o->flags & SEEN) |
| 119 | + continue; |
| 120 | + o->flags |= SEEN; |
| 121 | + |
| 122 | + strbuf_setlen(&path, base_len); |
| 123 | + strbuf_add(&path, entry.path, entry.pathlen); |
| 124 | + |
| 125 | + /* |
| 126 | + * Trees will end with "/" for concatenation and distinction |
| 127 | + * from blobs at the same path. |
| 128 | + */ |
| 129 | + if (type == OBJ_TREE) |
| 130 | + strbuf_addch(&path, '/'); |
| 131 | + |
| 132 | + if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { |
| 133 | + CALLOC_ARRAY(list, 1); |
| 134 | + list->type = type; |
| 135 | + strmap_put(&ctx->paths_to_lists, path.buf, list); |
| 136 | + } |
| 137 | + push_to_stack(ctx, path.buf); |
| 138 | + oid_array_append(&list->oids, &entry.oid); |
| 139 | + } |
| 140 | + |
| 141 | + free_tree_buffer(tree); |
| 142 | + strbuf_release(&path); |
| 143 | + return 0; |
| 144 | +} |
| 145 | + |
| 146 | +/* |
| 147 | + * For each path in paths_to_explore, walk the trees another level |
| 148 | + * and add any found blobs to the batch (but only if they exist and |
| 149 | + * haven't been added yet). |
| 150 | + */ |
| 151 | +static int walk_path(struct path_walk_context *ctx, |
| 152 | + const char *path) |
| 153 | +{ |
| 154 | + struct type_and_oid_list *list; |
| 155 | + int ret = 0; |
| 156 | + |
| 157 | + list = strmap_get(&ctx->paths_to_lists, path); |
| 158 | + |
| 159 | + if (!list->oids.nr) |
| 160 | + return 0; |
| 161 | + |
| 162 | + /* Evaluate function pointer on this data. */ |
| 163 | + ret = ctx->info->path_fn(path, &list->oids, list->type, |
| 164 | + ctx->info->path_fn_data); |
| 165 | + |
| 166 | + /* Expand data for children. */ |
| 167 | + if (list->type == OBJ_TREE) { |
| 168 | + for (size_t i = 0; i < list->oids.nr; i++) { |
| 169 | + ret |= add_tree_entries(ctx, |
| 170 | + path, |
| 171 | + &list->oids.oid[i]); |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + oid_array_clear(&list->oids); |
| 176 | + strmap_remove(&ctx->paths_to_lists, path, 1); |
| 177 | + return ret; |
| 178 | +} |
| 179 | + |
| 180 | +static void clear_paths_to_lists(struct strmap *map) |
| 181 | +{ |
| 182 | + struct hashmap_iter iter; |
| 183 | + struct strmap_entry *e; |
| 184 | + |
| 185 | + hashmap_for_each_entry(&map->map, &iter, e, ent) { |
| 186 | + struct type_and_oid_list *list = e->value; |
| 187 | + oid_array_clear(&list->oids); |
| 188 | + } |
| 189 | + strmap_clear(map, 1); |
| 190 | + strmap_init(map); |
| 191 | +} |
| 192 | + |
| 193 | +/** |
| 194 | + * Given the configuration of 'info', walk the commits based on 'info->revs' and |
| 195 | + * call 'info->path_fn' on each discovered path. |
| 196 | + * |
| 197 | + * Returns nonzero on an error. |
| 198 | + */ |
| 199 | +int walk_objects_by_path(struct path_walk_info *info) |
| 200 | +{ |
| 201 | + const char *root_path = ""; |
| 202 | + int ret = 0; |
| 203 | + size_t commits_nr = 0, paths_nr = 0; |
| 204 | + struct commit *c; |
| 205 | + struct type_and_oid_list *root_tree_list; |
| 206 | + struct path_walk_context ctx = { |
| 207 | + .repo = info->revs->repo, |
| 208 | + .revs = info->revs, |
| 209 | + .info = info, |
| 210 | + .path_stack = STRING_LIST_INIT_DUP, |
| 211 | + .path_stack_pushed = STRSET_INIT, |
| 212 | + .paths_to_lists = STRMAP_INIT |
| 213 | + }; |
| 214 | + |
| 215 | + trace2_region_enter("path-walk", "commit-walk", info->revs->repo); |
| 216 | + |
| 217 | + /* Insert a single list for the root tree into the paths. */ |
| 218 | + CALLOC_ARRAY(root_tree_list, 1); |
| 219 | + root_tree_list->type = OBJ_TREE; |
| 220 | + strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); |
| 221 | + push_to_stack(&ctx, root_path); |
| 222 | + |
| 223 | + if (prepare_revision_walk(info->revs)) |
| 224 | + die(_("failed to setup revision walk")); |
| 225 | + |
| 226 | + while ((c = get_revision(info->revs))) { |
| 227 | + struct object_id *oid = get_commit_tree_oid(c); |
| 228 | + struct tree *t; |
| 229 | + commits_nr++; |
| 230 | + |
| 231 | + oid = get_commit_tree_oid(c); |
| 232 | + t = lookup_tree(info->revs->repo, oid); |
| 233 | + |
| 234 | + if (!t) { |
| 235 | + error("could not find tree %s", oid_to_hex(oid)); |
| 236 | + return -1; |
| 237 | + } |
| 238 | + |
| 239 | + if (t->object.flags & SEEN) |
| 240 | + continue; |
| 241 | + t->object.flags |= SEEN; |
| 242 | + oid_array_append(&root_tree_list->oids, oid); |
| 243 | + } |
| 244 | + |
| 245 | + trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); |
| 246 | + trace2_region_leave("path-walk", "commit-walk", info->revs->repo); |
| 247 | + |
| 248 | + trace2_region_enter("path-walk", "path-walk", info->revs->repo); |
| 249 | + while (!ret && ctx.path_stack.nr) { |
| 250 | + char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; |
| 251 | + ctx.path_stack.nr--; |
| 252 | + paths_nr++; |
| 253 | + |
| 254 | + ret = walk_path(&ctx, path); |
| 255 | + |
| 256 | + free(path); |
| 257 | + } |
| 258 | + trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); |
| 259 | + trace2_region_leave("path-walk", "path-walk", info->revs->repo); |
| 260 | + |
| 261 | + clear_paths_to_lists(&ctx.paths_to_lists); |
| 262 | + strset_clear(&ctx.path_stack_pushed); |
| 263 | + string_list_clear(&ctx.path_stack, 0); |
| 264 | + return ret; |
| 265 | +} |
| 266 | + |
| 267 | +void path_walk_info_init(struct path_walk_info *info) |
| 268 | +{ |
| 269 | + struct path_walk_info empty = PATH_WALK_INFO_INIT; |
| 270 | + memcpy(info, &empty, sizeof(empty)); |
| 271 | +} |
| 272 | + |
| 273 | +void path_walk_info_clear(struct path_walk_info *info UNUSED) |
| 274 | +{ |
| 275 | + /* |
| 276 | + * This destructor is empty for now, as info->revs |
| 277 | + * is not owned by 'struct path_walk_info'. |
| 278 | + */ |
| 279 | +} |
0 commit comments