Skip to content

Commit eb83e4c

Browse files
pks-t authored and gitster committed
builtin/cat-file: wire up an option to filter objects
In batch mode, git-cat-file(1) enumerates all objects and prints them by iterating through both loose and packed objects. This works without considering their reachability at all, and consequently most options to filter objects as they exist in e.g. git-rev-list(1) are not applicable. In some situations it may still be useful, though, to filter objects based on properties that are inherent to them. This includes the object size as well as its type.

Such a filter already exists in git-rev-list(1) with the `--filter=` command line option. While this option supports a couple of filters that are not applicable to our use case, some of them are quite a neat fit.

Wire up the filter as an option for git-cat-file(1). This allows us to reuse the same syntax as in git-rev-list(1) so that we don't have to reinvent the wheel. For now, we die when any of the filter options has been passed by the user, but they will be wired up in subsequent commits.

Further note that the filters that we are about to introduce don't significantly speed up the runtime of git-cat-file(1). While we can skip emitting a lot of objects in case they are uninteresting to us, the majority of time is spent reading the packfile, which is bottlenecked by I/O and not the processor. This will change, though, once we start to make use of bitmaps, which will allow us to skip reading the whole packfile.

Signed-off-by: Patrick Steinhardt <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1914ae0 commit eb83e4c

File tree

3 files changed

+88
-4
lines changed

3 files changed

+88
-4
lines changed

Documentation/git-cat-file.adoc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ OPTIONS
8181
end-of-line conversion, etc). In this case, `<object>` has to be of
8282
the form `<tree-ish>:<path>`, or `:<path>`.
8383

84+
--filter=<filter-spec>::
85+
--no-filter::
86+
Omit objects from the list of printed objects. This can only be used in
87+
combination with one of the batched modes. Excluded objects that have
88+
been explicitly requested via any of the batch modes that read objects
89+
via standard input (`--batch`, `--batch-check`) will be reported as
90+
"excluded". Excluded objects in `--batch-all-objects` mode will not be
91+
printed at all. No filters are supported yet.
92+
8493
--path=<path>::
8594
For use with `--textconv` or `--filters`, to allow specifying an object
8695
name and a path separately, e.g. when it is difficult to figure out
@@ -340,6 +349,13 @@ the repository, then `cat-file` will ignore any custom format and print:
340349
<object> SP missing LF
341350
------------
342351

352+
If a name is specified on stdin that is filtered out via `--filter=`,
353+
then `cat-file` will ignore any custom format and print:
354+
355+
------------
356+
<object> SP excluded LF
357+
------------
358+
343359
If a name is specified that might refer to more than one object (an ambiguous short sha), then `cat-file` will ignore any custom format and print:
344360

345361
------------

builtin/cat-file.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "gettext.h"
1616
#include "hex.h"
1717
#include "ident.h"
18+
#include "list-objects-filter-options.h"
1819
#include "parse-options.h"
1920
#include "userdiff.h"
2021
#include "streaming.h"
@@ -35,6 +36,7 @@ enum batch_mode {
3536
};
3637

3738
struct batch_options {
39+
struct list_objects_filter_options objects_filter;
3840
int enabled;
3941
int follow_symlinks;
4042
enum batch_mode batch_mode;
@@ -495,6 +497,13 @@ static void batch_object_write(const char *obj_name,
495497
return;
496498
}
497499

500+
switch (opt->objects_filter.choice) {
501+
case LOFC_DISABLED:
502+
break;
503+
default:
504+
BUG("unsupported objects filter");
505+
}
506+
498507
if (use_mailmap && (data->type == OBJ_COMMIT || data->type == OBJ_TAG)) {
499508
size_t s = data->size;
500509
char *buf = NULL;
@@ -820,7 +829,8 @@ static int batch_objects(struct batch_options *opt)
820829
struct object_cb_data cb;
821830
struct object_info empty = OBJECT_INFO_INIT;
822831

823-
if (!memcmp(&data.info, &empty, sizeof(empty)))
832+
if (!memcmp(&data.info, &empty, sizeof(empty)) &&
833+
opt->objects_filter.choice == LOFC_DISABLED)
824834
data.skip_object_info = 1;
825835

826836
if (repo_has_promisor_remote(the_repository))
@@ -944,10 +954,13 @@ int cmd_cat_file(int argc,
944954
int opt_cw = 0;
945955
int opt_epts = 0;
946956
const char *exp_type = NULL, *obj_name = NULL;
947-
struct batch_options batch = {0};
957+
struct batch_options batch = {
958+
.objects_filter = LIST_OBJECTS_FILTER_INIT,
959+
};
948960
int unknown_type = 0;
949961
int input_nul_terminated = 0;
950962
int nul_terminated = 0;
963+
int ret;
951964

952965
const char * const builtin_catfile_usage[] = {
953966
N_("git cat-file <type> <object>"),
@@ -1008,6 +1021,7 @@ int cmd_cat_file(int argc,
10081021
N_("run filters on object's content"), 'w'),
10091022
OPT_STRING(0, "path", &force_path, N_("blob|tree"),
10101023
N_("use a <path> for (--textconv | --filters); Not with 'batch'")),
1024+
OPT_PARSE_LIST_OBJECTS_FILTER(&batch.objects_filter),
10111025
OPT_END()
10121026
};
10131027

@@ -1022,6 +1036,14 @@ int cmd_cat_file(int argc,
10221036
if (use_mailmap)
10231037
read_mailmap(&mailmap);
10241038

1039+
switch (batch.objects_filter.choice) {
1040+
case LOFC_DISABLED:
1041+
break;
1042+
default:
1043+
usagef(_("objects filter not supported: '%s'"),
1044+
list_object_filter_config_name(batch.objects_filter.choice));
1045+
}
1046+
10251047
/* --batch-all-objects? */
10261048
if (opt == 'b')
10271049
batch.all_objects = 1;
@@ -1076,7 +1098,8 @@ int cmd_cat_file(int argc,
10761098
usage_msg_opt(_("batch modes take no arguments"),
10771099
builtin_catfile_usage, options);
10781100

1079-
return batch_objects(&batch);
1101+
ret = batch_objects(&batch);
1102+
goto out;
10801103
}
10811104

10821105
if (opt) {
@@ -1108,5 +1131,10 @@ int cmd_cat_file(int argc,
11081131

11091132
if (unknown_type && opt != 't' && opt != 's')
11101133
die("git cat-file --allow-unknown-type: use with -s or -t");
1111-
return cat_one_file(opt, exp_type, obj_name, unknown_type);
1134+
1135+
ret = cat_one_file(opt, exp_type, obj_name, unknown_type);
1136+
1137+
out:
1138+
list_objects_filter_release(&batch.objects_filter);
1139+
return ret;
11121140
}

t/t1006-cat-file.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,4 +1353,44 @@ test_expect_success PERL '--batch-command info is unbuffered by default' '
13531353
perl -e "$script" -- --batch-command $hello_oid "$expect" "info "
13541354
'
13551355

1356+
test_expect_success 'setup for objects filter' '
1357+
git init repo
1358+
'
1359+
1360+
test_expect_success 'objects filter with unknown option' '
1361+
cat >expect <<-EOF &&
1362+
fatal: invalid filter-spec ${SQ}unknown${SQ}
1363+
EOF
1364+
test_must_fail git -C repo cat-file --filter=unknown 2>err &&
1365+
test_cmp expect err
1366+
'
1367+
1368+
for option in blob:none blob:limit=1 object:type=tag sparse:oid=1234 tree:1 sparse:path=x
1369+
do
1370+
test_expect_success "objects filter with unsupported option $option" '
1371+
case "$option" in
1372+
tree:1)
1373+
echo "usage: objects filter not supported: ${SQ}tree${SQ}" >expect
1374+
;;
1375+
sparse:path=x)
1376+
echo "fatal: sparse:path filters support has been dropped" >expect
1377+
;;
1378+
*)
1379+
option_name=$(echo "$option" | cut -d= -f1) &&
1380+
printf "usage: objects filter not supported: ${SQ}%s${SQ}\n" "$option_name" >expect
1381+
;;
1382+
esac &&
1383+
test_must_fail git -C repo cat-file --filter=$option 2>err &&
1384+
test_cmp expect err
1385+
'
1386+
done
1387+
1388+
test_expect_success 'objects filter: disabled' '
1389+
git -C repo cat-file --batch-check="%(objectname)" --batch-all-objects --no-filter >actual &&
1390+
sort actual >actual.sorted &&
1391+
git -C repo rev-list --objects --no-object-names --all >expect &&
1392+
sort expect >expect.sorted &&
1393+
test_cmp expect.sorted actual.sorted
1394+
'
1395+
13561396
test_done

0 commit comments

Comments
 (0)