Skip to content

Commit 84243da

Browse files
peff authored and gitster committed
pack-bitmap: implement BLOB_LIMIT filtering
Just as the previous commit implemented BLOB_NONE, we can support BLOB_LIMIT filters by looking at the sizes of any blobs in the result and unsetting their bits as appropriate. This is slightly more expensive than BLOB_NONE, but still produces a noticeable speedup (these results are on git.git):

  Test                                         HEAD~2            HEAD
  ------------------------------------------------------------------------------------
  5310.9:  rev-list count with blob:none       1.80(1.77+0.02)   0.22(0.20+0.02) -87.8%
  5310.10: rev-list count with blob:limit=1k   1.99(1.96+0.03)   0.29(0.25+0.03) -85.4%

The implementation is similar to the BLOB_NONE one, with the exception that we have to go object-by-object while walking the blob-type bitmap (since we can't mask out the matches, but must look up the size individually for each blob). The trick with using ctz64() is taken from show_objects_for_type(), which likewise needs to find individual bits (but wants to quickly skip over big chunks without blobs).

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4f3bd56 commit 84243da

File tree

3 files changed

+104
-1
lines changed

3 files changed

+104
-1
lines changed

pack-bitmap.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git,
779779
bitmap_free(tips);
780780
}
781781

782+
static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
783+
uint32_t pos)
784+
{
785+
struct packed_git *pack = bitmap_git->pack;
786+
unsigned long size;
787+
struct object_info oi = OBJECT_INFO_INIT;
788+
789+
oi.sizep = &size;
790+
791+
if (pos < pack->num_objects) {
792+
struct revindex_entry *entry = &pack->revindex[pos];
793+
if (packed_object_info(the_repository, pack,
794+
entry->offset, &oi) < 0) {
795+
struct object_id oid;
796+
nth_packed_object_oid(&oid, pack, entry->nr);
797+
die(_("unable to get size of %s"), oid_to_hex(&oid));
798+
}
799+
} else {
800+
struct eindex *eindex = &bitmap_git->ext_index;
801+
struct object *obj = eindex->objects[pos - pack->num_objects];
802+
if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0)
803+
die(_("unable to get size of %s"), oid_to_hex(&obj->oid));
804+
}
805+
806+
return size;
807+
}
808+
809+
static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git,
810+
struct object_list *tip_objects,
811+
struct bitmap *to_filter,
812+
unsigned long limit)
813+
{
814+
struct eindex *eindex = &bitmap_git->ext_index;
815+
struct bitmap *tips;
816+
struct ewah_iterator it;
817+
eword_t mask;
818+
uint32_t i;
819+
820+
tips = find_tip_blobs(bitmap_git, tip_objects);
821+
822+
for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB);
823+
i < to_filter->word_alloc && ewah_iterator_next(&mask, &it);
824+
i++) {
825+
eword_t word = to_filter->words[i] & mask;
826+
unsigned offset;
827+
828+
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
829+
uint32_t pos;
830+
831+
if ((word >> offset) == 0)
832+
break;
833+
offset += ewah_bit_ctz64(word >> offset);
834+
pos = i * BITS_IN_EWORD + offset;
835+
836+
if (!bitmap_get(tips, pos) &&
837+
get_size_by_pos(bitmap_git, pos) >= limit)
838+
bitmap_unset(to_filter, pos);
839+
}
840+
}
841+
842+
for (i = 0; i < eindex->count; i++) {
843+
uint32_t pos = i + bitmap_git->pack->num_objects;
844+
if (eindex->objects[i]->type == OBJ_BLOB &&
845+
bitmap_get(to_filter, pos) &&
846+
!bitmap_get(tips, pos) &&
847+
get_size_by_pos(bitmap_git, pos) >= limit)
848+
bitmap_unset(to_filter, pos);
849+
}
850+
851+
bitmap_free(tips);
852+
}
853+
782854
static int filter_bitmap(struct bitmap_index *bitmap_git,
783855
struct object_list *tip_objects,
784856
struct bitmap *to_filter,
@@ -794,6 +866,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git,
794866
return 0;
795867
}
796868

869+
if (filter->choice == LOFC_BLOB_LIMIT) {
870+
if (bitmap_git)
871+
filter_bitmap_blob_limit(bitmap_git, tip_objects,
872+
to_filter,
873+
filter->blob_limit_value);
874+
return 0;
875+
}
876+
797877
/* filter choice not handled */
798878
return -1;
799879
}

t/perf/p5310-pack-bitmaps.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ test_perf 'rev-list count with blob:none' '
5252
--filter=blob:none >/dev/null
5353
'
5454

55+
test_perf 'rev-list count with blob:limit=1k' '
56+
git rev-list --use-bitmap-index --count --objects --all \
57+
--filter=blob:limit=1k >/dev/null
58+
'
59+
5560
test_expect_success 'create partial bitmap state' '
5661
# pick a commit to represent the repo tip in the past
5762
cutoff=$(git rev-list HEAD~100 -1) &&

t/t6113-rev-list-bitmap-filters.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters'
66
test_expect_success 'set up bitmapped repo' '
77
# one commit will have bitmaps, the other will not
88
test_commit one &&
9+
test_commit much-larger-blob-one &&
910
git repack -adb &&
10-
test_commit two
11+
test_commit two &&
12+
test_commit much-larger-blob-two
1113
'
1214

1315
test_expect_success 'filters fallback to non-bitmap traversal' '
@@ -35,4 +37,20 @@ test_expect_success 'blob:none filter with specified blob' '
3537
test_bitmap_traversal expect actual
3638
'
3739

40+
test_expect_success 'blob:limit filter' '
41+
git rev-list --objects --filter=blob:limit=5 HEAD >expect &&
42+
git rev-list --use-bitmap-index \
43+
--objects --filter=blob:limit=5 HEAD >actual &&
44+
test_bitmap_traversal expect actual
45+
'
46+
47+
test_expect_success 'blob:limit filter with specified blob' '
48+
git rev-list --objects --filter=blob:limit=5 \
49+
HEAD HEAD:much-larger-blob-two.t >expect &&
50+
git rev-list --use-bitmap-index \
51+
--objects --filter=blob:limit=5 \
52+
HEAD HEAD:much-larger-blob-two.t >actual &&
53+
test_bitmap_traversal expect actual
54+
'
55+
3856
test_done

0 commit comments

Comments
 (0)