Skip to content

Commit 0c5a62f

Browse files
ttaylorrgitster
authored andcommitted
midx-write.c: do not read existing MIDX with packs_to_include
Commit d6a8c58 (midx-write.c: support reading an existing MIDX with `packs_to_include`, 2024-05-29) changed the MIDX generation machinery to support reading from an existing MIDX when writing a new one. Unfortunately, the rest of the MIDX generation machinery is not prepared to deal with such a change. For instance, the function responsible for adding to the object ID fanout table from a MIDX source (midx_fanout_add_midx_fanout()) will gladly add objects from an existing MIDX for some fanout level regardless of whether or not those objects came from packs that are to be included in the subsequent MIDX write. This results in broken pseudo-pack object order (leading to incorrect object traversal results) and segmentation faults, like so (generated by running the added test prior to the changes in midx-write.c): #0 0x000055ee31393f47 in midx_pack_order (ctx=0x7ffdde205c70) at midx-write.c:590 #1 0x000055ee31395a69 in write_midx_internal (object_dir=0x55ee32570440 ".git/objects", packs_to_include=0x7ffdde205e20, packs_to_drop=0x0, preferred_pack_name=0x0, refs_snapshot=0x0, flags=15) at midx-write.c:1171 #2 0x000055ee31395f38 in write_midx_file_only (object_dir=0x55ee32570440 ".git/objects", packs_to_include=0x7ffdde205e20, preferred_pack_name=0x0, refs_snapshot=0x0, flags=15) at midx-write.c:1274 [...] In stack frame #0, the code on midx-write.c:590 is using the new pack ID corresponding to some object which was added from the existing MIDX. Importantly, the pack from which that object was selected in the existing MIDX does not appear in the new MIDX as it was excluded via `--stdin-packs`. In this instance, the pack in question had pack ID "1" in the existing MIDX, but since it was excluded from the new MIDX, we never filled in that entry in the pack_perm table, resulting in: (gdb) p *ctx->pack_perm@2 $1 = {0, 1515870810} Which is what causes the segfault above when we try and read: struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = 0; Fundamentally, we should be able to read information from an existing MIDX when generating a new one. But in practice the midx-write.c code assumes that we won't run into issues like the above with incongruent pack IDs, and often makes those assumptions in extremely subtle and fragile ways. Instead, let's avoid reading from an existing MIDX altogether, and stick with the pre-d6a8c58675 implementation. Harden against any regressions in this area by adding a test which demonstrates these issues. Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1b76f06 commit 0c5a62f

File tree

2 files changed

+61
-11
lines changed

2 files changed

+61
-11
lines changed

midx-write.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,27 @@ struct write_midx_context {
101101
};
102102

103103
static int should_include_pack(const struct write_midx_context *ctx,
104-
const char *file_name,
105-
int exclude_from_midx)
104+
const char *file_name)
106105
{
107-
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
106+
/*
107+
* Note that at most one of ctx->m and ctx->to_include are set,
108+
* so we are testing midx_contains_pack() and
109+
* string_list_has_string() independently (guarded by the
110+
* appropriate NULL checks).
111+
*
112+
* We could support passing to_include while reusing an existing
113+
* MIDX, but don't currently since the reuse process drags
114+
* forward all packs from an existing MIDX (without checking
115+
* whether or not they appear in the to_include list).
116+
*
117+
* If we added support for that, these next two conditional
118+
* should be performed independently (likely checking
119+
* to_include before the existing MIDX).
120+
*/
121+
if (ctx->m && midx_contains_pack(ctx->m, file_name))
108122
return 0;
109-
if (ctx->to_include && !string_list_has_string(ctx->to_include,
110-
file_name))
123+
else if (ctx->to_include &&
124+
!string_list_has_string(ctx->to_include, file_name))
111125
return 0;
112126
return 1;
113127
}
@@ -121,7 +135,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
121135
if (ends_with(file_name, ".idx")) {
122136
display_progress(ctx->progress, ++ctx->pack_paths_checked);
123137

124-
if (!should_include_pack(ctx, file_name, 1))
138+
if (!should_include_pack(ctx, file_name))
125139
return;
126140

127141
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
@@ -880,9 +894,6 @@ static int fill_packs_from_midx(struct write_midx_context *ctx,
880894
uint32_t i;
881895

882896
for (i = 0; i < ctx->m->num_packs; i++) {
883-
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
884-
continue;
885-
886897
ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
887898

888899
if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
@@ -937,7 +948,15 @@ static int write_midx_internal(const char *object_dir,
937948
die_errno(_("unable to create leading directories of %s"),
938949
midx_name.buf);
939950

940-
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
951+
if (!packs_to_include) {
952+
/*
953+
* Only reference an existing MIDX when not filtering which
954+
* packs to include, since all packs and objects are copied
955+
* blindly from an existing MIDX if one is present.
956+
*/
957+
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
958+
}
959+
941960
if (ctx.m && !midx_checksum_valid(ctx.m)) {
942961
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
943962
ctx.m = NULL;
@@ -946,7 +965,6 @@ static int write_midx_internal(const char *object_dir,
946965
ctx.nr = 0;
947966
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
948967
ctx.info = NULL;
949-
ctx.to_include = packs_to_include;
950968
ALLOC_ARRAY(ctx.info, ctx.alloc);
951969

952970
if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
@@ -963,6 +981,8 @@ static int write_midx_internal(const char *object_dir,
963981
else
964982
ctx.progress = NULL;
965983

984+
ctx.to_include = packs_to_include;
985+
966986
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
967987
stop_progress(&ctx.progress);
968988

t/t5326-multi-pack-bitmaps.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,4 +551,34 @@ do
551551
'
552552
done
553553

554+
test_expect_success 'remove one packfile between MIDX bitmap writes' '
555+
git init remove-pack-between-writes &&
556+
(
557+
cd remove-pack-between-writes &&
558+
559+
test_commit A &&
560+
test_commit B &&
561+
test_commit C &&
562+
563+
# Create packs with the prefix "pack-A", "pack-B",
564+
# "pack-C" to impose a lexicographic order on these
565+
# packs so the pack being removed is always from the
566+
# middle.
567+
packdir=.git/objects/pack &&
568+
A="$(echo A | git pack-objects $packdir/pack-A --revs)" &&
569+
B="$(echo B | git pack-objects $packdir/pack-B --revs)" &&
570+
C="$(echo C | git pack-objects $packdir/pack-C --revs)" &&
571+
572+
git multi-pack-index write --bitmap &&
573+
574+
cat >in <<-EOF &&
575+
pack-A-$A.idx
576+
pack-C-$C.idx
577+
EOF
578+
git multi-pack-index write --bitmap --stdin-packs <in &&
579+
580+
git rev-list --test-bitmap HEAD
581+
)
582+
'
583+
554584
test_done

0 commit comments

Comments
 (0)