Skip to content

Commit 6c52614

Browse files
committed
csum-file: introduce sha1file_checkpoint
It is useful to be able to rewind a check-summed file to a certain previous state after writing data into it using the sha1write() API. The fast-import command does this after streaming blob data to the packfile being generated and then noticing that the same blob has already been written, and it does this with private code, truncate_pack(), that is commented as "Yes, this is a layering violation". Introduce two API functions, sha1file_checkpoint() and sha1file_truncate(), that allow the caller to save the state of a sha1file and then later revert it to the saved state. Use them to reimplement truncate_pack(). Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 0e99053 commit 6c52614

File tree

3 files changed

+37
-17
lines changed

3 files changed

+37
-17
lines changed

csum-file.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,26 @@ struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp
158158
return f;
159159
}
160160

161+
/*
 * Save the current state of the check-summed file `f` into `checkpoint`
 * so that sha1file_truncate() can later rewind `f` to this point.
 *
 * f:          the sha1file being written.
 * checkpoint: caller-provided storage that receives the saved state.
 */
void sha1file_checkpoint(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
162+
{
163+
sha1flush(f); /* flush first; sha1file_truncate() relies on this when it resets f->offset to 0 */
164+
checkpoint->offset = f->total; /* sha1file_truncate() uses this as the file offset to truncate/seek back to */
165+
checkpoint->ctx = f->ctx; /* copy of the SHA-1 context as of this point */
166+
}
167+
168+
/*
 * Rewind the check-summed file `f` to the state previously saved in
 * `checkpoint` by sha1file_checkpoint().
 *
 * Returns 0 on success. Returns -1 if ftruncate() or lseek() fails; in
 * that case the fields of `f` are left unmodified (the function returns
 * before touching them).
 */
int sha1file_truncate(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
169+
{
170+
off_t offset = checkpoint->offset;
171+
172+
if (ftruncate(f->fd, offset) || /* cut the file back to the saved offset ... */
173+
lseek(f->fd, offset, SEEK_SET) != offset) /* ... and reposition the descriptor there */
174+
return -1;
175+
f->total = offset; /* restore the value saved from f->total at checkpoint time */
176+
f->ctx = checkpoint->ctx; /* restore the SHA-1 context saved at checkpoint time */
177+
f->offset = 0; /* sha1flush() was called in checkpoint */
178+
return 0;
179+
}
180+
161181
void crc32_begin(struct sha1file *f)
162182
{
163183
f->crc32 = crc32(0, NULL, 0);

csum-file.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ struct sha1file {
1717
unsigned char buffer[8192];
1818
};
1919

20+
/*
 * Checkpoint: state of a sha1file captured by sha1file_checkpoint() and
 * later handed to sha1file_truncate() to rewind the file to that state.
 */
21+
struct sha1file_checkpoint {
22+
off_t offset; /* value of the sha1file's `total` at checkpoint time; used as the truncate/seek target */
23+
git_SHA_CTX ctx; /* copy of the sha1file's SHA-1 context at checkpoint time */
24+
};
25+
26+
extern void sha1file_checkpoint(struct sha1file *, struct sha1file_checkpoint *); /* save state into the checkpoint */
27+
extern int sha1file_truncate(struct sha1file *, struct sha1file_checkpoint *); /* rewind to the checkpoint; 0 on success, -1 on failure */
28+
2029
/* sha1close flags */
2130
#define CSUM_CLOSE 1
2231
#define CSUM_FSYNC 2

fast-import.c

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,17 +1143,11 @@ static int store_object(
11431143
return 0;
11441144
}
11451145

1146-
/*
 * truncate_pack(), shown as a diff: a line following an "N-" marker is the
 * removed pre-change code, a line following an "N+" marker is its
 * replacement.
 *
 * New version: rewind pack_file to `checkpoint` via sha1file_truncate(),
 * die with the errno message if that fails, then set pack_size to the
 * checkpointed offset.
 */
static void truncate_pack(off_t to, git_SHA_CTX *ctx)
1146+
static void truncate_pack(struct sha1file_checkpoint *checkpoint)
11471147
{
1148-
if (ftruncate(pack_data->pack_fd, to)
1149-
|| lseek(pack_data->pack_fd, to, SEEK_SET) != to)
1148+
if (sha1file_truncate(pack_file, checkpoint)) /* non-zero return means the truncate or seek failed */
11501149
die_errno("cannot truncate pack to skip duplicate");
1151-
pack_size = to;
1152-
1153-
/* yes this is a layering violation */
1154-
pack_file->total = to;
1155-
pack_file->offset = 0;
1156-
pack_file->ctx = *ctx;
1150+
pack_size = checkpoint->offset; /* pack_size is set to the offset the pack was rewound to */
11571151
}
11581152

11591153
static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1166,20 +1160,17 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
11661160
unsigned long hdrlen;
11671161
off_t offset;
11681162
git_SHA_CTX c;
1169-
git_SHA_CTX pack_file_ctx;
11701163
git_zstream s;
1164+
struct sha1file_checkpoint checkpoint;
11711165
int status = Z_OK;
11721166

11731167
/* Determine if we should auto-checkpoint. */
11741168
if ((max_packsize && (pack_size + 60 + len) > max_packsize)
11751169
|| (pack_size + 60 + len) < pack_size)
11761170
cycle_packfile();
11771171

1178-
offset = pack_size;
1179-
1180-
/* preserve the pack_file SHA1 ctx in case we have to truncate later */
1181-
sha1flush(pack_file);
1182-
pack_file_ctx = pack_file->ctx;
1172+
sha1file_checkpoint(pack_file, &checkpoint);
1173+
offset = checkpoint.offset;
11831174

11841175
hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
11851176
if (out_sz <= hdrlen)
@@ -1245,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
12451236

12461237
if (e->idx.offset) {
12471238
duplicate_count_by_type[OBJ_BLOB]++;
1248-
truncate_pack(offset, &pack_file_ctx);
1239+
truncate_pack(&checkpoint);
12491240

12501241
} else if (find_sha1_pack(sha1, packed_git)) {
12511242
e->type = OBJ_BLOB;
12521243
e->pack_id = MAX_PACK_ID;
12531244
e->idx.offset = 1; /* just not zero! */
12541245
duplicate_count_by_type[OBJ_BLOB]++;
1255-
truncate_pack(offset, &pack_file_ctx);
1246+
truncate_pack(&checkpoint);
12561247

12571248
} else {
12581249
e->depth = 0;

0 commit comments

Comments
 (0)