@@ -280,6 +280,7 @@ struct recent_command
280280/* Configured limits on output */
281281static unsigned long max_depth = 10 ;
282282static off_t max_packsize = (1LL << 32 ) - 1 ;
283+ static uintmax_t big_file_threshold = 512 * 1024 * 1024 ;
283284static int force_update ;
284285static int pack_compression_level = Z_DEFAULT_COMPRESSION ;
285286static int pack_compression_seen ;
@@ -1003,7 +1004,7 @@ static void cycle_packfile(void)
10031004
10041005static size_t encode_header (
10051006 enum object_type type ,
1006- size_t size ,
1007+ uintmax_t size ,
10071008 unsigned char * hdr )
10081009{
10091010 int n = 1 ;
@@ -1159,6 +1160,118 @@ static int store_object(
11591160 return 0 ;
11601161}
11611162
1163+ static void truncate_pack (off_t to )
1164+ {
1165+ if (ftruncate (pack_data -> pack_fd , to )
1166+ || lseek (pack_data -> pack_fd , to , SEEK_SET ) != to )
1167+ die_errno ("cannot truncate pack to skip duplicate" );
1168+ pack_size = to ;
1169+ }
1170+
1171+ static void stream_blob (uintmax_t len , unsigned char * sha1out , uintmax_t mark )
1172+ {
1173+ size_t in_sz = 64 * 1024 , out_sz = 64 * 1024 ;
1174+ unsigned char * in_buf = xmalloc (in_sz );
1175+ unsigned char * out_buf = xmalloc (out_sz );
1176+ struct object_entry * e ;
1177+ unsigned char sha1 [20 ];
1178+ unsigned long hdrlen ;
1179+ off_t offset ;
1180+ git_SHA_CTX c ;
1181+ z_stream s ;
1182+ int status = Z_OK ;
1183+
1184+ /* Determine if we should auto-checkpoint. */
1185+ if ((pack_size + 60 + len ) > max_packsize
1186+ || (pack_size + 60 + len ) < pack_size )
1187+ cycle_packfile ();
1188+
1189+ offset = pack_size ;
1190+
1191+ hdrlen = snprintf ((char * )out_buf , out_sz , "blob %" PRIuMAX , len ) + 1 ;
1192+ if (out_sz <= hdrlen )
1193+ die ("impossibly large object header" );
1194+
1195+ git_SHA1_Init (& c );
1196+ git_SHA1_Update (& c , out_buf , hdrlen );
1197+
1198+ memset (& s , 0 , sizeof (s ));
1199+ deflateInit (& s , pack_compression_level );
1200+
1201+ hdrlen = encode_header (OBJ_BLOB , len , out_buf );
1202+ if (out_sz <= hdrlen )
1203+ die ("impossibly large object header" );
1204+
1205+ s .next_out = out_buf + hdrlen ;
1206+ s .avail_out = out_sz - hdrlen ;
1207+
1208+ while (status != Z_STREAM_END ) {
1209+ if (0 < len && !s .avail_in ) {
1210+ size_t cnt = in_sz < len ? in_sz : (size_t )len ;
1211+ size_t n = fread (in_buf , 1 , cnt , stdin );
1212+ if (!n && feof (stdin ))
1213+ die ("EOF in data (%" PRIuMAX " bytes remaining)" , len );
1214+
1215+ git_SHA1_Update (& c , in_buf , n );
1216+ s .next_in = in_buf ;
1217+ s .avail_in = n ;
1218+ len -= n ;
1219+ }
1220+
1221+ status = deflate (& s , len ? 0 : Z_FINISH );
1222+
1223+ if (!s .avail_out || status == Z_STREAM_END ) {
1224+ size_t n = s .next_out - out_buf ;
1225+ write_or_die (pack_data -> pack_fd , out_buf , n );
1226+ pack_size += n ;
1227+ s .next_out = out_buf ;
1228+ s .avail_out = out_sz ;
1229+ }
1230+
1231+ switch (status ) {
1232+ case Z_OK :
1233+ case Z_BUF_ERROR :
1234+ case Z_STREAM_END :
1235+ continue ;
1236+ default :
1237+ die ("unexpected deflate failure: %d" , status );
1238+ }
1239+ }
1240+ deflateEnd (& s );
1241+ git_SHA1_Final (sha1 , & c );
1242+
1243+ if (sha1out )
1244+ hashcpy (sha1out , sha1 );
1245+
1246+ e = insert_object (sha1 );
1247+
1248+ if (mark )
1249+ insert_mark (mark , e );
1250+
1251+ if (e -> offset ) {
1252+ duplicate_count_by_type [OBJ_BLOB ]++ ;
1253+ truncate_pack (offset );
1254+
1255+ } else if (find_sha1_pack (sha1 , packed_git )) {
1256+ e -> type = OBJ_BLOB ;
1257+ e -> pack_id = MAX_PACK_ID ;
1258+ e -> offset = 1 ; /* just not zero! */
1259+ duplicate_count_by_type [OBJ_BLOB ]++ ;
1260+ truncate_pack (offset );
1261+
1262+ } else {
1263+ e -> depth = 0 ;
1264+ e -> type = OBJ_BLOB ;
1265+ e -> pack_id = pack_id ;
1266+ e -> offset = offset ;
1267+ object_count ++ ;
1268+ object_count_by_type [OBJ_BLOB ]++ ;
1269+ }
1270+
1271+ free (in_buf );
1272+ free (out_buf );
1273+ }
1274+
11621275/* All calls must be guarded by find_object() or find_mark() to
11631276 * ensure the 'struct object_entry' passed was written by this
11641277 * process instance. We unpack the entry by the offset, avoiding
@@ -1704,7 +1817,7 @@ static void parse_mark(void)
17041817 next_mark = 0 ;
17051818}
17061819
1707- static void parse_data (struct strbuf * sb )
1820+ static int parse_data (struct strbuf * sb , uintmax_t limit , uintmax_t * len_res )
17081821{
17091822 strbuf_reset (sb );
17101823
@@ -1728,9 +1841,15 @@ static void parse_data(struct strbuf *sb)
17281841 free (term );
17291842 }
17301843 else {
1731- size_t n = 0 , length ;
1844+ uintmax_t len = strtoumax (command_buf .buf + 5 , NULL , 10 );
1845+ size_t n = 0 , length = (size_t )len ;
17321846
1733- length = strtoul (command_buf .buf + 5 , NULL , 10 );
1847+ if (limit && limit < len ) {
1848+ * len_res = len ;
1849+ return 0 ;
1850+ }
1851+ if (length < len )
1852+ die ("data is too large to use in this context" );
17341853
17351854 while (n < length ) {
17361855 size_t s = strbuf_fread (sb , length - n , stdin );
@@ -1742,6 +1861,7 @@ static void parse_data(struct strbuf *sb)
17421861 }
17431862
17441863 skip_optional_lf ();
1864+ return 1 ;
17451865}
17461866
17471867static int validate_raw_date (const char * src , char * result , int maxlen )
@@ -1806,14 +1926,32 @@ static char *parse_ident(const char *buf)
18061926 return ident ;
18071927}
18081928
1809- static void parse_new_blob (void )
1929+ static void parse_and_store_blob (
1930+ struct last_object * last ,
1931+ unsigned char * sha1out ,
1932+ uintmax_t mark )
18101933{
18111934 static struct strbuf buf = STRBUF_INIT ;
1935+ uintmax_t len ;
18121936
1937+ if (parse_data (& buf , big_file_threshold , & len ))
1938+ store_object (OBJ_BLOB , & buf , last , sha1out , mark );
1939+ else {
1940+ if (last ) {
1941+ strbuf_release (& last -> data );
1942+ last -> offset = 0 ;
1943+ last -> depth = 0 ;
1944+ }
1945+ stream_blob (len , sha1out , mark );
1946+ skip_optional_lf ();
1947+ }
1948+ }
1949+
1950+ static void parse_new_blob (void )
1951+ {
18131952 read_next_command ();
18141953 parse_mark ();
1815- parse_data (& buf );
1816- store_object (OBJ_BLOB , & buf , & last_blob , NULL , next_mark );
1954+ parse_and_store_blob (& last_blob , NULL , next_mark );
18171955}
18181956
18191957static void unload_one_branch (void )
@@ -1924,15 +2062,12 @@ static void file_change_m(struct branch *b)
19242062 * another repository.
19252063 */
19262064 } else if (inline_data ) {
1927- static struct strbuf buf = STRBUF_INIT ;
1928-
19292065 if (p != uq .buf ) {
19302066 strbuf_addstr (& uq , p );
19312067 p = uq .buf ;
19322068 }
19332069 read_next_command ();
1934- parse_data (& buf );
1935- store_object (OBJ_BLOB , & buf , & last_blob , sha1 , 0 );
2070+ parse_and_store_blob (& last_blob , sha1 , 0 );
19362071 } else if (oe ) {
19372072 if (oe -> type != OBJ_BLOB )
19382073 die ("Not a blob (actually a %s): %s" ,
@@ -2058,15 +2193,12 @@ static void note_change_n(struct branch *b)
20582193 die ("Invalid ref name or SHA1 expression: %s" , p );
20592194
20602195 if (inline_data ) {
2061- static struct strbuf buf = STRBUF_INIT ;
2062-
20632196 if (p != uq .buf ) {
20642197 strbuf_addstr (& uq , p );
20652198 p = uq .buf ;
20662199 }
20672200 read_next_command ();
2068- parse_data (& buf );
2069- store_object (OBJ_BLOB , & buf , & last_blob , sha1 , 0 );
2201+ parse_and_store_blob (& last_blob , sha1 , 0 );
20702202 } else if (oe ) {
20712203 if (oe -> type != OBJ_BLOB )
20722204 die ("Not a blob (actually a %s): %s" ,
@@ -2232,7 +2364,7 @@ static void parse_new_commit(void)
22322364 }
22332365 if (!committer )
22342366 die ("Expected committer but didn't get one" );
2235- parse_data (& msg );
2367+ parse_data (& msg , 0 , NULL );
22362368 read_next_command ();
22372369 parse_from (b );
22382370 merge_list = parse_merge (& merge_count );
@@ -2353,7 +2485,7 @@ static void parse_new_tag(void)
23532485 tagger = NULL ;
23542486
23552487 /* tag payload/message */
2356- parse_data (& msg );
2488+ parse_data (& msg , 0 , NULL );
23572489
23582490 /* build the tag object */
23592491 strbuf_reset (& new_data );
@@ -2473,11 +2605,15 @@ static int git_pack_config(const char *k, const char *v, void *cb)
24732605 pack_compression_seen = 1 ;
24742606 return 0 ;
24752607 }
2608+ if (!strcmp (k , "core.bigfilethreshold" )) {
2609+ long n = git_config_int (k , v );
2610+ big_file_threshold = 0 < n ? n : 0 ;
2611+ }
24762612 return git_default_config (k , v , cb );
24772613}
24782614
/*
 * Usage line shown for invalid invocations.  Each option is spelled
 * exactly as main() matches it; in particular "--depth=" has no embedded
 * space.
 */
static const char fast_import_usage[] =
"git fast-import [--date-format=f] [--max-pack-size=n] [--big-file-threshold=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file]";
24812617
24822618int main (int argc , const char * * argv )
24832619{
@@ -2518,7 +2654,12 @@ int main(int argc, const char **argv)
25182654 }
25192655 else if (!prefixcmp (a , "--max-pack-size=" ))
25202656 max_packsize = strtoumax (a + 16 , NULL , 0 ) * 1024 * 1024 ;
2521- else if (!prefixcmp (a , "--depth=" )) {
2657+ else if (!prefixcmp (a , "--big-file-threshold=" )) {
2658+ unsigned long v ;
2659+ if (!git_parse_ulong (a + 21 , & v ))
2660+ usage (fast_import_usage );
2661+ big_file_threshold = v ;
2662+ } else if (!prefixcmp (a , "--depth=" )) {
25222663 max_depth = strtoul (a + 8 , NULL , 0 );
25232664 if (max_depth > MAX_DEPTH )
25242665 die ("--depth cannot exceed %u" , MAX_DEPTH );
0 commit comments