@@ -439,96 +439,208 @@ void ensure_correct_sparsity(struct index_state *istate)
439439 ensure_full_index (istate );
440440}
441441
442- static int path_found (const char * path , const char * * dirname , size_t * dir_len ,
443- int * dir_found )
442+ struct path_found_data {
443+ /**
444+ * The path stored in 'dir', if non-empty, corresponds to the most-
445+ * recent path that we checked where:
446+ *
447+ * 1. The path should be a directory, according to the index.
448+ * 2. The path does not exist.
449+ * 3. The parent path _does_ exist. (This may be the root of the
450+ * working directory.)
451+ */
452+ struct strbuf dir ;
453+ size_t lstat_count ;
454+ };
455+
/*
 * Static initializer for 'struct path_found_data': an empty 'dir' buffer
 * and a zeroed lstat counter.
 */
#define PATH_FOUND_DATA_INIT { \
	.dir = STRBUF_INIT, \
	.lstat_count = 0, \
}
459+
460+ static void clear_path_found_data (struct path_found_data * data )
461+ {
462+ strbuf_release (& data -> dir );
463+ }
464+
/**
 * Return the length of the longest common prefix of 'path1' and 'path2'
 * that ends in a slash ('/'), i.e. the longest common parent directory
 * including its trailing separator. Returns zero if the two paths share
 * no leading directory.
 *
 * Both arguments must be NUL-terminated strings.
 */
static size_t max_common_dir_prefix(const char *path1, const char *path2)
{
	size_t common_prefix = 0;

	/*
	 * Walk while the two strings agree. Checking path1[i] for NUL is
	 * sufficient: if the bytes are equal and path1[i] is non-NUL, then
	 * path2[i] is non-NUL as well.
	 */
	for (size_t i = 0; path1[i] && path1[i] == path2[i]; i++) {
		/*
		 * If they agree at a directory separator, then add one
		 * to make sure it is included in the common prefix string.
		 */
		if (path1[i] == '/')
			common_prefix = i + 1;
	}

	return common_prefix;
}
487+
488+ static int path_found (const char * path , struct path_found_data * data )
444489{
445490 struct stat st ;
446- char * newdir ;
447- char * tmp ;
491+ size_t common_prefix ;
448492
449493 /*
450- * If dirname corresponds to a directory that doesn't exist, and this
451- * path starts with dirname, then path can't exist.
494+ * If data->dir is non-empty, then it contains a path that doesn't
495+ * exist, including an ending slash ('/'). If it is a prefix of 'path',
496+ * then we can return 0.
452497 */
453- if (! * dir_found && !memcmp (path , * dirname , * dir_len ))
498+ if (data -> dir . len && !memcmp (path , data -> dir . buf , data -> dir . len ))
454499 return 0 ;
455500
456501 /*
457- * If path itself exists, return 1.
502+ * Otherwise, we must check if the current path exists. If it does, then
503+ * return 1. The cached directory will be skipped until we come across
504+ * a missing path again.
458505 */
506+ data -> lstat_count ++ ;
459507 if (!lstat (path , & st ))
460508 return 1 ;
461509
462510 /*
463- * Otherwise, path does not exist so we'll return 0...but we'll first
464- * determine some info about its parent directory so we can avoid
465- * lstat calls for future cache entries.
511+ * At this point, we know that 'path' doesn't exist, and we know that
512+ * the parent directory of 'data->dir' does exist. Let's set 'data->dir'
513+ * to be the top-most non-existing directory of 'path'. If the first
514+ * parent of 'path' exists, then we will act as though 'path'
515+ * corresponds to a directory (by adding a slash).
466516 */
467- newdir = strrchr (path , '/' );
468- if (!newdir )
469- return 0 ; /* Didn't find a parent dir; just return 0 now. */
517+ common_prefix = max_common_dir_prefix (path , data -> dir .buf );
470518
471519 /*
472- * If path starts with directory (which we already lstat'ed and found),
473- * then no need to lstat parent directory again.
520+ * At this point, 'path' and 'data->dir' have a common existing parent
521+ * directory given by path[0..common_prefix] (which could have length 0).
522+ * We "grow" the data->dir buffer by checking for existing directories
523+ * along 'path'.
474524 */
475- if (* dir_found && * dirname && memcmp (path , * dirname , * dir_len ))
476- return 0 ;
477525
478- /* Free previous dirname, and cache path's dirname */
479- * dirname = path ;
480- * dir_len = newdir - path + 1 ;
526+ strbuf_setlen (& data -> dir , common_prefix );
527+ while (1 ) {
528+ /* Find the next directory in 'path'. */
529+ const char * rest = path + data -> dir .len ;
530+ const char * next_slash = strchr (rest , '/' );
531+
532+ /*
533+ * If there are no more slashes, then 'path' doesn't contain a
534+ * non-existent _parent_ directory. Set 'data->dir' to be equal
535+ * to 'path' plus an additional slash, so it can be used for
536+ * caching in the future. The filename of 'path' is considered
537+ * a non-existent directory.
538+ *
539+ * Note: if "{path}/" exists as a directory, then it will never
540+ * appear as a prefix of other callers to this method, assuming
541+ * the context from the clear_skip_worktree... methods. If this
542+ * method is reused, then this must be reconsidered.
543+ */
544+ if (!next_slash ) {
545+ strbuf_addstr (& data -> dir , rest );
546+ strbuf_addch (& data -> dir , '/' );
547+ break ;
548+ }
549+
550+ /*
551+ * Now that we have a slash, let's grow 'data->dir' to include
552+ * this slash, then test if we should stop.
553+ */
554+ strbuf_add (& data -> dir , rest , next_slash - rest + 1 );
481555
482- tmp = xstrndup (path , * dir_len );
483- * dir_found = !lstat (tmp , & st );
484- free (tmp );
556+ /* If the parent dir doesn't exist, then stop here. */
557+ data -> lstat_count ++ ;
558+ if (lstat (data -> dir .buf , & st ))
559+ return 0 ;
560+ }
485561
562+ /*
563+ * At this point, 'data->dir' is equal to 'path' plus a slash character,
564+ * and the parent directory of 'path' definitely exists. Moreover, we
565+ * know that 'path' doesn't exist, or we would have returned 1 earlier.
566+ */
486567 return 0 ;
487568}
488569
489- void clear_skip_worktree_from_present_files (struct index_state * istate )
570+ static int clear_skip_worktree_from_present_files_sparse (struct index_state * istate )
490571{
491- const char * last_dirname = NULL ;
492- size_t dir_len = 0 ;
493- int dir_found = 1 ;
494-
495- int i ;
496- int path_count [2 ] = {0 , 0 };
497- int restarted = 0 ;
572+ struct path_found_data data = PATH_FOUND_DATA_INIT ;
498573
499- if (!core_apply_sparse_checkout ||
500- sparse_expect_files_outside_of_patterns )
501- return ;
574+ int path_count = 0 ;
575+ int to_restart = 0 ;
502576
503- trace2_region_enter ("index" , "clear_skip_worktree_from_present_files " ,
577+ trace2_region_enter ("index" , "clear_skip_worktree_from_present_files_sparse " ,
504578 istate -> repo );
505- restart :
506- for (i = 0 ; i < istate -> cache_nr ; i ++ ) {
579+ for (int i = 0 ; i < istate -> cache_nr ; i ++ ) {
507580 struct cache_entry * ce = istate -> cache [i ];
508581
509582 if (ce_skip_worktree (ce )) {
510- path_count [ restarted ] ++ ;
511- if (path_found (ce -> name , & last_dirname , & dir_len , & dir_found )) {
583+ path_count ++ ;
584+ if (path_found (ce -> name , & data )) {
512585 if (S_ISSPARSEDIR (ce -> ce_mode )) {
513- if (restarted )
514- BUG ("ensure-full-index did not fully flatten?" );
515- ensure_full_index (istate );
516- restarted = 1 ;
517- goto restart ;
586+ to_restart = 1 ;
587+ break ;
518588 }
519589 ce -> ce_flags &= ~CE_SKIP_WORKTREE ;
520590 }
521591 }
522592 }
523593
524- if (path_count [0 ])
525- trace2_data_intmax ("index" , istate -> repo ,
526- "sparse_path_count" , path_count [0 ]);
527- if (restarted )
528- trace2_data_intmax ("index" , istate -> repo ,
529- "sparse_path_count_full" , path_count [1 ]);
530- trace2_region_leave ("index" , "clear_skip_worktree_from_present_files" ,
594+ trace2_data_intmax ("index" , istate -> repo ,
595+ "sparse_path_count" , path_count );
596+ trace2_data_intmax ("index" , istate -> repo ,
597+ "sparse_lstat_count" , data .lstat_count );
598+ trace2_region_leave ("index" , "clear_skip_worktree_from_present_files_sparse" ,
599+ istate -> repo );
600+ clear_path_found_data (& data );
601+ return to_restart ;
602+ }
603+
604+ static void clear_skip_worktree_from_present_files_full (struct index_state * istate )
605+ {
606+ struct path_found_data data = PATH_FOUND_DATA_INIT ;
607+
608+ int path_count = 0 ;
609+
610+ trace2_region_enter ("index" , "clear_skip_worktree_from_present_files_full" ,
611+ istate -> repo );
612+ for (int i = 0 ; i < istate -> cache_nr ; i ++ ) {
613+ struct cache_entry * ce = istate -> cache [i ];
614+
615+ if (S_ISSPARSEDIR (ce -> ce_mode ))
616+ BUG ("ensure-full-index did not fully flatten?" );
617+
618+ if (ce_skip_worktree (ce )) {
619+ path_count ++ ;
620+ if (path_found (ce -> name , & data ))
621+ ce -> ce_flags &= ~CE_SKIP_WORKTREE ;
622+ }
623+ }
624+
625+ trace2_data_intmax ("index" , istate -> repo ,
626+ "full_path_count" , path_count );
627+ trace2_data_intmax ("index" , istate -> repo ,
628+ "full_lstat_count" , data .lstat_count );
629+ trace2_region_leave ("index" , "clear_skip_worktree_from_present_files_full" ,
531630 istate -> repo );
631+ clear_path_found_data (& data );
632+ }
633+
634+ void clear_skip_worktree_from_present_files (struct index_state * istate )
635+ {
636+ if (!core_apply_sparse_checkout ||
637+ sparse_expect_files_outside_of_patterns )
638+ return ;
639+
640+ if (clear_skip_worktree_from_present_files_sparse (istate )) {
641+ ensure_full_index (istate );
642+ clear_skip_worktree_from_present_files_full (istate );
643+ }
532644}
533645
534646/*
0 commit comments