3434@copyright (c) BSD-3 License - see LICENSE.txt
3535*/
3636
37- #define UGREP_INDEXER_VERSION " 0.9 beta"
37+ #define UGREP_INDEXER_VERSION " 0.9.1 beta"
3838
3939// check if we are compiling for a windows OS, but not Cygwin or MinGW
4040#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__BORLANDC__)) && !defined(__CYGWIN__) && !defined(__MINGW32__) && !defined(__MINGW64__)
104104#include < vector>
105105#include < stack>
106106
107+ // number of bytes to gulp into the buffer to index a file
107108#define BUF_SIZE 65536
109+
110+ // smallest possible power-of-two size of an index of a file, should be > 61
108111#define MIN_SIZE 128
109112
113+ // default --ignore-files=FILE argument
110114#define DEFAULT_IGNORE_FILE " .gitignore"
111115
116+ // fixed constant strings
112117const char ugrep_index_filename[] = " ._UG#_Store" ;
113118const char ugrep_index_file_magic[5 ] = " UG#\x03 " ;
114119
120+ // command-line optional PATH argument
121+ const char *arg_pathname = NULL ;
122+
123+ // command-line options
115124int flag_accuracy = 6 ;
116125bool flag_check = false ;
117126bool flag_decompress = false ;
@@ -131,20 +140,26 @@ struct Ignore {
131140 std::vector<std::string> dirs;
132141};
133142
134- // stack of ignore files/dirs
143+ // stack of ignore file/dir globs per ignore-file found
135144std::stack<Ignore> ignore_stack;
136145
137146// entry data extracted from directory contents, moves pathname to this entry
138147struct Entry {
139148
149+ // indexing is initiated with the pathname to the root of the directory to index
140150 Entry (const char *pathname = " ." )
141151 :
142152 pathname (pathname), // the working dir by default
143153 base (0 ),
144154 mtime (~0ULL ), // max time to make sure we check the working directory for updates
145155 size (0 )
146- { }
156+ {
157+ const char *sep = strrchr (pathname, PATHSEPCHR);
158+ if (sep != NULL )
159+ base = strlen (sep) - 1 ;
160+ }
147161
162+ // new pathname entry, note this moves the pathname to the entry that owns it now
148163 Entry (std::string& pathname, size_t base, uint64_t mtime, off_t size)
149164 :
150165 pathname (std::move(pathname)),
@@ -175,7 +190,7 @@ struct Entry {
175190 }
176191
177192 std::string pathname; // full pathname
178- size_t base; // size of the basename in the pathname
193+ size_t base; // length of the basename in the pathname
179194 uint64_t mtime; // modification time
180195 off_t size; // file size
181196
@@ -193,7 +208,8 @@ void version()
193208// display a help message and exit
194209void help ()
195210{
196- std::cout << " Usage: ugrep-indexer [-0|...|-9] [-.] [-c|-d|-f] [-I] [-q] [-S] [-s] [-X] [-z]\n\n \
211+ std::cout << " \n Usage:\n\n ugrep-indexer [-0|...|-9] [-.] [-c|-d|-f] [-I] [-q] [-S] [-s] [-X] [-z] [PATH]\n\n \
212+ PATH Optional pathname to the root of the directory tree to index.\n\n \
197213 -0, -1, -2, -3, ..., -9, --accuracy=DIGIT\n \
198214 Specifies indexing accuracy. A low accuracy reduces the indexing\n \
199215 storage overhead at the cost of a higher rate of false positive\n \
@@ -392,6 +408,14 @@ void options(int argc, const char **argv)
392408 }
393409 }
394410 }
411+ else if (arg_pathname == NULL )
412+ {
413+ arg_pathname = arg;
414+ }
415+ else
416+ {
417+ usage (" argument PATH already specified as " , arg_pathname);
418+ }
395419 }
396420
397421 if (flag_check)
@@ -408,7 +432,6 @@ inline int fopenw_s(FILE **file, const char *filename, const char *mode)
408432#if defined(HAVE_F_RDAHEAD)
409433 if (strchr (mode, ' a' ) == NULL && strchr (mode, ' w' ) == NULL )
410434 {
411- // removed O_NOATIME which may fail
412435#if defined(O_NOCTTY)
413436 int fd = open (filename, O_RDONLY | O_NOCTTY);
414437#else
@@ -874,7 +897,7 @@ void cat(const std::string& pathname, std::stack<Entry>& dir_entries, std::vecto
874897}
875898
876899// recursively delete index files
877- void deleter ()
900+ void deleter (const char *pathname )
878901{
879902 flag_no_messages = true ;
880903
@@ -891,7 +914,11 @@ void deleter()
891914 uint64_t index_time;
892915 uint64_t last_time;
893916
894- dir_entries.emplace ();
917+ // pathname to the root of the directory tree to remove index files from, or . by default
918+ if (pathname == NULL )
919+ dir_entries.emplace ();
920+ else
921+ dir_entries.emplace (pathname);
895922
896923 // recurse subdirectories breadth-first to remove index files
897924 while (!dir_entries.empty ())
@@ -901,6 +928,7 @@ void deleter()
901928
902929 cat (visit.pathname , dir_entries, file_entries, num_dirs, num_links, num_other, ign_dirs, ign_files, index_time, last_time, true );
903930
931+ // if index time is nonzero, there is a valid index file in this directory we should remove
904932 if (index_time > 0 )
905933 {
906934 index_filename.assign (visit.pathname ).append (PATHSEPSTR).append (ugrep_index_filename);
@@ -910,7 +938,7 @@ void deleter()
910938}
911939
912940// recursively index files
913- void indexer ()
941+ void indexer (const char *pathname )
914942{
915943 std::stack<Entry> dir_entries;
916944 std::vector<Entry> file_entries;
@@ -933,7 +961,11 @@ void indexer()
933961 float sum_noise = 0 ;
934962 uint8_t hashes[65536 ];
935963
936- dir_entries.emplace ();
964+ // pathname to the directory tree to index or .
965+ if (pathname == NULL )
966+ dir_entries.emplace ();
967+ else
968+ dir_entries.emplace (pathname);
937969
938970 // recurse subdirectories
939971 while (!dir_entries.empty ())
@@ -1209,9 +1241,9 @@ int main(int argc, const char **argv)
12091241 options (argc, argv);
12101242
12111243 if (flag_delete)
1212- deleter ();
1244+ deleter (arg_pathname );
12131245 else
1214- indexer ();
1246+ indexer (arg_pathname );
12151247
12161248 return EXIT_SUCCESS;
12171249}
0 commit comments