@@ -140,42 +140,49 @@ std::optional<std::string> BuildGlobPattern(std::filesystem::path const& glob)
140140{
141141 using namespace std ::literals::string_view_literals;
142142 auto globStr = glob.generic_u8string ();
143+ auto globView = std::string_view (globStr);
143144
144145 // Deal with traditional "everything" wildcards.
145- if (glob == " *" || glob == " *.*" ) {
146+ if (globView == " *" || globView == " *.*" ) {
146147 return {};
147148 }
148149
150+ auto u32Str = IndexUTF8ToUTF32 (globStr);
151+ auto & offsets = u32Str.sourceCodeUnitOffsets ;
152+
149153 fmt::memory_buffer buf;
154+ buf.reserve (globStr.size () * 3 ); // Decent estimate of final pattern size.
150155
151156 // If no wildcards are present, test file path verbatim.
152157 // We use a regex rather than string comparisons to make it case-insensitive.
153- if (globStr. find_first_of (" ?*" ) == std::string ::npos) {
154- buf. reserve (globStr. size () * 3 ); // Decent estimate of final pattern size.
155-
156- for ( char ch : globStr) {
157- fmt::format_to (fmt::appender (buf), " [{}]" , ch );
158+ if (u32Str. text . find_first_of (U " ?*" ) == std::u32string ::npos) {
159+ for ( size_t offIdx = 0 ; offIdx < offsets. size (); ++offIdx) {
160+ int byteOffset = offsets[offIdx];
161+ int nextOffset = (offIdx + 1 < offsets. size ()) ? offsets[offIdx + 1 ] : globStr. size ();
162+ fmt::format_to (fmt::appender (buf), " [{}]" , globView. substr (byteOffset, nextOffset - byteOffset) );
158163 }
159164 }
160165 else {
161166 // Otherwise build a regular expression from the glob and use that to match files.
162167 auto it = fmt::appender (buf);
163- for (char ch : globStr) {
164- if (ch == ' *' ) {
168+ for (size_t offIdx = 0 ; offIdx < offsets.size (); ++offIdx) {
169+ char32_t ch = u32Str.text [offIdx];
170+ if (ch == U' *' ) {
165171 it = fmt::format_to (it, " .*" );
166172 }
167- else if (ch == ' ?' ) {
173+ else if (ch == U ' ?' ) {
168174 *it++ = ' .' ;
169175 }
170- else if (" +[]{}+()|" sv.find (ch) != std::string_view ::npos) {
176+ else if (U" . +[]{}+()|" sv.find (ch) != std::u32string ::npos) {
171177 // Escape metacharacters
172- it = fmt::format_to (it, " \\ {}" , ch);
178+ it = fmt::format_to (it, " \\ {}" , ( char ) ch);
173179 }
174- else if (std::isalnum ((unsigned char )ch)) {
175- *it++ = ch;
180+ else if (ch < 0x80 && std::isalnum ((unsigned char )ch)) {
181+ *it++ = ( char ) ch;
176182 }
177183 else {
178- it = fmt::format_to (it, " [{}]" , ch);
184+ // Emit as \x{10FFFF}.
185+ it = fmt::format_to (it, " \\ x{{{:X}}}" , (uint32_t )ch);
179186 }
180187 }
181188 }
0 commit comments