|
| 1 | +/* |
| 2 | + * Converts filenames from decomposed unicode into precomposed unicode. |
| 3 | + * Used on MacOS X. |
| 4 | +*/ |
| 5 | + |
| 6 | + |
| 7 | +#define PRECOMPOSE_UNICODE_C |
| 8 | + |
| 9 | +#include "cache.h" |
| 10 | +#include "utf8.h" |
| 11 | +#include "precompose_utf8.h" |
| 12 | + |
/* Type of the input-buffer pointer this file hands to iconv(). */
typedef char *iconv_ibp;

/*
 * Encoding names passed to iconv_open(): path names are converted from
 * path_encoding into repo_encoding.
 */
static const char *repo_encoding = "UTF-8";
static const char *path_encoding = "UTF-8-MAC";
| 16 | + |
| 17 | + |
/*
 * Count the bytes of s that have the high bit (0x80) set, i.e. bytes that
 * are not 7-bit ASCII.
 *
 * Scanning stops at the terminating NUL or after maxlen bytes, whichever
 * comes first.  If strlen_c is not NULL it always receives the number of
 * bytes scanned (0 when s is NULL or empty).
 *
 * Returns the number of high-bit bytes found; 0 for NULL, empty or pure
 * ASCII input.
 */
static size_t has_utf8(const char *s, size_t maxlen, size_t *strlen_c)
{
	const uint8_t *bytes = (const uint8_t *)s;
	size_t scanned = 0;
	size_t high_bit_bytes = 0;

	if (bytes) {
		/* Test the bound first so no byte past maxlen is ever read. */
		for (; scanned < maxlen && bytes[scanned]; scanned++) {
			if (bytes[scanned] & 0x80)
				high_bit_bytes++;
		}
	}

	/* Define the out parameter on every path, including NULL/empty input. */
	if (strlen_c)
		*strlen_c = scanned;

	return high_bit_bytes;
}
| 40 | + |
| 41 | + |
| 42 | +void probe_utf8_pathname_composition(char *path, int len) |
| 43 | +{ |
| 44 | + const static char *auml_nfc = "\xc3\xa4"; |
| 45 | + const static char *auml_nfd = "\x61\xcc\x88"; |
| 46 | + int output_fd; |
| 47 | + if (precomposed_unicode != -1) |
| 48 | + return; /* We found it defined in the global config, respect it */ |
| 49 | + path[len] = 0; |
| 50 | + strcpy(path + len, auml_nfc); |
| 51 | + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); |
| 52 | + if (output_fd >=0) { |
| 53 | + close(output_fd); |
| 54 | + path[len] = 0; |
| 55 | + strcpy(path + len, auml_nfd); |
| 56 | + /* Indicate to the user, that we can configure it to true */ |
| 57 | + if (0 == access(path, R_OK)) |
| 58 | + git_config_set("core.precomposeunicode", "false"); |
| 59 | + /* To be backward compatible, set precomposed_unicode to 0 */ |
| 60 | + precomposed_unicode = 0; |
| 61 | + path[len] = 0; |
| 62 | + strcpy(path + len, auml_nfc); |
| 63 | + unlink(path); |
| 64 | + } |
| 65 | +} |
| 66 | + |
| 67 | + |
| 68 | +void precompose_argv(int argc, const char **argv) |
| 69 | +{ |
| 70 | + int i = 0; |
| 71 | + const char *oldarg; |
| 72 | + char *newarg; |
| 73 | + iconv_t ic_precompose; |
| 74 | + |
| 75 | + if (precomposed_unicode != 1) |
| 76 | + return; |
| 77 | + |
| 78 | + ic_precompose = iconv_open(repo_encoding, path_encoding); |
| 79 | + if (ic_precompose == (iconv_t) -1) |
| 80 | + return; |
| 81 | + |
| 82 | + while (i < argc) { |
| 83 | + size_t namelen; |
| 84 | + oldarg = argv[i]; |
| 85 | + if (has_utf8(oldarg, (size_t)-1, &namelen)) { |
| 86 | + newarg = reencode_string_iconv(oldarg, namelen, ic_precompose); |
| 87 | + if (newarg) |
| 88 | + argv[i] = newarg; |
| 89 | + } |
| 90 | + i++; |
| 91 | + } |
| 92 | + iconv_close(ic_precompose); |
| 93 | +} |
| 94 | + |
| 95 | + |
| 96 | +PREC_DIR *precompose_utf8_opendir(const char *dirname) |
| 97 | +{ |
| 98 | + PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR)); |
| 99 | + prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx)); |
| 100 | + prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name); |
| 101 | + |
| 102 | + prec_dir->dirp = opendir(dirname); |
| 103 | + if (!prec_dir->dirp) { |
| 104 | + free(prec_dir->dirent_nfc); |
| 105 | + free(prec_dir); |
| 106 | + return NULL; |
| 107 | + } else { |
| 108 | + int ret_errno = errno; |
| 109 | + prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding); |
| 110 | + /* if iconv_open() fails, die() in readdir() if needed */ |
| 111 | + errno = ret_errno; |
| 112 | + } |
| 113 | + |
| 114 | + return prec_dir; |
| 115 | +} |
| 116 | + |
| 117 | +struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir) |
| 118 | +{ |
| 119 | + struct dirent *res; |
| 120 | + res = readdir(prec_dir->dirp); |
| 121 | + if (res) { |
| 122 | + size_t namelenz = strlen(res->d_name) + 1; /* \0 */ |
| 123 | + size_t new_maxlen = namelenz; |
| 124 | + |
| 125 | + int ret_errno = errno; |
| 126 | + |
| 127 | + if (new_maxlen > prec_dir->dirent_nfc->max_name_len) { |
| 128 | + size_t new_len = sizeof(dirent_prec_psx) + new_maxlen - |
| 129 | + sizeof(prec_dir->dirent_nfc->d_name); |
| 130 | + |
| 131 | + prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len); |
| 132 | + prec_dir->dirent_nfc->max_name_len = new_maxlen; |
| 133 | + } |
| 134 | + |
| 135 | + prec_dir->dirent_nfc->d_ino = res->d_ino; |
| 136 | + prec_dir->dirent_nfc->d_type = res->d_type; |
| 137 | + |
| 138 | + if ((precomposed_unicode == 1) && has_utf8(res->d_name, (size_t)-1, NULL)) { |
| 139 | + if (prec_dir->ic_precompose == (iconv_t)-1) { |
| 140 | + die("iconv_open(%s,%s) failed, but needed:\n" |
| 141 | + " precomposed unicode is not supported.\n" |
| 142 | + " If you wnat to use decomposed unicode, run\n" |
| 143 | + " \"git config core.precomposeunicode false\"\n", |
| 144 | + repo_encoding, path_encoding); |
| 145 | + } else { |
| 146 | + iconv_ibp cp = (iconv_ibp)res->d_name; |
| 147 | + size_t inleft = namelenz; |
| 148 | + char *outpos = &prec_dir->dirent_nfc->d_name[0]; |
| 149 | + size_t outsz = prec_dir->dirent_nfc->max_name_len; |
| 150 | + size_t cnt; |
| 151 | + errno = 0; |
| 152 | + cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz); |
| 153 | + if (errno || inleft) { |
| 154 | + /* |
| 155 | + * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF |
| 156 | + * MacOS X avoids illegal byte sequemces. |
| 157 | + * If they occur on a mounted drive (e.g. NFS) it is not worth to |
| 158 | + * die() for that, but rather let the user see the original name |
| 159 | + */ |
| 160 | + namelenz = 0; /* trigger strlcpy */ |
| 161 | + } |
| 162 | + } |
| 163 | + } |
| 164 | + else |
| 165 | + namelenz = 0; |
| 166 | + |
| 167 | + if (!namelenz) |
| 168 | + strlcpy(prec_dir->dirent_nfc->d_name, res->d_name, |
| 169 | + prec_dir->dirent_nfc->max_name_len); |
| 170 | + |
| 171 | + errno = ret_errno; |
| 172 | + return prec_dir->dirent_nfc; |
| 173 | + } |
| 174 | + return NULL; |
| 175 | +} |
| 176 | + |
| 177 | + |
| 178 | +int precompose_utf8_closedir(PREC_DIR *prec_dir) |
| 179 | +{ |
| 180 | + int ret_value; |
| 181 | + int ret_errno; |
| 182 | + ret_value = closedir(prec_dir->dirp); |
| 183 | + ret_errno = errno; |
| 184 | + if (prec_dir->ic_precompose != (iconv_t)-1) |
| 185 | + iconv_close(prec_dir->ic_precompose); |
| 186 | + free(prec_dir->dirent_nfc); |
| 187 | + free(prec_dir); |
| 188 | + errno = ret_errno; |
| 189 | + return ret_value; |
| 190 | +} |
0 commit comments