Skip to content

Commit d38208a

Browse files
yangskyboxlabs and gitster
authored and committed
git-p4: convert path to unicode before processing them
P4 allows essentially arbitrary encoding for path data while we would prefer to be dealing only with unicode strings. Since path data need to survive round-trip back to p4, this patch implements the general policy that we store path data as-is, but decode them to unicode before doing any non-trivial processing. A new `decode_path()` method is provided that generally does the correct conversion, taking into account `git-p4.pathEncoding` configuration. For python2.7, path strings will be left as-is if they only contain ASCII characters. For python3, decoding is always done so that we have str objects. Signed-off-by: Yang Zhao <[email protected]> Reviewed-by: Ben Keene <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 86dca24 commit d38208a

File tree

1 file changed

+44
-25
lines changed

1 file changed

+44
-25
lines changed

git-p4.py

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,21 @@ def decode_text_stream(s):
150150
def encode_text_stream(s):
151151
return s.encode('utf_8') if isinstance(s, unicode) else s
152152

153+
def decode_path(path):
154+
"""Decode a given string (bytes or otherwise) using configured path encoding options
155+
"""
156+
encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
157+
if bytes is not str:
158+
return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
159+
else:
160+
try:
161+
path.decode('ascii')
162+
except:
163+
path = path.decode(encoding, errors='replace')
164+
if verbose:
165+
print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path))
166+
return path
167+
153168
def write_pipe(c, stdin):
154169
if verbose:
155170
sys.stderr.write('Writing pipe: %s\n' % str(c))
@@ -697,7 +712,8 @@ def p4Where(depotPath):
697712
if "depotFile" in entry:
698713
# Search for the base client side depot path, as long as it starts with the branch's P4 path.
699714
# The base path always ends with "/...".
700-
if entry["depotFile"].find(depotPath) == 0 and entry["depotFile"][-4:] == "/...":
715+
entry_path = decode_path(entry['depotFile'])
716+
if entry_path.find(depotPath) == 0 and entry_path[-4:] == "/...":
701717
output = entry
702718
break
703719
elif "data" in entry:
@@ -712,11 +728,11 @@ def p4Where(depotPath):
712728
return ""
713729
clientPath = ""
714730
if "path" in output:
715-
clientPath = output.get("path")
731+
clientPath = decode_path(output['path'])
716732
elif "data" in output:
717733
data = output.get("data")
718-
lastSpace = data.rfind(" ")
719-
clientPath = data[lastSpace + 1:]
734+
lastSpace = data.rfind(b" ")
735+
clientPath = decode_path(data[lastSpace + 1:])
720736

721737
if clientPath.endswith("..."):
722738
clientPath = clientPath[:-3]
@@ -2484,7 +2500,7 @@ def append(self, view_line):
24842500

24852501
def convert_client_path(self, clientFile):
24862502
# chop off //client/ part to make it relative
2487-
if not clientFile.startswith(self.client_prefix):
2503+
if not decode_path(clientFile).startswith(self.client_prefix):
24882504
die("No prefix '%s' on clientFile '%s'" %
24892505
(self.client_prefix, clientFile))
24902506
return clientFile[len(self.client_prefix):]
@@ -2493,7 +2509,7 @@ def update_client_spec_path_cache(self, files):
24932509
""" Caching file paths by "p4 where" batch query """
24942510

24952511
# List depot file paths exclude that already cached
2496-
fileArgs = [f['path'] for f in files if f['path'] not in self.client_spec_path_cache]
2512+
fileArgs = [f['path'] for f in files if decode_path(f['path']) not in self.client_spec_path_cache]
24972513

24982514
if len(fileArgs) == 0:
24992515
return # All files in cache
@@ -2508,16 +2524,18 @@ def update_client_spec_path_cache(self, files):
25082524
if "unmap" in res:
25092525
# it will list all of them, but only one not unmap-ped
25102526
continue
2527+
depot_path = decode_path(res['depotFile'])
25112528
if gitConfigBool("core.ignorecase"):
2512-
res['depotFile'] = res['depotFile'].lower()
2513-
self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"])
2529+
depot_path = depot_path.lower()
2530+
self.client_spec_path_cache[depot_path] = self.convert_client_path(res["clientFile"])
25142531

25152532
# not found files or unmap files set to ""
25162533
for depotFile in fileArgs:
2534+
depotFile = decode_path(depotFile)
25172535
if gitConfigBool("core.ignorecase"):
25182536
depotFile = depotFile.lower()
25192537
if depotFile not in self.client_spec_path_cache:
2520-
self.client_spec_path_cache[depotFile] = ""
2538+
self.client_spec_path_cache[depotFile] = b''
25212539

25222540
def map_in_client(self, depot_path):
25232541
"""Return the relative location in the client where this
@@ -2635,7 +2653,7 @@ def isPathWanted(self, path):
26352653
elif path.lower() == p.lower():
26362654
return False
26372655
for p in self.depotPaths:
2638-
if p4PathStartsWith(path, p):
2656+
if p4PathStartsWith(path, decode_path(p)):
26392657
return True
26402658
return False
26412659

@@ -2644,7 +2662,7 @@ def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0):
26442662
fnum = 0
26452663
while "depotFile%s" % fnum in commit:
26462664
path = commit["depotFile%s" % fnum]
2647-
found = self.isPathWanted(path)
2665+
found = self.isPathWanted(decode_path(path))
26482666
if not found:
26492667
fnum = fnum + 1
26502668
continue
@@ -2678,7 +2696,7 @@ def stripRepoPath(self, path, prefixes):
26782696
if self.useClientSpec:
26792697
# branch detection moves files up a level (the branch name)
26802698
# from what client spec interpretation gives
2681-
path = self.clientSpecDirs.map_in_client(path)
2699+
path = decode_path(self.clientSpecDirs.map_in_client(path))
26822700
if self.detectBranches:
26832701
for b in self.knownBranches:
26842702
if p4PathStartsWith(path, b + "/"):
@@ -2712,14 +2730,15 @@ def splitFilesIntoBranches(self, commit):
27122730
branches = {}
27132731
fnum = 0
27142732
while "depotFile%s" % fnum in commit:
2715-
path = commit["depotFile%s" % fnum]
2733+
raw_path = commit["depotFile%s" % fnum]
2734+
path = decode_path(raw_path)
27162735
found = self.isPathWanted(path)
27172736
if not found:
27182737
fnum = fnum + 1
27192738
continue
27202739

27212740
file = {}
2722-
file["path"] = path
2741+
file["path"] = raw_path
27232742
file["rev"] = commit["rev%s" % fnum]
27242743
file["action"] = commit["action%s" % fnum]
27252744
file["type"] = commit["type%s" % fnum]
@@ -2728,7 +2747,7 @@ def splitFilesIntoBranches(self, commit):
27282747
# start with the full relative path where this file would
27292748
# go in a p4 client
27302749
if self.useClientSpec:
2731-
relPath = self.clientSpecDirs.map_in_client(path)
2750+
relPath = decode_path(self.clientSpecDirs.map_in_client(path))
27322751
else:
27332752
relPath = self.stripRepoPath(path, self.depotPaths)
27342753

@@ -2766,14 +2785,15 @@ def encodeWithUTF8(self, path):
27662785
# - helper for streamP4Files
27672786

27682787
def streamOneP4File(self, file, contents):
2769-
relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
2770-
relPath = self.encodeWithUTF8(relPath)
2788+
file_path = file['depotFile']
2789+
relPath = self.stripRepoPath(decode_path(file_path), self.branchPrefixes)
2790+
27712791
if verbose:
27722792
if 'fileSize' in self.stream_file:
27732793
size = int(self.stream_file['fileSize'])
27742794
else:
27752795
size = 0 # deleted files don't get a fileSize apparently
2776-
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
2796+
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
27772797
sys.stdout.flush()
27782798

27792799
(type_base, type_mods) = split_p4_type(file["type"])
@@ -2791,7 +2811,7 @@ def streamOneP4File(self, file, contents):
27912811
# to nothing. This causes p4 errors when checking out such
27922812
# a change, and errors here too. Work around it by ignoring
27932813
# the bad symlink; hopefully a future change fixes it.
2794-
print("\nIgnoring empty symlink in %s" % file['depotFile'])
2814+
print("\nIgnoring empty symlink in %s" % file_path)
27952815
return
27962816
elif data[-1] == '\n':
27972817
contents = [data[:-1]]
@@ -2810,15 +2830,15 @@ def streamOneP4File(self, file, contents):
28102830
# just the native "NT" type.
28112831
#
28122832
try:
2813-
text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])])
2833+
text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (decode_path(file['depotFile']), file['change'])], raw=True)
28142834
except Exception as e:
28152835
if 'Translation of file content failed' in str(e):
28162836
type_base = 'binary'
28172837
else:
28182838
raise e
28192839
else:
28202840
if p4_version_string().find('/NT') >= 0:
2821-
text = text.replace('\r\n', '\n')
2841+
text = text.replace(b'\r\n', b'\n')
28222842
contents = [ text ]
28232843

28242844
if type_base == "apple":
@@ -2849,8 +2869,7 @@ def streamOneP4File(self, file, contents):
28492869
self.writeToGitStream(git_mode, relPath, contents)
28502870

28512871
def streamOneP4Deletion(self, file):
2852-
relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
2853-
relPath = self.encodeWithUTF8(relPath)
2872+
relPath = self.stripRepoPath(decode_path(file['path']), self.branchPrefixes)
28542873
if verbose:
28552874
sys.stdout.write("delete %s\n" % relPath)
28562875
sys.stdout.flush()
@@ -3037,8 +3056,8 @@ def commit(self, details, files, branch, parent = "", allow_empty=False):
30373056
if self.clientSpecDirs:
30383057
self.clientSpecDirs.update_client_spec_path_cache(files)
30393058

3040-
files = [f for f in files
3041-
if self.inClientSpec(f['path']) and self.hasBranchPrefix(f['path'])]
3059+
files = [f for (f, path) in ((f, decode_path(f['path'])) for f in files)
3060+
if self.inClientSpec(path) and self.hasBranchPrefix(path)]
30423061

30433062
if gitConfigBool('git-p4.keepEmptyCommits'):
30443063
allow_empty = True

0 commit comments

Comments
 (0)