Skip to content

Commit 70c0d55

Browse files
jhol and gitster
authored and committed
git-p4: resolve RCS keywords in bytes not utf-8
RCS keywords are strings that are replaced with information from Perforce. Examples include $Date$, $Author$, $File$, $Change$ etc. Perforce expands these keywords with their current values when files are synced, but Git's data model requires these expanded values to be converted back into their unexpanded form. Previously, git-p4.py would implement this behaviour through the use of regular expressions. However, the regular expression substitution was applied to decoded strings, i.e. the content of incoming commit diffs was first decoded from UTF-8 bytes into text, processed with regular expressions, then encoded back to bytes. Not only is this behaviour inefficient, but it is also the cause of a common failure with text files containing invalid UTF-8 data. For files created in Windows, CP1252 Smart Quote Characters (0x93 and 0x94) are seen fairly frequently. These bytes are invalid in UTF-8, so if the script encountered any file containing them, on Python 2 the symbols would be corrupted, and on Python 3 the script would fail with an exception. This patch replaces this decoding/encoding with bytes object regular expressions, so that the substitution is performed directly upon the source data with no conversions. A test for smart quote handling has been added to the t9810-git-p4-rcs.sh test suite. Signed-off-by: Joel Holdsworth <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4cf67ae commit 70c0d55

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

git-p4.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656

5757
p4_access_checked = False
5858

59-
re_ko_keywords = re.compile(r'\$(Id|Header)(:[^$\n]+)?\$')
60-
re_k_keywords = re.compile(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
59+
re_ko_keywords = re.compile(br'\$(Id|Header)(:[^$\n]+)?\$')
60+
re_k_keywords = re.compile(br'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
6161

6262
def p4_build_cmd(cmd):
6363
"""Build a suitable p4 command line.
@@ -1754,9 +1754,9 @@ def patchRCSKeywords(self, file, regexp):
17541754
# Attempt to zap the RCS keywords in a p4 controlled file matching the given regex
17551755
(handle, outFileName) = tempfile.mkstemp(dir='.')
17561756
try:
1757-
with os.fdopen(handle, "w") as outFile, open(file, "r") as inFile:
1757+
with os.fdopen(handle, "wb") as outFile, open(file, "rb") as inFile:
17581758
for line in inFile.readlines():
1759-
outFile.write(regexp.sub(r'$\1$', line))
1759+
outFile.write(regexp.sub(br'$\1$', line))
17601760
# Forcibly overwrite the original file
17611761
os.unlink(file)
17621762
shutil.move(outFileName, file)
@@ -2089,7 +2089,9 @@ def applyCommit(self, id):
20892089
regexp = p4_keywords_regexp_for_file(file)
20902090
if regexp:
20912091
# this file is a possibility...look for RCS keywords.
2092-
for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
2092+
for line in read_pipe_lines(
2093+
["git", "diff", "%s^..%s" % (id, id), file],
2094+
raw=True):
20932095
if regexp.search(line):
20942096
if verbose:
20952097
print("got keyword match on %s in %s in %s" % (regex.pattern, line, file))
@@ -3020,8 +3022,7 @@ def streamOneP4File(self, file, contents):
30203022
# even though in theory somebody may want that.
30213023
regexp = p4_keywords_regexp_for_type(type_base, type_mods)
30223024
if regexp:
3023-
contents = [encode_text_stream(regexp.sub(
3024-
r'$\1$', ''.join(decode_text_stream(c) for c in contents)))]
3025+
contents = [regexp.sub(br'$\1$', c) for c in contents]
30253026

30263027
if self.largeFileSystem:
30273028
(git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)

t/t9810-git-p4-rcs.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ test_description='git p4 rcs keywords'
44

55
. ./lib-git-p4.sh
66

7+
CP1252="\223\224"
8+
79
test_expect_success 'start p4d' '
810
start_p4d
911
'
@@ -32,6 +34,9 @@ test_expect_success 'init depot' '
3234
p4 submit -d "filek" &&
3335
p4 add -t text+ko fileko &&
3436
p4 submit -d "fileko" &&
37+
printf "$CP1252" >fileko_cp1252 &&
38+
p4 add -t text+ko fileko_cp1252 &&
39+
p4 submit -d "fileko_cp1252" &&
3540
p4 add -t text file_text &&
3641
p4 submit -d "file_text"
3742
)
@@ -359,4 +364,14 @@ test_expect_failure 'Add keywords in git which do not match the default p4 value
359364
)
360365
'
361366

367+
test_expect_success 'check cp1252 smart quote are preserved through RCS keyword processing' '
368+
test_when_finished cleanup_git &&
369+
git p4 clone --dest="$git" //depot &&
370+
(
371+
cd "$git" &&
372+
printf "$CP1252" >expect &&
373+
test_cmp_bin expect fileko_cp1252
374+
)
375+
'
376+
362377
test_done

0 commit comments

Comments
 (0)