Skip to content

Commit 55aa571

Browse files
Pete Wyckoff authored and gitster committed
git-p4: handle utf16 filetype properly
One of the filetypes that p4 supports is utf16. Its behavior is odd in this case. The data delivered through "p4 -G print" is not encoded in utf16, although "p4 print -o" will produce the proper utf16-encoded file.

When dealing with this filetype, discard the data from -G, and instead read the contents directly.

An alternate approach would be to try to encode the data in python. That worked for true utf16 files, but for other files marked as utf16, p4 delivers mangled text in no recognizable encoding.

Add a test case to check utf16 handling, and +k and +ko handling.

Reported-by: Chris Li <[email protected]>
Acked-by: Luke Diamand <[email protected]>
Signed-off-by: Pete Wyckoff <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent fc00233 commit 55aa571

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

contrib/fast-import/git-p4

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1238,13 +1238,24 @@ class P4Sync(Command, P4UserMap):
12381238
data = ''.join(contents)
12391239
contents = [data[:-1]]
12401240

1241+
if file['type'].startswith("utf16"):
1242+
# p4 delivers different text in the python output to -G
1243+
# than it does when using "print -o", or normal p4 client
1244+
# operations. utf16 is converted to ascii or utf8, perhaps.
1245+
# But ascii text saved as -t utf16 is completely mangled.
1246+
# Invoke print -o to get the real contents.
1247+
text = p4_read_pipe('print -q -o - "%s"' % file['depotFile'])
1248+
contents = [ text ]
1249+
12411250
if self.isWindows and file["type"].endswith("text"):
12421251
mangled = []
12431252
for data in contents:
12441253
data = data.replace("\r\n", "\n")
12451254
mangled.append(data)
12461255
contents = mangled
12471256

1257+
# Note that we do not try to de-mangle keywords on utf16 files,
1258+
# even though in theory somebody may want that.
12481259
if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
12491260
contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
12501261
elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):

t/t9802-git-p4-filetype.sh

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,108 @@
1+
#!/bin/sh
2+
3+
test_description='git-p4 p4 filetype tests'
4+
5+
. ./lib-git-p4.sh
6+
7+
test_expect_success 'start p4d' '
8+
start_p4d
9+
'
10+
11+
test_expect_success 'utf-16 file create' '
12+
(
13+
cd "$cli" &&
14+
15+
# p4 saves this verbatim
16+
printf "three\nline\ntext\n" >f-ascii &&
17+
p4 add -t text f-ascii &&
18+
19+
# p4 adds \377\376 header
20+
cp f-ascii f-ascii-as-utf16 &&
21+
p4 add -t utf16 f-ascii-as-utf16 &&
22+
23+
# p4 saves this exactly as iconv produced it
24+
printf "three\nline\ntext\n" | iconv -f ascii -t utf-16 >f-utf16 &&
25+
p4 add -t utf16 f-utf16 &&
26+
27+
# this also is unchanged
28+
cp f-utf16 f-utf16-as-text &&
29+
p4 add -t text f-utf16-as-text &&
30+
31+
p4 submit -d "f files" &&
32+
33+
# force update of client files
34+
p4 sync -f
35+
)
36+
'
37+
38+
test_expect_success 'utf-16 file test' '
39+
test_when_finished cleanup_git &&
40+
"$GITP4" clone --dest="$git" //depot@all &&
41+
(
42+
cd "$git" &&
43+
44+
test_cmp "$cli/f-ascii" f-ascii &&
45+
test_cmp "$cli/f-ascii-as-utf16" f-ascii-as-utf16 &&
46+
test_cmp "$cli/f-utf16" f-utf16 &&
47+
test_cmp "$cli/f-utf16-as-text" f-utf16-as-text
48+
)
49+
'
50+
51+
test_expect_success 'keyword file create' '
52+
(
53+
cd "$cli" &&
54+
55+
printf "id\n\$Id\$\n\$Author\$\ntext\n" >k-text-k &&
56+
p4 add -t text+k k-text-k &&
57+
58+
cp k-text-k k-text-ko &&
59+
p4 add -t text+ko k-text-ko &&
60+
61+
cat k-text-k | iconv -f ascii -t utf-16 >k-utf16-k &&
62+
p4 add -t utf16+k k-utf16-k &&
63+
64+
cp k-utf16-k k-utf16-ko &&
65+
p4 add -t utf16+ko k-utf16-ko &&
66+
67+
p4 submit -d "k files" &&
68+
p4 sync -f
69+
)
70+
'
71+
72+
build_smush() {
73+
cat >k_smush.py <<-\EOF &&
74+
import re, sys
75+
sys.stdout.write(re.sub(r'(?i)\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$]*\$', r'$\1$', sys.stdin.read()))
76+
EOF
77+
cat >ko_smush.py <<-\EOF
78+
import re, sys
79+
sys.stdout.write(re.sub(r'(?i)\$(Id|Header):[^$]*\$', r'$\1$', sys.stdin.read()))
80+
EOF
81+
}
82+
83+
test_expect_success 'keyword file test' '
84+
build_smush &&
85+
test_when_finished rm -f k_smush.py ko_smush.py &&
86+
test_when_finished cleanup_git &&
87+
"$GITP4" clone --dest="$git" //depot@all &&
88+
(
89+
cd "$git" &&
90+
91+
# text, ensure unexpanded
92+
"$PYTHON_PATH" "$TRASH_DIRECTORY/k_smush.py" <"$cli/k-text-k" >cli-k-text-k-smush &&
93+
test_cmp cli-k-text-k-smush k-text-k &&
94+
"$PYTHON_PATH" "$TRASH_DIRECTORY/ko_smush.py" <"$cli/k-text-ko" >cli-k-text-ko-smush &&
95+
test_cmp cli-k-text-ko-smush k-text-ko &&
96+
97+
# utf16, even though p4 expands keywords, git-p4 does not
98+
# try to undo that
99+
test_cmp "$cli/k-utf16-k" k-utf16-k &&
100+
test_cmp "$cli/k-utf16-ko" k-utf16-ko
101+
)
102+
'
103+
104+
test_expect_success 'kill p4d' '
105+
kill_p4d
106+
'
107+
108+
test_done

0 commit comments

Comments
 (0)