Skip to content

Commit 0be3cd7

Browse files
committed
git p4 fix for failure to decode p4 errors
Fixes the git p4 failure happening when Perforce command returns error containing byte stream of characters with high bit set. In such situations git p4 implementatino fails to decode this byte stream into utf-8 string. Design: Make use of existing decoding fallback strategy, described by git-p4.metadataDecodingStrategy and git-p4.metadataFallbackEncoding settings in the logic that decodes the Perforce command error bytes. Details: - Moved p4 metadata transcoding logic from metadata_stream_to_writable_bytes(..) into a new MetadataTranscoder class. - Enhcanced the implementation to use git-p4.metadataDecodingStrategy and git-p4.metadataFallbackEncoding settings for p4 errors decoding. - Added test. Signed-off-by: Nikolay Shustov <[email protected]>
1 parent 683c54c commit 0be3cd7

File tree

4 files changed

+149
-55
lines changed

4 files changed

+149
-55
lines changed

git-p4.py

Lines changed: 80 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -234,67 +234,91 @@ def encode_text_stream(s):
234234

235235

236236
class MetadataDecodingException(Exception):
237-
def __init__(self, input_string):
237+
def __init__(self, input_string, error=None):
238238
self.input_string = input_string
239+
self.error = error
239240

240241
def __str__(self):
241-
return """Decoding perforce metadata failed!
242+
message = """Decoding perforce metadata failed!
242243
The failing string was:
243244
---
244245
{}
245246
---
246247
Consider setting the git-p4.metadataDecodingStrategy config option to
247248
'fallback', to allow metadata to be decoded using a fallback encoding,
248-
defaulting to cp1252.""".format(self.input_string)
249+
defaulting to cp1252."""
250+
if verbose and self.error is not None:
251+
message += """
252+
---
253+
Error:
254+
---
255+
{}"""
256+
return message.format(self.input_string, self.error)
249257

250258

251-
encoding_fallback_warning_issued = False
252-
encoding_escape_warning_issued = False
253-
def metadata_stream_to_writable_bytes(s):
254-
encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
255-
fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
256-
if not isinstance(s, bytes):
257-
return s.encode('utf_8')
258-
if encodingStrategy == 'passthrough':
259-
return s
260-
try:
261-
s.decode('utf_8')
262-
return s
263-
except UnicodeDecodeError:
264-
if encodingStrategy == 'fallback' and fallbackEncoding:
265-
global encoding_fallback_warning_issued
266-
global encoding_escape_warning_issued
267-
try:
268-
if not encoding_fallback_warning_issued:
269-
print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
270-
print("\n(this warning is only displayed once during an import)")
271-
encoding_fallback_warning_issued = True
272-
return s.decode(fallbackEncoding).encode('utf_8')
273-
except Exception as exc:
274-
if not encoding_escape_warning_issued:
275-
print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
276-
print("\n(this warning is only displayed once during an import)")
277-
encoding_escape_warning_issued = True
278-
escaped_bytes = b''
279-
# bytes and strings work very differently in python2 vs python3...
280-
if str is bytes:
281-
for byte in s:
282-
byte_number = struct.unpack('>B', byte)[0]
283-
if byte_number > 127:
284-
escaped_bytes += b'%'
285-
escaped_bytes += hex(byte_number)[2:].upper()
286-
else:
287-
escaped_bytes += byte
288-
else:
289-
for byte_number in s:
290-
if byte_number > 127:
291-
escaped_bytes += b'%'
292-
escaped_bytes += hex(byte_number).upper().encode()[2:]
293-
else:
294-
escaped_bytes += bytes([byte_number])
295-
return escaped_bytes
259+
class MetadataTranscoder:
260+
def __init__(self, default_metadata_decoding_strategy, default_fallback_metadata_encoding):
261+
self.decoding_fallback_warning_issued = False
262+
self.decoding_escape_warning_issued = False
263+
self.decodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or default_metadata_decoding_strategy
264+
self.fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or default_fallback_metadata_encoding
265+
266+
def decode_metadata(self, s, error_from_fallback=True):
267+
try:
268+
return [s.decode('utf_8'), 'utf_8']
269+
except UnicodeDecodeError as decode_exception:
270+
error = decode_exception
271+
if self.decodingStrategy == 'fallback' and self.fallbackEncoding:
272+
try:
273+
if not self.decoding_fallback_warning_issued:
274+
print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (self.fallbackEncoding, s))
275+
print("\n(this warning is only displayed once during an import)")
276+
self.decoding_fallback_warning_issued = True
277+
return [s.decode(self.fallbackEncoding), self.fallbackEncoding]
278+
except Exception as decode_exception:
279+
if not error_from_fallback:
280+
return [s, None]
281+
error = decode_exception
282+
raise MetadataDecodingException(s, error)
283+
284+
def metadata_stream_to_writable_bytes(self, s):
285+
if not isinstance(s, bytes):
286+
return s.encode('utf_8')
287+
if self.decodingStrategy == 'passthrough':
288+
return s
289+
290+
[text, encoding] = self.decode_metadata(s, False)
291+
if encoding == 'utf_8':
292+
# s is of utf-8 already
293+
return s
294+
295+
if encoding is None:
296+
# could not decode s, even with fallback encoding
297+
if not self.decoding_escape_warning_issued:
298+
print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (self.fallbackEncoding, s))
299+
print("\n(this warning is only displayed once during an import)")
300+
self.decoding_escape_warning_issued = True
301+
escaped_bytes = b''
302+
# bytes and strings work very differently in python2 vs python3...
303+
if str is bytes:
304+
for byte in s:
305+
byte_number = struct.unpack('>B', byte)[0]
306+
if byte_number > 127:
307+
escaped_bytes += b'%'
308+
escaped_bytes += hex(byte_number)[2:].upper()
309+
else:
310+
escaped_bytes += byte
311+
else:
312+
for byte_number in s:
313+
if byte_number > 127:
314+
escaped_bytes += b'%'
315+
escaped_bytes += hex(byte_number).upper().encode()[2:]
316+
else:
317+
escaped_bytes += bytes([byte_number])
318+
return escaped_bytes
296319

297-
raise MetadataDecodingException(s)
320+
# were able to decode but not to utf-8
321+
return text.encode('utf_8')
298322

299323

300324
def decode_path(path):
@@ -898,14 +922,14 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
898922
decoded_entry[key] = value
899923
# Parse out data if it's an error response
900924
if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
901-
decoded_entry['data'] = decoded_entry['data'].decode()
925+
decoded_entry['data'] = metadataTranscoder.decode_metadata(decoded_entry['data'])
902926
entry = decoded_entry
903927
if skip_info:
904928
if 'code' in entry and entry['code'] == 'info':
905929
continue
906930
for key in p4KeysContainingNonUtf8Chars():
907931
if key in entry:
908-
entry[key] = metadata_stream_to_writable_bytes(entry[key])
932+
entry[key] = metadataTranscoder.metadata_stream_to_writable_bytes(entry[key])
909933
if cb is not None:
910934
cb(entry)
911935
else:
@@ -1718,7 +1742,7 @@ def getUserMapFromPerforceServer(self):
17181742
# python2 or python3. To support
17191743
# git-p4.metadataDecodingStrategy=fallback, self.users dict values
17201744
# are always bytes, ready to be written to git.
1721-
emailbytes = metadata_stream_to_writable_bytes(output["Email"])
1745+
emailbytes = metadataTranscoder.metadata_stream_to_writable_bytes(output["Email"])
17221746
self.users[output["User"]] = output["FullName"] + b" <" + emailbytes + b">"
17231747
self.emails[output["Email"]] = output["User"]
17241748

@@ -1730,12 +1754,12 @@ def getUserMapFromPerforceServer(self):
17301754
fullname = mapUser[0][1]
17311755
email = mapUser[0][2]
17321756
fulluser = fullname + " <" + email + ">"
1733-
self.users[user] = metadata_stream_to_writable_bytes(fulluser)
1757+
self.users[user] = metadataTranscoder.metadata_stream_to_writable_bytes(fulluser)
17341758
self.emails[email] = user
17351759

17361760
s = b''
17371761
for (key, val) in self.users.items():
1738-
keybytes = metadata_stream_to_writable_bytes(key)
1762+
keybytes = metadataTranscoder.metadata_stream_to_writable_bytes(key)
17391763
s += b"%s\t%s\n" % (keybytes.expandtabs(1), val.expandtabs(1))
17401764

17411765
open(self.getUserCacheFilename(), 'wb').write(s)
@@ -3349,7 +3373,7 @@ def make_email(self, userid):
33493373
if userid in self.users:
33503374
return self.users[userid]
33513375
else:
3352-
userid_bytes = metadata_stream_to_writable_bytes(userid)
3376+
userid_bytes = metadataTranscoder.metadata_stream_to_writable_bytes(userid)
33533377
return b"%s <a@b>" % userid_bytes
33543378

33553379
def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
@@ -4561,6 +4585,7 @@ def printUsage(commands):
45614585
"unshelve": P4Unshelve,
45624586
}
45634587

4588+
metadataTranscoder = MetadataTranscoder(defaultMetadataDecodingStrategy, defaultFallbackMetadataEncoding)
45644589

45654590
def main():
45664591
if len(sys.argv[1:]) == 0:

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,7 @@ integration_tests = [
10901090
't9834-git-p4-file-dir-bug.sh',
10911091
't9835-git-p4-metadata-encoding-python2.sh',
10921092
't9836-git-p4-metadata-encoding-python3.sh',
1093+
't9837-git-p4-error-encoding.sh',
10931094
't9850-shell.sh',
10941095
't9901-git-web--browse.sh',
10951096
't9902-completion.sh',

t/t9837-git-p4-error-encoding.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/sh
2+
3+
test_description='git p4 error encoding
4+
5+
This test checks that the import process handles inconsistent text
6+
encoding in p4 error messages without failing'
7+
8+
. ./lib-git-p4.sh
9+
10+
###############################
11+
## SECTION REPEATED IN t9835 ##
12+
###############################
13+
14+
# These tests require Perforce with non-unicode setup.
15+
out=$(2>&1 P4CHARSET=utf8 p4 client -o)
16+
if test $? -eq 0
17+
then
18+
skip_all="skipping git p4 error encoding tests; Perforce is setup with unicode"
19+
test_done
20+
fi
21+
22+
# These tests are specific to Python 3. Write a custom script that executes
23+
# git-p4 directly with the Python 3 interpreter to ensure that we use that
24+
# version even if Git was compiled with Python 2.
25+
python_target_binary=$(which python3)
26+
if test -n "$python_target_binary"
27+
then
28+
mkdir temp_python
29+
PATH="$(pwd)/temp_python:$PATH"
30+
export PATH
31+
32+
write_script temp_python/git-p4-python3 <<-EOF
33+
exec "$python_target_binary" "$(git --exec-path)/git-p4" "\$@"
34+
EOF
35+
fi
36+
37+
git p4-python3 >err
38+
if ! grep 'valid commands' err
39+
then
40+
skip_all="skipping python3 git p4 tests; python3 not available"
41+
test_done
42+
fi
43+
44+
test_expect_success 'start p4d' '
45+
start_p4d
46+
'
47+
48+
test_expect_success 'see if Perforce error with characters not convertable to utf-8 will be processed correctly' '
49+
test_when_finished cleanup_git &&
50+
$python_target_binary "$TEST_DIRECTORY"/t9837/git-p4-error-python3.py "$TEST_DIRECTORY"
51+
'
52+
53+
test_done

t/t9837/git-p4-error-python3.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import os
2+
import sys
3+
from importlib.machinery import SourceFileLoader
4+
5+
def main():
6+
if len(sys.argv[1:]) != 1:
7+
print("Expected test directory name")
8+
9+
gitp4_path = sys.argv[1] + "/../git-p4.py"
10+
gitp4 = SourceFileLoader("gitp4", gitp4_path).load_module()
11+
gitp4.p4CmdList(["edit", b'\xFEfile'])
12+
13+
if __name__ == '__main__':
14+
main()
15+

0 commit comments

Comments
 (0)