Skip to content

Commit 794388f

Browse files
committed
git p4 fix for failure to decode p4 errors
Moved p4 metadata transcoding logic from imetadata_stream_to_writable_bytes(..) into a new MetadataTranscoder class. Made it use p4.metadataDecodingStrategy and git-p4.metadataFallbackEncoding settings for p4 errors decoding. Added test.
1 parent d0dc685 commit 794388f

File tree

4 files changed

+149
-55
lines changed

4 files changed

+149
-55
lines changed

git-p4.py

Lines changed: 80 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -234,67 +234,91 @@ def encode_text_stream(s):
234234

235235

236236
class MetadataDecodingException(Exception):
237-
def __init__(self, input_string):
237+
def __init__(self, input_string, error=None):
238238
self.input_string = input_string
239+
self.error = error
239240

240241
def __str__(self):
241-
return """Decoding perforce metadata failed!
242+
message = """Decoding perforce metadata failed!
242243
The failing string was:
243244
---
244245
{}
245246
---
246247
Consider setting the git-p4.metadataDecodingStrategy config option to
247248
'fallback', to allow metadata to be decoded using a fallback encoding,
248-
defaulting to cp1252.""".format(self.input_string)
249+
defaulting to cp1252."""
250+
if verbose and self.error is not None:
251+
message += """
252+
---
253+
Error:
254+
---
255+
{}"""
256+
return message.format(self.input_string, self.error)
249257

250258

251-
encoding_fallback_warning_issued = False
252-
encoding_escape_warning_issued = False
253-
def metadata_stream_to_writable_bytes(s):
254-
encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
255-
fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
256-
if not isinstance(s, bytes):
257-
return s.encode('utf_8')
258-
if encodingStrategy == 'passthrough':
259-
return s
260-
try:
261-
s.decode('utf_8')
262-
return s
263-
except UnicodeDecodeError:
264-
if encodingStrategy == 'fallback' and fallbackEncoding:
265-
global encoding_fallback_warning_issued
266-
global encoding_escape_warning_issued
267-
try:
268-
if not encoding_fallback_warning_issued:
269-
print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
270-
print("\n(this warning is only displayed once during an import)")
271-
encoding_fallback_warning_issued = True
272-
return s.decode(fallbackEncoding).encode('utf_8')
273-
except Exception as exc:
274-
if not encoding_escape_warning_issued:
275-
print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
276-
print("\n(this warning is only displayed once during an import)")
277-
encoding_escape_warning_issued = True
278-
escaped_bytes = b''
279-
# bytes and strings work very differently in python2 vs python3...
280-
if str is bytes:
281-
for byte in s:
282-
byte_number = struct.unpack('>B', byte)[0]
283-
if byte_number > 127:
284-
escaped_bytes += b'%'
285-
escaped_bytes += hex(byte_number)[2:].upper()
286-
else:
287-
escaped_bytes += byte
288-
else:
289-
for byte_number in s:
290-
if byte_number > 127:
291-
escaped_bytes += b'%'
292-
escaped_bytes += hex(byte_number).upper().encode()[2:]
293-
else:
294-
escaped_bytes += bytes([byte_number])
295-
return escaped_bytes
259+
class MetadataTranscoder:
260+
def __init__(self, default_metadata_decoding_strategy, default_fallback_metadata_encoding):
261+
self.decoding_fallback_warning_issued = False
262+
self.decoding_escape_warning_issued = False
263+
self.decodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or default_metadata_decoding_strategy
264+
self.fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or default_fallback_metadata_encoding
265+
266+
def decode_metadata(self, s, error_from_fallback=True):
267+
try:
268+
return [s.decode('utf_8'), 'utf_8']
269+
except UnicodeDecodeError as decode_exception:
270+
error = decode_exception
271+
if self.decodingStrategy == 'fallback' and self.fallbackEncoding:
272+
try:
273+
if not self.decoding_fallback_warning_issued:
274+
print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (self.fallbackEncoding, s))
275+
print("\n(this warning is only displayed once during an import)")
276+
self.decoding_fallback_warning_issued = True
277+
return [s.decode(self.fallbackEncoding), self.fallbackEncoding]
278+
except Exception as decode_exception:
279+
if not error_from_fallback:
280+
return [s, None]
281+
error = decode_exception
282+
raise MetadataDecodingException(s, error)
283+
284+
def metadata_stream_to_writable_bytes(self, s):
285+
if not isinstance(s, bytes):
286+
return s.encode('utf_8')
287+
if self.decodingStrategy == 'passthrough':
288+
return s
289+
290+
[text, encoding] = self.decode_metadata(s, False)
291+
if encoding == 'utf_8':
292+
# s is of utf-8 already
293+
return s
294+
295+
if encoding is None:
296+
# could not decode s, even with fallback encoding
297+
if not self.decoding_escape_warning_issued:
298+
print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (self.fallbackEncoding, s))
299+
print("\n(this warning is only displayed once during an import)")
300+
self.decoding_escape_warning_issued = True
301+
escaped_bytes = b''
302+
# bytes and strings work very differently in python2 vs python3...
303+
if str is bytes:
304+
for byte in s:
305+
byte_number = struct.unpack('>B', byte)[0]
306+
if byte_number > 127:
307+
escaped_bytes += b'%'
308+
escaped_bytes += hex(byte_number)[2:].upper()
309+
else:
310+
escaped_bytes += byte
311+
else:
312+
for byte_number in s:
313+
if byte_number > 127:
314+
escaped_bytes += b'%'
315+
escaped_bytes += hex(byte_number).upper().encode()[2:]
316+
else:
317+
escaped_bytes += bytes([byte_number])
318+
return escaped_bytes
296319

297-
raise MetadataDecodingException(s)
320+
# were able to decode but not to utf-8
321+
return text.encode('utf_8')
298322

299323

300324
def decode_path(path):
@@ -898,14 +922,14 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
898922
decoded_entry[key] = value
899923
# Parse out data if it's an error response
900924
if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
901-
decoded_entry['data'] = decoded_entry['data'].decode()
925+
decoded_entry['data'] = metadataTranscoder.decode_metadata(decoded_entry['data'])
902926
entry = decoded_entry
903927
if skip_info:
904928
if 'code' in entry and entry['code'] == 'info':
905929
continue
906930
for key in p4KeysContainingNonUtf8Chars():
907931
if key in entry:
908-
entry[key] = metadata_stream_to_writable_bytes(entry[key])
932+
entry[key] = metadataTranscoder.metadata_stream_to_writable_bytes(entry[key])
909933
if cb is not None:
910934
cb(entry)
911935
else:
@@ -1718,7 +1742,7 @@ def getUserMapFromPerforceServer(self):
17181742
# python2 or python3. To support
17191743
# git-p4.metadataDecodingStrategy=fallback, self.users dict values
17201744
# are always bytes, ready to be written to git.
1721-
emailbytes = metadata_stream_to_writable_bytes(output["Email"])
1745+
emailbytes = metadataTranscoder.metadata_stream_to_writable_bytes(output["Email"])
17221746
self.users[output["User"]] = output["FullName"] + b" <" + emailbytes + b">"
17231747
self.emails[output["Email"]] = output["User"]
17241748

@@ -1730,12 +1754,12 @@ def getUserMapFromPerforceServer(self):
17301754
fullname = mapUser[0][1]
17311755
email = mapUser[0][2]
17321756
fulluser = fullname + " <" + email + ">"
1733-
self.users[user] = metadata_stream_to_writable_bytes(fulluser)
1757+
self.users[user] = metadataTranscoder.metadata_stream_to_writable_bytes(fulluser)
17341758
self.emails[email] = user
17351759

17361760
s = b''
17371761
for (key, val) in self.users.items():
1738-
keybytes = metadata_stream_to_writable_bytes(key)
1762+
keybytes = metadataTranscoder.metadata_stream_to_writable_bytes(key)
17391763
s += b"%s\t%s\n" % (keybytes.expandtabs(1), val.expandtabs(1))
17401764

17411765
open(self.getUserCacheFilename(), 'wb').write(s)
@@ -3349,7 +3373,7 @@ def make_email(self, userid):
33493373
if userid in self.users:
33503374
return self.users[userid]
33513375
else:
3352-
userid_bytes = metadata_stream_to_writable_bytes(userid)
3376+
userid_bytes = metadataTranscoder.metadata_stream_to_writable_bytes(userid)
33533377
return b"%s <a@b>" % userid_bytes
33543378

33553379
def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
@@ -4561,6 +4585,7 @@ def printUsage(commands):
45614585
"unshelve": P4Unshelve,
45624586
}
45634587

4588+
metadataTranscoder = MetadataTranscoder(defaultMetadataDecodingStrategy, defaultFallbackMetadataEncoding)
45644589

45654590
def main():
45664591
if len(sys.argv[1:]) == 0:

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,7 @@ integration_tests = [
10901090
't9834-git-p4-file-dir-bug.sh',
10911091
't9835-git-p4-metadata-encoding-python2.sh',
10921092
't9836-git-p4-metadata-encoding-python3.sh',
1093+
't9837-git-p4-error-encoding.sh',
10931094
't9850-shell.sh',
10941095
't9901-git-web--browse.sh',
10951096
't9902-completion.sh',

t/t9837-git-p4-error-encoding.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/sh
2+
3+
test_description='git p4 error encoding
4+
5+
This test checks that the import process handles inconsistent text
6+
encoding in p4 error messages without failing'
7+
8+
. ./lib-git-p4.sh
9+
10+
###############################
11+
## SECTION REPEATED IN t9835 ##
12+
###############################
13+
14+
# These tests require Perforce with non-unicode setup.
15+
out=$(2>&1 P4CHARSET=utf8 p4 client -o)
16+
if test $? -eq 0
17+
then
18+
skip_all="skipping git p4 error encoding tests; Perforce is setup with unicode"
19+
test_done
20+
fi
21+
22+
# These tests are specific to Python 3. Write a custom script that executes
23+
# git-p4 directly with the Python 3 interpreter to ensure that we use that
24+
# version even if Git was compiled with Python 2.
25+
python_target_binary=$(which python3)
26+
if test -n "$python_target_binary"
27+
then
28+
mkdir temp_python
29+
PATH="$(pwd)/temp_python:$PATH"
30+
export PATH
31+
32+
write_script temp_python/git-p4-python3 <<-EOF
33+
exec "$python_target_binary" "$(git --exec-path)/git-p4" "\$@"
34+
EOF
35+
fi
36+
37+
git p4-python3 >err
38+
if ! grep 'valid commands' err
39+
then
40+
skip_all="skipping python3 git p4 tests; python3 not available"
41+
test_done
42+
fi
43+
44+
test_expect_success 'start p4d' '
45+
start_p4d
46+
'
47+
48+
test_expect_success 'see if Perforce error with characters not convertable to utf-8 will be processed correctly' '
49+
test_when_finished cleanup_git &&
50+
$python_target_binary "$TEST_DIRECTORY"/t9837/git-p4-error-python3.py "$TEST_DIRECTORY"
51+
'
52+
53+
test_done

t/t9837/git-p4-error-python3.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import os
2+
import sys
3+
from importlib.machinery import SourceFileLoader
4+
5+
def main():
6+
if len(sys.argv[1:]) != 1:
7+
print("Expected test directory name")
8+
9+
gitp4_path = sys.argv[1] + "/../git-p4.py"
10+
gitp4 = SourceFileLoader("gitp4", gitp4_path).load_module()
11+
gitp4.p4CmdList(["edit", b'\xFEfile'])
12+
13+
if __name__ == '__main__':
14+
main()
15+

0 commit comments

Comments
 (0)