@@ -234,67 +234,91 @@ def encode_text_stream(s):
234234
235235
class MetadataDecodingException(Exception):
    """Raised when a piece of Perforce metadata cannot be decoded.

    Attributes:
        input_string: the raw value that failed to decode.
        error: the underlying decoding exception, or None when the caller
            did not supply one.
    """

    def __init__(self, input_string, error=None):
        self.input_string = input_string
        self.error = error

    def __str__(self):
        """Return the user-facing explanation, including a configuration hint.

        The underlying error is appended only when the module-level
        `verbose` flag is truthy and an error was recorded.
        """
        message = """Decoding perforce metadata failed!
The failing string was:
---
{}
---
Consider setting the git-p4.metadataDecodingStrategy config option to
'fallback', to allow metadata to be decoded using a fallback encoding,
defaulting to cp1252.""".format(self.input_string)
        # Rendering an exception must never raise on its own: look the flag
        # up defensively so an unbound `verbose` cannot mask the real failure
        # with a NameError.
        show_error = globals().get('verbose', False) and self.error is not None
        if show_error:
            message += """
---
Error:
---
{}""".format(self.error)
        return message
249257
250258
class MetadataTranscoder:
    """Decode Perforce metadata and re-encode it as UTF-8 bytes for git.

    The decoding strategy and the fallback encoding are read once, at
    construction time, from the git-p4.metadataDecodingStrategy and
    git-p4.metadataFallbackEncoding config options; the constructor
    arguments supply the values used when an option is unset.  Each of the
    two warnings below is printed at most once per instance.
    """

    def __init__(self, default_metadata_decoding_strategy, default_fallback_metadata_encoding):
        self.decoding_fallback_warning_issued = False
        self.decoding_escape_warning_issued = False
        self.decodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or default_metadata_decoding_strategy
        self.fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or default_fallback_metadata_encoding

    def decode_metadata(self, s, error_from_fallback=True):
        """Decode the bytes `s` to text.

        Returns a two-element list `[text, encoding]`:
          * `[decoded, 'utf_8']` when `s` is valid utf-8;
          * `[decoded, fallbackEncoding]` when the strategy is 'fallback'
            and the fallback encoding can decode `s`;
          * `[s, None]` (the untouched input) when the fallback decode fails
            and `error_from_fallback` is false.

        Raises MetadataDecodingException, carrying the most recent decoding
        error, in every other case where `s` is not valid utf-8.
        """
        try:
            return [s.decode('utf_8'), 'utf_8']
        except UnicodeDecodeError as utf8_error:
            error = utf8_error
            if self.decodingStrategy == 'fallback' and self.fallbackEncoding:
                try:
                    if not self.decoding_fallback_warning_issued:
                        print("\n Could not decode value as utf-8; using configured fallback encoding %s: %s" % (self.fallbackEncoding, s))
                        print("\n (this warning is only displayed once during an import)")
                        self.decoding_fallback_warning_issued = True
                    return [s.decode(self.fallbackEncoding), self.fallbackEncoding]
                except Exception as fallback_error:
                    if not error_from_fallback:
                        # The caller wants to deal with undecodable input itself.
                        return [s, None]
                    error = fallback_error
            raise MetadataDecodingException(s, error)

    def metadata_stream_to_writable_bytes(self, s):
        """Return `s` as bytes that are safe to write to git as utf-8.

        Text input is encoded as utf-8.  Bytes input is returned unchanged
        under the 'passthrough' strategy or when it already is valid utf-8,
        transcoded to utf-8 when the fallback encoding can decode it, and
        otherwise returned with every byte above 127 replaced by '%XX'.

        Raises MetadataDecodingException when `s` is not valid utf-8 and the
        strategy is neither 'passthrough' nor 'fallback'.
        """
        if not isinstance(s, bytes):
            return s.encode('utf_8')
        if self.decodingStrategy == 'passthrough':
            return s

        [text, encoding] = self.decode_metadata(s, False)
        if encoding == 'utf_8':
            # s is of utf-8 already
            return s

        if encoding is None:
            # could not decode s, even with fallback encoding
            if not self.decoding_escape_warning_issued:
                print("\n Could not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (self.fallbackEncoding, s))
                print("\n (this warning is only displayed once during an import)")
                self.decoding_escape_warning_issued = True
            return self._escape_high_bytes(s)

        # were able to decode but not to utf-8
        return text.encode('utf_8')

    @staticmethod
    def _escape_high_bytes(s):
        """Return `s` with each byte above 127 replaced by '%' + two uppercase hex digits."""
        # Iterating a bytearray yields integers under both python2 and
        # python3, so a single loop serves both; appending to a bytearray
        # also avoids re-copying the result for every input byte.
        escaped = bytearray()
        for byte_number in bytearray(s):
            if byte_number > 127:
                escaped += ('%%%02X' % byte_number).encode('ascii')
            else:
                escaped.append(byte_number)
        return bytes(escaped)
298322
299323
300324def decode_path (path ):
@@ -898,14 +922,14 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
898922 decoded_entry [key ] = value
899923 # Parse out data if it's an error response
900924 if decoded_entry .get ('code' ) == 'error' and 'data' in decoded_entry :
901- decoded_entry ['data' ] = decoded_entry ['data' ]. decode ( )
925+ decoded_entry ['data' ] = metadataTranscoder . decode_metadata ( decoded_entry ['data' ])
902926 entry = decoded_entry
903927 if skip_info :
904928 if 'code' in entry and entry ['code' ] == 'info' :
905929 continue
906930 for key in p4KeysContainingNonUtf8Chars ():
907931 if key in entry :
908- entry [key ] = metadata_stream_to_writable_bytes (entry [key ])
932+ entry [key ] = metadataTranscoder . metadata_stream_to_writable_bytes (entry [key ])
909933 if cb is not None :
910934 cb (entry )
911935 else :
@@ -1718,7 +1742,7 @@ def getUserMapFromPerforceServer(self):
17181742 # python2 or python3. To support
17191743 # git-p4.metadataDecodingStrategy=fallback, self.users dict values
17201744 # are always bytes, ready to be written to git.
1721- emailbytes = metadata_stream_to_writable_bytes (output ["Email" ])
1745+ emailbytes = metadataTranscoder . metadata_stream_to_writable_bytes (output ["Email" ])
17221746 self .users [output ["User" ]] = output ["FullName" ] + b" <" + emailbytes + b">"
17231747 self .emails [output ["Email" ]] = output ["User" ]
17241748
@@ -1730,12 +1754,12 @@ def getUserMapFromPerforceServer(self):
17301754 fullname = mapUser [0 ][1 ]
17311755 email = mapUser [0 ][2 ]
17321756 fulluser = fullname + " <" + email + ">"
1733- self .users [user ] = metadata_stream_to_writable_bytes (fulluser )
1757+ self .users [user ] = metadataTranscoder . metadata_stream_to_writable_bytes (fulluser )
17341758 self .emails [email ] = user
17351759
17361760 s = b''
17371761 for (key , val ) in self .users .items ():
1738- keybytes = metadata_stream_to_writable_bytes (key )
1762+ keybytes = metadataTranscoder . metadata_stream_to_writable_bytes (key )
17391763 s += b"%s\t %s\n " % (keybytes .expandtabs (1 ), val .expandtabs (1 ))
17401764
17411765 open (self .getUserCacheFilename (), 'wb' ).write (s )
@@ -3349,7 +3373,7 @@ def make_email(self, userid):
33493373 if userid in self .users :
33503374 return self .users [userid ]
33513375 else :
3352- userid_bytes = metadata_stream_to_writable_bytes (userid )
3376+ userid_bytes = metadataTranscoder . metadata_stream_to_writable_bytes (userid )
33533377 return b"%s <a@b>" % userid_bytes
33543378
33553379 def streamTag (self , gitStream , labelName , labelDetails , commit , epoch ):
@@ -4561,6 +4585,7 @@ def printUsage(commands):
45614585 "unshelve" : P4Unshelve ,
45624586}
45634587
# Module-level transcoder instance used by the metadata handling call sites
# in this file (for example the p4 result decoding and the user-map code).
# NOTE(review): MetadataTranscoder.__init__ calls gitConfig(), so creating the
# instance here performs those config lookups when the module is loaded,
# before main() runs — confirm that is intended.
metadataTranscoder = MetadataTranscoder (defaultMetadataDecodingStrategy , defaultFallbackMetadataEncoding )
45644589
45654590def main ():
45664591 if len (sys .argv [1 :]) == 0 :
0 commit comments