31
31
# pylint: disable=wrong-import-position
32
32
#
33
33
34
+ import struct
34
35
import sys
35
36
if sys .version_info .major < 3 and sys .version_info .minor < 7 :
36
37
sys .stderr .write ("git-p4: requires Python 2.7 or later.\n " )
71
72
# The block size is reduced automatically if required
72
73
defaultBlockSize = 1 << 20
73
74
75
+ defaultMetadataDecodingStrategy = 'passthrough' if sys .version_info .major == 2 else 'fallback'
76
+ defaultFallbackMetadataEncoding = 'cp1252'
77
+
74
78
p4_access_checked = False
75
79
76
80
re_ko_keywords = re .compile (br'\$(Id|Header)(:[^$\n]+)?\$' )
@@ -229,6 +233,70 @@ def encode_text_stream(s):
229
233
return s .encode ('utf_8' ) if isinstance (s , unicode ) else s
230
234
231
235
236
class MetadataDecodingException(Exception):
    """Raised when Perforce metadata bytes cannot be decoded.

    The offending value is kept on ``input_string`` and is embedded in the
    message produced by ``str()``, together with a hint about the
    git-p4.metadataDecodingStrategy config option.
    """

    def __init__(self, input_string):
        # Hand the payload to the base class as well, so that ``args`` and
        # ``repr()`` carry the failing value instead of being empty.
        # (Two-argument super() keeps this working on python2.)
        super(MetadataDecodingException, self).__init__(input_string)
        self.input_string = input_string

    def __str__(self):
        return """Decoding perforce metadata failed!
The failing string was:
---
{}
---
Consider setting the git-p4.metadataDecodingStrategy config option to
'fallback', to allow metadata to be decoded using a fallback encoding,
defaulting to cp1252.""".format(self.input_string)
249
+
250
+
251
+ encoding_fallback_warning_issued = False
252
+ encoding_escape_warning_issued = False
253
def _escape_bytes_over_127(raw):
    """Return *raw* with every byte above 127 replaced by ``%XX``.

    ``XX`` is the byte value in uppercase hex; bytes in the ASCII range are
    copied through unchanged.
    """
    # bytearray() iterates as integers on both python2 and python3, so a
    # single loop serves both. Accumulating in a bytearray avoids re-copying
    # an immutable bytes object on every iteration.
    escaped = bytearray()
    for byte_number in bytearray(raw):
        if byte_number > 127:
            escaped += b'%%%02X' % byte_number
        else:
            escaped.append(byte_number)
    return bytes(escaped)


def metadata_stream_to_writable_bytes(s):
    """Convert a Perforce metadata value into bytes ready to be written to git.

    Text (non-bytes) input is simply encoded as UTF-8. For bytes input the
    outcome depends on git-p4.metadataDecodingStrategy (falling back to
    defaultMetadataDecodingStrategy when unset):

    - 'passthrough': the bytes are returned untouched.
    - bytes that already are valid UTF-8 are returned untouched.
    - 'fallback': the bytes are decoded with git-p4.metadataFallbackEncoding
      (default: defaultFallbackMetadataEncoding) and re-encoded as UTF-8; if
      that fails too, every byte over 127 is escaped as ``%XX``.
    - any other strategy: MetadataDecodingException is raised.

    Each of the two warnings is printed at most once per process, tracked by
    the module-level encoding_*_warning_issued flags.
    """
    global encoding_fallback_warning_issued
    global encoding_escape_warning_issued

    # Text needs no configuration at all, so answer before any config lookup.
    if not isinstance(s, bytes):
        return s.encode('utf_8')

    encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
    if encodingStrategy == 'passthrough':
        return s

    try:
        s.decode('utf_8')
    except UnicodeDecodeError:
        pass
    else:
        # Already valid UTF-8: nothing to convert.
        return s

    fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
    if encodingStrategy != 'fallback' or not fallbackEncoding:
        raise MetadataDecodingException(s)

    try:
        if not encoding_fallback_warning_issued:
            print("\nCould not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
            print("\n(this warning is only displayed once during an import)")
            encoding_fallback_warning_issued = True
        return s.decode(fallbackEncoding).encode('utf_8')
    except Exception:
        # Deliberately broad: whatever goes wrong with the fallback (unknown
        # codec name, undecodable bytes, ...) degrades to escaping rather
        # than aborting the import.
        if not encoding_escape_warning_issued:
            print("\nCould not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
            print("\n(this warning is only displayed once during an import)")
            encoding_escape_warning_issued = True
        return _escape_bytes_over_127(s)
298
+
299
+
232
300
def decode_path (path ):
233
301
"""Decode a given string (bytes or otherwise) using configured path
234
302
encoding options.
@@ -786,11 +854,12 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
786
854
if bytes is not str :
787
855
# Decode unmarshalled dict to use str keys and values, except for:
788
856
# - `data` which may contain arbitrary binary data
789
- # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
857
+ # - `desc` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes
858
+ # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path()
790
859
decoded_entry = {}
791
860
for key , value in entry .items ():
792
861
key = key .decode ()
793
- if isinstance (value , bytes ) and not (key in ('data' , 'path' , 'clientFile' ) or key .startswith ('depotFile' )):
862
+ if isinstance (value , bytes ) and not (key in ('data' , 'desc' , 'FullName' , ' path' , 'clientFile' ) or key .startswith ('depotFile' )):
794
863
value = value .decode ()
795
864
decoded_entry [key ] = value
796
865
# Parse out data if it's an error response
@@ -800,6 +869,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
800
869
if skip_info :
801
870
if 'code' in entry and entry ['code' ] == 'info' :
802
871
continue
872
+ if 'desc' in entry :
873
+ entry ['desc' ] = metadata_stream_to_writable_bytes (entry ['desc' ])
874
+ if 'FullName' in entry :
875
+ entry ['FullName' ] = metadata_stream_to_writable_bytes (entry ['FullName' ])
803
876
if cb is not None :
804
877
cb (entry )
805
878
else :
@@ -1603,7 +1676,13 @@ def getUserMapFromPerforceServer(self):
1603
1676
for output in p4CmdList (["users" ]):
1604
1677
if "User" not in output :
1605
1678
continue
1606
- self .users [output ["User" ]] = output ["FullName" ] + " <" + output ["Email" ] + ">"
1679
+ # "FullName" is bytes. "Email" on the other hand might be bytes
1680
+ # or unicode string depending on whether we are running under
1681
+ # python2 or python3. To support
1682
+ # git-p4.metadataDecodingStrategy=fallback, self.users dict values
1683
+ # are always bytes, ready to be written to git.
1684
+ emailbytes = metadata_stream_to_writable_bytes (output ["Email" ])
1685
+ self .users [output ["User" ]] = output ["FullName" ] + b" <" + emailbytes + b">"
1607
1686
self .emails [output ["Email" ]] = output ["User" ]
1608
1687
1609
1688
mapUserConfigRegex = re .compile (r"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$" , re .VERBOSE )
@@ -1613,26 +1692,28 @@ def getUserMapFromPerforceServer(self):
1613
1692
user = mapUser [0 ][0 ]
1614
1693
fullname = mapUser [0 ][1 ]
1615
1694
email = mapUser [0 ][2 ]
1616
- self .users [user ] = fullname + " <" + email + ">"
1695
+ fulluser = fullname + " <" + email + ">"
1696
+ self .users [user ] = metadata_stream_to_writable_bytes (fulluser )
1617
1697
self .emails [email ] = user
1618
1698
1619
- s = ''
1699
+ s = b ''
1620
1700
for (key , val ) in self .users .items ():
1621
- s += "%s\t %s\n " % (key .expandtabs (1 ), val .expandtabs (1 ))
1701
+ keybytes = metadata_stream_to_writable_bytes (key )
1702
+ s += b"%s\t %s\n " % (keybytes .expandtabs (1 ), val .expandtabs (1 ))
1622
1703
1623
- open (self .getUserCacheFilename (), 'w ' ).write (s )
1704
+ open (self .getUserCacheFilename (), 'wb ' ).write (s )
1624
1705
self .userMapFromPerforceServer = True
1625
1706
1626
1707
def loadUserMapFromCache(self):
    """Fill ``self.users`` from the on-disk user cache file.

    The cache is read as bytes, one ``<user>\\t<full name and email>`` entry
    per line. Keys are decoded as UTF-8 text, while values stay as bytes.
    If the cache file cannot be opened or read (IOError), the map is
    fetched via getUserMapFromPerforceServer() instead.
    """
    self.users = {}
    self.userMapFromPerforceServer = False
    try:
        # 'with' closes the handle even when reading fails part-way.
        with open(self.getUserCacheFilename(), 'rb') as cache:
            lines = cache.readlines()
    except IOError:
        self.getUserMapFromPerforceServer()
    else:
        for line in lines:
            entry = line.strip().split(b"\t")
            if len(entry) < 2:
                # Blank or tab-less line: no value to store, skip it
                # rather than die with an IndexError.
                continue
            self.users[entry[0].decode('utf_8')] = entry[1]
1638
1719
@@ -3229,7 +3310,8 @@ def make_email(self, userid):
3229
3310
if userid in self .users :
3230
3311
return self .users [userid ]
3231
3312
else :
3232
- return "%s <a@b>" % userid
3313
+ userid_bytes = metadata_stream_to_writable_bytes (userid )
3314
+ return b"%s <a@b>" % userid_bytes
3233
3315
3234
3316
def streamTag (self , gitStream , labelName , labelDetails , commit , epoch ):
3235
3317
"""Stream a p4 tag.
@@ -3253,9 +3335,10 @@ def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
3253
3335
email = self .make_email (owner )
3254
3336
else :
3255
3337
email = self .make_email (self .p4UserId ())
3256
- tagger = "%s %s %s" % (email , epoch , self .tz )
3257
3338
3258
- gitStream .write ("tagger %s\n " % tagger )
3339
+ gitStream .write ("tagger " )
3340
+ gitStream .write (email )
3341
+ gitStream .write (" %s %s\n " % (epoch , self .tz ))
3259
3342
3260
3343
print ("labelDetails=" , labelDetails )
3261
3344
if 'Description' in labelDetails :
@@ -3351,12 +3434,12 @@ def commit(self, details, files, branch, parent="", allow_empty=False):
3351
3434
self .gitStream .write ("commit %s\n " % branch )
3352
3435
self .gitStream .write ("mark :%s\n " % details ["change" ])
3353
3436
self .committedChanges .add (int (details ["change" ]))
3354
- committer = ""
3355
3437
if author not in self .users :
3356
3438
self .getUserMapFromPerforceServer ()
3357
- committer = "%s %s %s" % (self .make_email (author ), epoch , self .tz )
3358
3439
3359
- self .gitStream .write ("committer %s\n " % committer )
3440
+ self .gitStream .write ("committer " )
3441
+ self .gitStream .write (self .make_email (author ))
3442
+ self .gitStream .write (" %s %s\n " % (epoch , self .tz ))
3360
3443
3361
3444
self .gitStream .write ("data <<EOT\n " )
3362
3445
self .gitStream .write (details ["desc" ])
@@ -4257,6 +4340,14 @@ def run(self, args):
4257
4340
if self .useClientSpec_from_options :
4258
4341
system (["git" , "config" , "--bool" , "git-p4.useclientspec" , "true" ])
4259
4342
4343
+ # persist any git-p4 encoding-handling config options passed in for clone:
4344
+ if gitConfig ('git-p4.metadataDecodingStrategy' ):
4345
+ system (["git" , "config" , "git-p4.metadataDecodingStrategy" , gitConfig ('git-p4.metadataDecodingStrategy' )])
4346
+ if gitConfig ('git-p4.metadataFallbackEncoding' ):
4347
+ system (["git" , "config" , "git-p4.metadataFallbackEncoding" , gitConfig ('git-p4.metadataFallbackEncoding' )])
4348
+ if gitConfig ('git-p4.pathEncoding' ):
4349
+ system (["git" , "config" , "git-p4.pathEncoding" , gitConfig ('git-p4.pathEncoding' )])
4350
+
4260
4351
return True
4261
4352
4262
4353
0 commit comments