15
15
# pylint: disable=too-many-statements,too-many-instance-attributes
16
16
# pylint: disable=too-many-branches,too-many-nested-blocks
17
17
#
18
+ import struct
18
19
import sys
19
20
if sys .version_info .major < 3 and sys .version_info .minor < 7 :
20
21
sys .stderr .write ("git-p4: requires Python 2.7 or later.\n " )
54
55
# The block size is reduced automatically if required
55
56
defaultBlockSize = 1 << 20
56
57
58
+ defaultMetadataDecodingStrategy = 'passthrough' if sys .version_info .major == 2 else 'fallback'
59
+ defaultFallbackMetadataEncoding = 'cp1252'
60
+
57
61
p4_access_checked = False
58
62
59
63
re_ko_keywords = re .compile (br'\$(Id|Header)(:[^$\n]+)?\$' )
@@ -203,6 +207,70 @@ def decode_text_stream(s):
203
207
def encode_text_stream(s):
    """Return `s` encoded as UTF-8 when it is a `unicode` object; otherwise return it unchanged."""
    # Anything that is not text is handed back untouched.
    if not isinstance(s, unicode):
        return s
    return s.encode('utf_8')
205
209
210
+
211
class MetadataDecodingException(Exception):
    """Signals that a piece of Perforce metadata could not be decoded.

    The value that failed is stored on `input_string` and is embedded in the
    text returned by `str()` on the exception.
    """

    def __init__(self, input_string):
        # Keep the raw value so that __str__ can show exactly what failed.
        self.input_string = input_string

    def __str__(self):
        """Return the multi-line failure message, including the failing value
        and a hint about the git-p4.metadataDecodingStrategy option."""
        template = """Decoding perforce metadata failed!
The failing string was:
---
{}
---
Consider setting the git-p4.metadataDecodingStrategy config option to
'fallback', to allow metadata to be decoded using a fallback encoding,
defaulting to cp1252."""
        return template.format(self.input_string)
224
+
225
+
226
# Each warning below is printed at most once; these flags record whether it
# has already been shown.
encoding_fallback_warning_issued = False
encoding_escape_warning_issued = False


def _escape_high_bytes(s):
    """Return `s` with every byte above 127 replaced by b'%' followed by its
    two uppercase hex digits; bytes up to 127 are copied unchanged."""
    # bytearray() yields integers on both python2 and python3, so a single
    # loop serves both, and appending to a bytearray avoids the quadratic
    # cost of repeated bytes concatenation.
    escaped = bytearray()
    for byte_number in bytearray(s):
        if byte_number > 127:
            escaped += ('%%%02X' % byte_number).encode('ascii')
        else:
            escaped.append(byte_number)
    return bytes(escaped)


def metadata_stream_to_writable_bytes(s):
    """Convert a metadata value into bytes.

    - A value that is not `bytes` is encoded as UTF-8.
    - With the 'passthrough' strategy, bytes are returned unchanged.
    - Bytes that already decode as UTF-8 are returned unchanged.
    - Otherwise, with the 'fallback' strategy and a fallback encoding set,
      the bytes are decoded with that encoding and re-encoded as UTF-8; if
      that fails too, every byte above 127 is escaped as %XX.

    The strategy and fallback encoding come from the
    git-p4.metadataDecodingStrategy and git-p4.metadataFallbackEncoding
    config options, falling back to the module defaults.

    Raises MetadataDecodingException when the bytes are not valid UTF-8 and
    the fallback path is not enabled.
    """
    global encoding_fallback_warning_issued
    global encoding_escape_warning_issued

    # Text never consults the decoding strategy, so skip the config lookups.
    if not isinstance(s, bytes):
        return s.encode('utf_8')

    encodingStrategy = gitConfig('git-p4.metadataDecodingStrategy') or defaultMetadataDecodingStrategy
    if encodingStrategy == 'passthrough':
        return s

    try:
        s.decode('utf_8')
    except UnicodeDecodeError:
        # The fallback encoding only matters once UTF-8 decoding has failed.
        fallbackEncoding = gitConfig('git-p4.metadataFallbackEncoding') or defaultFallbackMetadataEncoding
        if encodingStrategy != 'fallback' or not fallbackEncoding:
            raise MetadataDecodingException(s)
    else:
        return s

    try:
        if not encoding_fallback_warning_issued:
            print("\n Could not decode value as utf-8; using configured fallback encoding %s: %s" % (fallbackEncoding, s))
            print("\n (this warning is only displayed once during an import)")
            encoding_fallback_warning_issued = True
        return s.decode(fallbackEncoding).encode('utf_8')
    except Exception:
        # Deliberately broad: whatever goes wrong with the configured
        # fallback, the value is still written out in escaped form.
        if not encoding_escape_warning_issued:
            print("\n Could not decode value with configured fallback encoding %s; escaping bytes over 127: %s" % (fallbackEncoding, s))
            print("\n (this warning is only displayed once during an import)")
            encoding_escape_warning_issued = True
        return _escape_high_bytes(s)
273
+
206
274
def decode_path (path ):
207
275
"""Decode a given string (bytes or otherwise) using configured path encoding options
208
276
"""
@@ -702,11 +770,12 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
702
770
if bytes is not str :
703
771
# Decode unmarshalled dict to use str keys and values, except for:
704
772
# - `data` which may contain arbitrary binary data
705
- # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text
773
+ # - `desc` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes
774
+ # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path()
706
775
decoded_entry = {}
707
776
for key , value in entry .items ():
708
777
key = key .decode ()
709
- if isinstance (value , bytes ) and not (key in ('data' , 'path' , 'clientFile' ) or key .startswith ('depotFile' )):
778
+ if isinstance (value , bytes ) and not (key in ('data' , 'desc' , 'FullName' , ' path' , 'clientFile' ) or key .startswith ('depotFile' )):
710
779
value = value .decode ()
711
780
decoded_entry [key ] = value
712
781
# Parse out data if it's an error response
@@ -716,6 +785,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
716
785
if skip_info :
717
786
if 'code' in entry and entry ['code' ] == 'info' :
718
787
continue
788
+ if 'desc' in entry :
789
+ entry ['desc' ] = metadata_stream_to_writable_bytes (entry ['desc' ])
790
+ if 'FullName' in entry :
791
+ entry ['FullName' ] = metadata_stream_to_writable_bytes (entry ['FullName' ])
719
792
if cb is not None :
720
793
cb (entry )
721
794
else :
@@ -1435,7 +1508,13 @@ def getUserMapFromPerforceServer(self):
1435
1508
for output in p4CmdList (["users" ]):
1436
1509
if "User" not in output :
1437
1510
continue
1438
- self .users [output ["User" ]] = output ["FullName" ] + " <" + output ["Email" ] + ">"
1511
+ # "FullName" is bytes. "Email" on the other hand might be bytes
1512
+ # or unicode string depending on whether we are running under
1513
+ # python2 or python3. To support
1514
+ # git-p4.metadataDecodingStrategy=fallback, self.users dict values
1515
+ # are always bytes, ready to be written to git.
1516
+ emailbytes = metadata_stream_to_writable_bytes (output ["Email" ])
1517
+ self .users [output ["User" ]] = output ["FullName" ] + b" <" + emailbytes + b">"
1439
1518
self .emails [output ["Email" ]] = output ["User" ]
1440
1519
1441
1520
mapUserConfigRegex = re .compile (r"^\s*(\S+)\s*=\s*(.+)\s*<(\S+)>\s*$" , re .VERBOSE )
@@ -1445,26 +1524,28 @@ def getUserMapFromPerforceServer(self):
1445
1524
user = mapUser [0 ][0 ]
1446
1525
fullname = mapUser [0 ][1 ]
1447
1526
email = mapUser [0 ][2 ]
1448
- self .users [user ] = fullname + " <" + email + ">"
1527
+ fulluser = fullname + " <" + email + ">"
1528
+ self .users [user ] = metadata_stream_to_writable_bytes (fulluser )
1449
1529
self .emails [email ] = user
1450
1530
1451
- s = ''
1531
+ s = b ''
1452
1532
for (key , val ) in self .users .items ():
1453
- s += "%s\t %s\n " % (key .expandtabs (1 ), val .expandtabs (1 ))
1533
+ keybytes = metadata_stream_to_writable_bytes (key )
1534
+ s += b"%s\t %s\n " % (keybytes .expandtabs (1 ), val .expandtabs (1 ))
1454
1535
1455
- open (self .getUserCacheFilename (), 'w ' ).write (s )
1536
+ open (self .getUserCacheFilename (), 'wb ' ).write (s )
1456
1537
self .userMapFromPerforceServer = True
1457
1538
1458
1539
def loadUserMapFromCache(self):
    """Populate self.users from the user cache file.

    The cache is read in binary mode, one tab-separated entry per line: the
    first field is decoded as UTF-8 and used as the key, the second field is
    stored as bytes. If the cache cannot be opened or read (IOError), the
    map is rebuilt by calling getUserMapFromPerforceServer() instead.
    """
    self.users = {}
    self.userMapFromPerforceServer = False
    try:
        # The context manager closes the file even if reading fails part-way.
        with open(self.getUserCacheFilename(), 'rb') as cache:
            lines = cache.readlines()
    except IOError:
        self.getUserMapFromPerforceServer()
        return

    for line in lines:
        entry = line.strip().split(b"\t")
        if len(entry) < 2:
            # A blank or truncated line has no value field; skip it rather
            # than fail with IndexError.
            continue
        self.users[entry[0].decode('utf_8')] = entry[1]
1470
1551
@@ -3020,7 +3101,8 @@ def make_email(self, userid):
3020
3101
if userid in self .users :
3021
3102
return self .users [userid ]
3022
3103
else :
3023
- return "%s <a@b>" % userid
3104
+ userid_bytes = metadata_stream_to_writable_bytes (userid )
3105
+ return b"%s <a@b>" % userid_bytes
3024
3106
3025
3107
def streamTag (self , gitStream , labelName , labelDetails , commit , epoch ):
3026
3108
""" Stream a p4 tag.
@@ -3043,9 +3125,10 @@ def streamTag(self, gitStream, labelName, labelDetails, commit, epoch):
3043
3125
email = self .make_email (owner )
3044
3126
else :
3045
3127
email = self .make_email (self .p4UserId ())
3046
- tagger = "%s %s %s" % (email , epoch , self .tz )
3047
3128
3048
- gitStream .write ("tagger %s\n " % tagger )
3129
+ gitStream .write ("tagger " )
3130
+ gitStream .write (email )
3131
+ gitStream .write (" %s %s\n " % (epoch , self .tz ))
3049
3132
3050
3133
print ("labelDetails=" ,labelDetails )
3051
3134
if 'Description' in labelDetails :
@@ -3138,12 +3221,12 @@ def commit(self, details, files, branch, parent = "", allow_empty=False):
3138
3221
self .gitStream .write ("commit %s\n " % branch )
3139
3222
self .gitStream .write ("mark :%s\n " % details ["change" ])
3140
3223
self .committedChanges .add (int (details ["change" ]))
3141
- committer = ""
3142
3224
if author not in self .users :
3143
3225
self .getUserMapFromPerforceServer ()
3144
- committer = "%s %s %s" % (self .make_email (author ), epoch , self .tz )
3145
3226
3146
- self .gitStream .write ("committer %s\n " % committer )
3227
+ self .gitStream .write ("committer " )
3228
+ self .gitStream .write (self .make_email (author ))
3229
+ self .gitStream .write (" %s %s\n " % (epoch , self .tz ))
3147
3230
3148
3231
self .gitStream .write ("data <<EOT\n " )
3149
3232
self .gitStream .write (details ["desc" ])
@@ -4055,6 +4138,14 @@ def run(self, args):
4055
4138
if self .useClientSpec_from_options :
4056
4139
system (["git" , "config" , "--bool" , "git-p4.useclientspec" , "true" ])
4057
4140
4141
+ # persist any git-p4 encoding-handling config options passed in for clone:
4142
+ if gitConfig ('git-p4.metadataDecodingStrategy' ):
4143
+ system (["git" , "config" , "git-p4.metadataDecodingStrategy" , gitConfig ('git-p4.metadataDecodingStrategy' )])
4144
+ if gitConfig ('git-p4.metadataFallbackEncoding' ):
4145
+ system (["git" , "config" , "git-p4.metadataFallbackEncoding" , gitConfig ('git-p4.metadataFallbackEncoding' )])
4146
+ if gitConfig ('git-p4.pathEncoding' ):
4147
+ system (["git" , "config" , "git-p4.pathEncoding" , gitConfig ('git-p4.pathEncoding' )])
4148
+
4058
4149
return True
4059
4150
4060
4151
class P4Unshelve (Command ):
0 commit comments