Skip to content

Commit b932705

Browse files
luked99 authored and gitster committed
git-p4: stream from perforce to speed up clones
Change commit() to stream data from Perforce directly into fast-import, rather than reading it all into memory first and then writing it out. This hugely reduces the memory requirements when cloning non-incrementally.

Signed-off-by: Luke Diamand <[email protected]>
Signed-off-by: Pete Wyckoff <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent c14417c commit b932705

File tree

1 file changed

+102
-67
lines changed

1 file changed

+102
-67
lines changed

contrib/fast-import/git-p4

Lines changed: 102 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def isModeExec(mode):
201201
def isModeExecChanged(src_mode, dst_mode):
    """Return True when exactly one of the two modes is executable.

    Each mode is classified with isModeExec(); the result is whether the
    two classifications differ.
    """
    src_is_exec = isModeExec(src_mode)
    dst_is_exec = isModeExec(dst_mode)
    return src_is_exec != dst_is_exec
203203

204-
def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
204+
def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None):
205205
cmd = p4_build_cmd("-G %s" % (cmd))
206206
if verbose:
207207
sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -224,7 +224,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
224224
try:
225225
while True:
226226
entry = marshal.load(p4.stdout)
227-
result.append(entry)
227+
if cb is not None:
228+
cb(entry)
229+
else:
230+
result.append(entry)
228231
except EOFError:
229232
pass
230233
exitCode = p4.wait()
@@ -950,10 +953,84 @@ class P4Sync(Command):
950953

951954
return branches
952955

953-
## Should move this out, doesn't use SELF.
954-
def readP4Files(self, files):
956+
# output one file from the P4 stream
957+
# - helper for streamP4Files
958+
959+
def streamOneP4File(self, file, contents):
    """Write one file from the "p4 print" stream to git fast-import.

    Emits an "M <mode> inline <path>" command followed by a "data"
    section on self.gitStream.

    file     -- dict of p4 metadata for the file; the 'type' and
                'depotFile' entries are read here.
    contents -- list of string chunks making up the file data, in order.
                The caller's list is left unmodified.

    Files of type "apple" are reported on stdout and skipped.  A symlink
    with no data at all is reported on stderr and skipped.
    """
    if file["type"] == "apple":
        # Parenthesised single argument: prints the same text under both
        # the print statement and the print function.
        print("\nfile %s is a strange apple file that forks. Ignoring" %
              file['depotFile'])
        return

    relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
    if verbose:
        sys.stderr.write("%s\n" % relPath)

    mode = "644"
    if isP4Exec(file["type"]):
        mode = "755"
    elif file["type"] == "symlink":
        mode = "120000"
        # p4 print on a symlink normally yields "target\n".  Join the
        # chunks so the newline is found even if it arrives in a chunk of
        # its own, and only strip it when it is really there.
        target = "".join(contents)
        if not target:
            # Nothing to point at; popping from an empty list used to
            # raise IndexError here.
            sys.stderr.write("ignoring empty symlink %s\n"
                             % file['depotFile'])
            return
        if target.endswith("\n"):
            target = target[:-1]
        contents = [target]

    if self.isWindows and file["type"].endswith("text"):
        # Convert CRLF to LF chunk by chunk.  A trailing "\r" is held
        # back and glued onto the next chunk so that a "\r\n" pair split
        # across two chunks is still converted.
        converted = []
        carry = ""
        for chunk in contents:
            chunk = carry + chunk
            carry = ""
            if chunk.endswith("\r"):
                chunk = chunk[:-1]
                carry = "\r"
            converted.append(chunk.replace("\r\n", "\n"))
        if carry:
            converted.append(carry)
        contents = converted

    # Collapse expanded RCS-style keywords back to their bare form.
    pattern = None
    if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
        pattern = r'(?i)\$(Id|Header):[^$]*\$'
    elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
        pattern = r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$'
    if pattern is not None:
        # Substitute on the joined text: a keyword that straddles a chunk
        # boundary would never match if each chunk were handled alone.
        contents = [re.sub(pattern, r'$\1$', "".join(contents))]

    self.gitStream.write("M %s inline %s\n" % (mode, relPath))

    # fast-import needs the total byte count before the data itself
    length = sum(len(chunk) for chunk in contents)

    self.gitStream.write("data %d\n" % length)
    for chunk in contents:
        self.gitStream.write(chunk)
    self.gitStream.write("\n")
1002+
1003+
def streamOneP4Deletion(self, file):
    """Write a fast-import "D" (delete) command for one file.

    file -- dict describing the deleted file; only its 'path' entry is
            read.  The path is made relative with self.stripRepoPath()
            using self.branchPrefixes.
    """
    git_path = self.stripRepoPath(file['path'], self.branchPrefixes)
    if verbose:
        sys.stderr.write("delete %s\n" % git_path)
    delete_command = "D %s\n" % git_path
    self.gitStream.write(delete_command)
1008+
1009+
# handle another chunk of streaming data
1010+
def streamP4FilesCb(self, marshalled):
    """Consume one marshalled dict from the p4 output stream.

    An entry carrying 'depotFile' marks the start of a new file: if
    information for a previous file has been accumulated, that file is
    first written out with self.streamOneP4File() and the accumulators
    are reset.  The entry is then folded into the current state: a
    'data' value is appended to self.stream_contents, every other key is
    stored in self.stream_file.

    marshalled -- dict for one entry of the stream.
    """
    if 'depotFile' in marshalled and self.stream_have_file_info:
        # start of a new file - output the old one first
        self.streamOneP4File(self.stream_file, self.stream_contents)
        self.stream_file = {}
        self.stream_contents = []
        self.stream_have_file_info = False

    # pick up the new file information... 'data' chunks are collected in
    # order, everything else is metadata for the current file
    for key, value in marshalled.items():
        if key == 'data':
            self.stream_contents.append(value)
        else:
            self.stream_file[key] = value

    self.stream_have_file_info = True
1028+
1029+
# Stream directly from "p4 print" into "git fast-import"
1030+
def streamP4Files(self, files):
9551031
filesForCommit = []
9561032
filesToRead = []
1033+
filesToDelete = []
9571034

9581035
for f in files:
9591036
includeFile = True
@@ -967,50 +1044,35 @@ class P4Sync(Command):
9671044
filesForCommit.append(f)
9681045
if f['action'] not in ('delete', 'purge'):
9691046
filesToRead.append(f)
1047+
else:
1048+
filesToDelete.append(f)
9701049

971-
filedata = []
972-
if len(filesToRead) > 0:
973-
filedata = p4CmdList('-x - print',
974-
stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
975-
for f in filesToRead]),
976-
stdin_mode='w+')
977-
978-
if "p4ExitCode" in filedata[0]:
979-
die("Problems executing p4. Error: [%d]."
980-
% (filedata[0]['p4ExitCode']));
981-
982-
j = 0;
983-
contents = {}
984-
while j < len(filedata):
985-
stat = filedata[j]
986-
j += 1
987-
text = ''
988-
while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
989-
text += filedata[j]['data']
990-
del filedata[j]['data']
991-
j += 1
992-
993-
if not stat.has_key('depotFile'):
994-
sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
995-
continue
1050+
# deleted files...
1051+
for f in filesToDelete:
1052+
self.streamOneP4Deletion(f)
9961053

997-
if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
998-
text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text)
999-
elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
1000-
text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text)
1054+
if len(filesToRead) > 0:
1055+
self.stream_file = {}
1056+
self.stream_contents = []
1057+
self.stream_have_file_info = False
10011058

1002-
contents[stat['depotFile']] = text
1059+
# curry self argument
1060+
def streamP4FilesCbSelf(entry):
1061+
self.streamP4FilesCb(entry)
10031062

1004-
for f in filesForCommit:
1005-
path = f['path']
1006-
if contents.has_key(path):
1007-
f['data'] = contents[path]
1063+
p4CmdList("-x - print",
1064+
'\n'.join(['%s#%s' % (f['path'], f['rev'])
1065+
for f in filesToRead]),
1066+
cb=streamP4FilesCbSelf)
10081067

1009-
return filesForCommit
1068+
# do the last chunk
1069+
if self.stream_file.has_key('depotFile'):
1070+
self.streamOneP4File(self.stream_file, self.stream_contents)
10101071

10111072
def commit(self, details, files, branch, branchPrefixes, parent = ""):
10121073
epoch = details["time"]
10131074
author = details["user"]
1075+
self.branchPrefixes = branchPrefixes
10141076

10151077
if self.verbose:
10161078
print "commit into %s" % branch
@@ -1023,7 +1085,6 @@ class P4Sync(Command):
10231085
new_files.append (f)
10241086
else:
10251087
sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
1026-
files = self.readP4Files(new_files)
10271088

10281089
self.gitStream.write("commit %s\n" % branch)
10291090
# gitStream.write("mark :%s\n" % details["change"])
@@ -1051,33 +1112,7 @@ class P4Sync(Command):
10511112
print "parent %s" % parent
10521113
self.gitStream.write("from %s\n" % parent)
10531114

1054-
for file in files:
1055-
if file["type"] == "apple":
1056-
print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
1057-
continue
1058-
1059-
relPath = self.stripRepoPath(file['path'], branchPrefixes)
1060-
if file["action"] in ("delete", "purge"):
1061-
self.gitStream.write("D %s\n" % relPath)
1062-
else:
1063-
data = file['data']
1064-
1065-
mode = "644"
1066-
if isP4Exec(file["type"]):
1067-
mode = "755"
1068-
elif file["type"] == "symlink":
1069-
mode = "120000"
1070-
# p4 print on a symlink contains "target\n", so strip it off
1071-
data = data[:-1]
1072-
1073-
if self.isWindows and file["type"].endswith("text"):
1074-
data = data.replace("\r\n", "\n")
1075-
1076-
self.gitStream.write("M %s inline %s\n" % (mode, relPath))
1077-
self.gitStream.write("data %s\n" % len(data))
1078-
self.gitStream.write(data)
1079-
self.gitStream.write("\n")
1080-
1115+
self.streamP4Files(new_files)
10811116
self.gitStream.write("\n")
10821117

10831118
change = int(details["change"])

0 commit comments

Comments
 (0)