Skip to content

Commit 2b94e8d

Browse files
authored
fix(perf): crawler and longevity should share is_binary check code (#148) (#169)
1 parent 6c895b5 commit 2b94e8d

File tree

2 files changed

+16
-29
lines changed

2 files changed

+16
-29
lines changed

src/main/kotlin/app/hashers/CodeLongevity.kt

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -456,22 +456,6 @@ class CodeLongevity(
456456
val newId = diff.getNewId().toObjectId()
457457
Logger.trace { "old: '$oldPath', new: '$newPath'" }
458458

459-
// Skip binary files.
460-
val fileId = if (newPath != DiffEntry.DEV_NULL) newId else oldId
461-
try {
462-
if (RawText.isBinary(repo.open(fileId).openStream())) {
463-
continue
464-
}
465-
} catch (e: Exception) {
466-
continue
467-
//TODO(anatoly): better exception handling.
468-
}
469-
470-
// TODO(alex): does it happen in the wilds?
471-
if (diff.changeType == DiffEntry.ChangeType.COPY) {
472-
continue
473-
}
474-
475459
// File was deleted, initialize the line array in the files map.
476460
if (diff.changeType == DiffEntry.ChangeType.DELETE) {
477461
val fileLoader = repo.open(oldId)

src/main/kotlin/app/hashers/CommitCrawler.kt

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,22 @@ object CommitCrawler {
139139
Logger.printCommit(commit.shortMessage, commit.name, perc)
140140

141141
val diffEntries = df.scan(parentCommit, commit)
142-
val diffEdits = diffEntries.map { diff ->
142+
val diffEdits = diffEntries
143+
.filter { diff ->
144+
diff.changeType != DiffEntry.ChangeType.COPY
145+
}
146+
.filter { diff ->
147+
val fileId =
148+
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
149+
diff.getNewId().toObjectId()
150+
} else {
151+
diff.getOldId().toObjectId()
152+
}
153+
val stream = try { repo.open(fileId).openStream() }
154+
catch (e: Exception) { null }
155+
stream != null && !RawText.isBinary(stream)
156+
}
157+
.map { diff ->
143158
JgitDiff(diff, df.toFileHeader(diff).toEditList())
144159
}
145160
subscriber.onNext(JgitPair(commit, diffEdits))
@@ -180,18 +195,6 @@ object CommitCrawler {
180195
private fun getDiffFiles(jgitRepo: Repository,
181196
jgitDiffs: List<JgitDiff>) : List<DiffFile> {
182197
return jgitDiffs
183-
// Skip binary files.
184-
.filter { (diff, _) ->
185-
val fileId =
186-
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
187-
diff.getNewId().toObjectId()
188-
} else {
189-
diff.getOldId().toObjectId()
190-
}
191-
val stream = try { jgitRepo.open(fileId).openStream() }
192-
catch (e: Exception) { null }
193-
stream != null && !RawText.isBinary(stream)
194-
}
195198
.map { (diff, edits) ->
196199
// TODO(anatoly): Can produce exception for large object.
197200
// Investigate for size.

0 commit comments

Comments
 (0)