|
| 1 | +// Copyright 2017 Sourcerer Inc. All Rights Reserved. |
| 2 | +// Author: Anatoly Kislov ([email protected]) |
| 3 | + |
| 4 | +package app.hashers |
| 5 | + |
| 6 | +import app.Logger |
| 7 | +import app.model.Commit |
| 8 | +import app.model.DiffContent |
| 9 | +import app.model.DiffFile |
| 10 | +import app.model.DiffRange |
| 11 | +import app.model.Repo |
| 12 | +import app.utils.RepoHelper |
| 13 | +import io.reactivex.Observable |
| 14 | +import org.eclipse.jgit.api.Git |
| 15 | +import org.eclipse.jgit.diff.DiffEntry |
| 16 | +import org.eclipse.jgit.diff.DiffFormatter |
| 17 | +import org.eclipse.jgit.diff.RawText |
| 18 | +import org.eclipse.jgit.errors.MissingObjectException |
| 19 | +import org.eclipse.jgit.lib.ObjectId |
| 20 | +import org.eclipse.jgit.revwalk.RevCommit |
| 21 | +import org.eclipse.jgit.revwalk.RevWalk |
| 22 | +import org.eclipse.jgit.util.io.DisabledOutputStream |
| 23 | + |
/**
 * Walks the history of a git repository and produces a stream of commits,
 * each one enriched with the per-file diffs against the commit that follows
 * it in the walk.
 */
object CommitCrawler {
    /**
     * Builds an observable of the commits reachable from
     * [RepoHelper.MASTER_BRANCH], in the order the [RevWalk] yields them.
     *
     * Every emitted [Commit] has its `diffs` and `repo` properties set.
     * Diffs are computed between a commit and the next commit of the walk;
     * the last commit of the walk is compared against an empty [Commit].
     *
     * Any exception raised while walking the history is logged and then
     * delivered to the subscriber through `onError`.
     *
     * @param git handle of the repository to crawl.
     * @param repo repository model assigned to every emitted commit.
     * @return observable of commits with diffs attached.
     */
    fun getObservable(git: Git, repo: Repo): Observable<Commit> = Observable
        .create<Commit> { subscriber ->
            try {
                // RevWalk holds an object reader; close it once the walk
                // is finished instead of leaking it.
                RevWalk(git.repository).use { revWalk ->
                    val commitId =
                        git.repository.resolve(RepoHelper.MASTER_BRANCH)
                    revWalk.markStart(revWalk.parseCommit(commitId))
                    for (revCommit in revWalk) {
                        subscriber.onNext(Commit(revCommit))
                    }
                    // Commits are combined in pairs, an empty commit is
                    // appended so that the diff of the initial commit can
                    // be calculated as well.
                    subscriber.onNext(Commit())
                }
                // Signal completion only on the success path: exactly one
                // terminal event (onComplete or onError) must be emitted.
                subscriber.onComplete()
            } catch (e: Exception) {
                Logger.error("Commit producing error", e)
                subscriber.onError(e)
            }
        }  // TODO(anatoly): Rewrite diff calculation in non-weird way.
        .pairWithNext()  // Pair commits to get diff.
        .map { (new, old) ->
            // Mapping and stats extraction.
            Logger.debug("Commit: ${new.raw?.name ?: ""}: " +
                         new.raw?.shortMessage)
            new.diffs = getDiffFiles(git, new, old)
            Logger.debug("Diff: ${new.diffs.size} entries")
            new.repo = repo
            new
        }

    /**
     * Computes the list of changed files between two commits.
     *
     * Entries whose change type is RENAME are dropped, as are entries whose
     * content is detected as binary. A commit without a raw [RevCommit] is
     * passed to the formatter as a `null` tree.
     *
     * @param git handle of the repository both commits belong to.
     * @param commitNew commit whose changes are being described.
     * @param commitOld commit that [commitNew] is compared against.
     * @return one [DiffFile] per remaining diff entry, holding old and new
     *         content together with the edited line ranges.
     */
    private fun getDiffFiles(git: Git,
                             commitNew: Commit,
                             commitOld: Commit): List<DiffFile> {
        val revCommitNew: RevCommit? = commitNew.raw
        val revCommitOld: RevCommit? = commitOld.raw

        return DiffFormatter(DisabledOutputStream.INSTANCE).use { formatter ->
            formatter.setRepository(git.repository)
            formatter.setDetectRenames(true)
            formatter.scan(revCommitOld?.tree, revCommitNew?.tree)
                // RENAME change type doesn't change file content.
                .filter { it.changeType != DiffEntry.ChangeType.RENAME }
                // Skip binary files.
                .filter { diff ->
                    // A deleted file only has content on the old side.
                    val id = when (diff.changeType) {
                        DiffEntry.ChangeType.DELETE -> diff.oldId.toObjectId()
                        else -> diff.newId.toObjectId()
                    }
                    // Close the object stream after the binary check so
                    // that it is not leaked for every diff entry.
                    git.repository.open(id).openStream().use { stream ->
                        !RawText.isBinary(stream)
                    }
                }
                .map { diff ->
                    val new = getContentByObjectId(git,
                                                   diff.newId.toObjectId())
                    val old = getContentByObjectId(git,
                                                   diff.oldId.toObjectId())

                    val edits = formatter.toFileHeader(diff).toEditList()
                    val path = when (diff.changeType) {
                        DiffEntry.ChangeType.DELETE -> diff.oldPath
                        else -> diff.newPath
                    }
                    DiffFile(path = path,
                             changeType = diff.changeType,
                             old = DiffContent(old, edits.map { edit ->
                                 DiffRange(edit.beginA, edit.endA) }),
                             new = DiffContent(new, edits.map { edit ->
                                 DiffRange(edit.beginB, edit.endB) }))
                }
        }
    }

    /**
     * Loads the object with the given id and splits it into lines.
     *
     * @param git handle of the repository to read the object from.
     * @param objectId id of the object to load.
     * @return lines of the object, or an empty list when the repository
     *         reports the object as missing.
     */
    private fun getContentByObjectId(git: Git,
                                     objectId: ObjectId): List<String> {
        return try {
            val rawText = RawText(git.repository.open(objectId).bytes)
            (0 until rawText.size()).map { line -> rawText.getString(line) }
        } catch (e: MissingObjectException) {
            listOf()
        }
    }

    /**
     * Pairs every emitted item with the item emitted right after it.
     *
     * For a source emitting `a, b, c` the result emits `(a, b), (b, c)`.
     */
    private fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
        return this.map { emit -> Pair(emit, emit) }
            // Accumulate emits by prev-next pair.
            .scan { pairAccumulated, pairNext ->
                Pair(pairAccumulated.second, pairNext.second)
            }
            .skip(1)  // Skip initial not paired emit.
    }
}
0 commit comments