Skip to content

Commit efe0f5a

Browse files
authored
feat: add common observable interface for hashers (APP-100) (#34)
* wip: single observable for traversing commits * wip: trying to switch to observables (APP-100) * feat: add common observable for commits with diffs * feat: move fact logic to FactHasher * chore: remove excess imports, add author * fix: pinch * feat: update tests, add FactHasherTest, add CommitObservable, minor fixes * chore: author * chore: remove excess imports * wip: fix PR
1 parent 9c1e6a9 commit efe0f5a

File tree

9 files changed

+421
-307
lines changed

9 files changed

+421
-307
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Copyright 2017 Sourcerer Inc. All Rights Reserved.
2+
// Author: Anatoly Kislov ([email protected])
3+
4+
package app.hashers
5+
6+
import app.Logger
7+
import app.model.Commit
8+
import app.model.DiffContent
9+
import app.model.DiffFile
10+
import app.model.DiffRange
11+
import app.model.Repo
12+
import app.utils.RepoHelper
13+
import io.reactivex.Observable
14+
import org.eclipse.jgit.api.Git
15+
import org.eclipse.jgit.diff.DiffEntry
16+
import org.eclipse.jgit.diff.DiffFormatter
17+
import org.eclipse.jgit.diff.RawText
18+
import org.eclipse.jgit.errors.MissingObjectException
19+
import org.eclipse.jgit.lib.ObjectId
20+
import org.eclipse.jgit.revwalk.RevCommit
21+
import org.eclipse.jgit.revwalk.RevWalk
22+
import org.eclipse.jgit.util.io.DisabledOutputStream
23+
24+
/**
 * Produces an observable stream of the commits of a repository, each one
 * populated with the diff against the commit emitted right after it.
 */
object CommitCrawler {
    /**
     * Walks the history reachable from [RepoHelper.MASTER_BRANCH] and emits
     * every commit with its `diffs` and `repo` fields filled in.
     *
     * Any exception raised while walking is logged and delivered to the
     * subscriber through `onError`; `onComplete` is sent only on success.
     *
     * @param git JGit handle of the repository to walk.
     * @param repo model object assigned to every emitted commit.
     * @return an observable of commits paired with their diffs.
     */
    fun getObservable(git: Git, repo: Repo): Observable<Commit> = Observable
        .create<Commit> { subscriber ->
            try {
                // RevWalk is AutoCloseable; release it when the walk ends.
                RevWalk(git.repository).use { revWalk ->
                    val commitId =
                        git.repository.resolve(RepoHelper.MASTER_BRANCH)
                    revWalk.markStart(revWalk.parseCommit(commitId))
                    for (revCommit in revWalk) {
                        // Stop walking once nobody is listening anymore.
                        if (subscriber.isDisposed) break
                        subscriber.onNext(Commit(revCommit))
                    }
                }
                // Commits are combined in pairs, an empty commit is
                // appended to calculate the diff of the initial commit.
                subscriber.onNext(Commit())
                // Terminal events are mutually exclusive: complete only
                // when no error has been signalled.
                subscriber.onComplete()
            } catch (e: Exception) {
                Logger.error("Commit producing error", e)
                subscriber.onError(e)
            }
        } // TODO(anatoly): Rewrite diff calculation in non-weird way.
        .pairWithNext() // Pair commits to get diff.
        .map { (new, old) ->
            // Mapping and stats extraction.
            Logger.debug("Commit: ${new.raw?.name ?: ""}: "
                         + new.raw?.shortMessage)
            new.diffs = getDiffFiles(git, new, old)
            Logger.debug("Diff: ${new.diffs.size} entries")
            new.repo = repo
            new
        }

    /**
     * Computes the per-file diffs between the trees of two commits.
     *
     * Pure renames and binary files are left out. Entries whose object
     * can't be opened for the binary check are skipped as well.
     *
     * @param git JGit handle of the repository both commits belong to.
     * @param commitNew commit whose tree is the "new" side of the diff.
     * @param commitOld commit whose tree is the "old" side of the diff;
     * its `raw` may be null, in which case a null tree is passed to scan.
     * @return one [DiffFile] per remaining changed file.
     */
    private fun getDiffFiles(git: Git,
                             commitNew: Commit,
                             commitOld: Commit): List<DiffFile> {
        val revCommitNew: RevCommit? = commitNew.raw
        val revCommitOld: RevCommit? = commitOld.raw

        return DiffFormatter(DisabledOutputStream.INSTANCE).use { formatter ->
            formatter.setRepository(git.repository)
            formatter.setDetectRenames(true)
            formatter.scan(revCommitOld?.tree, revCommitNew?.tree)
                // RENAME change type doesn't change file content.
                .filter { it.changeType != DiffEntry.ChangeType.RENAME }
                // Skip binary files.
                .filter { diff ->
                    // A deleted file only has content on the old side.
                    val id = when (diff.changeType) {
                        DiffEntry.ChangeType.DELETE -> diff.oldId.toObjectId()
                        else -> diff.newId.toObjectId()
                    }
                    try {
                        // Close the object stream after the binary probe.
                        git.repository.open(id).openStream().use { stream ->
                            !RawText.isBinary(stream)
                        }
                    } catch (e: MissingObjectException) {
                        // Same exception getContentByObjectId tolerates;
                        // don't fail the whole commit stream because of it.
                        Logger.debug("Skipping diff entry, object not "
                                     + "found: " + id.name)
                        false
                    }
                }
                .map { diff ->
                    val new = getContentByObjectId(git,
                                                   diff.newId.toObjectId())
                    val old = getContentByObjectId(git,
                                                   diff.oldId.toObjectId())

                    val edits = formatter.toFileHeader(diff).toEditList()
                    val path = when (diff.changeType) {
                        DiffEntry.ChangeType.DELETE -> diff.oldPath
                        else -> diff.newPath
                    }
                    // A-side ranges index the old content, B-side ranges
                    // index the new content.
                    DiffFile(path = path,
                             changeType = diff.changeType,
                             old = DiffContent(old, edits.map { edit ->
                                 DiffRange(edit.beginA, edit.endA) }),
                             new = DiffContent(new, edits.map { edit ->
                                 DiffRange(edit.beginB, edit.endB) }))
                }
        }
    }

    /**
     * Loads the object with the given id and splits it into lines.
     *
     * @param git JGit handle of the repository to read from.
     * @param objectId id of the object to load.
     * @return the lines of the object, or an empty list when the object is
     * missing from the repository.
     */
    private fun getContentByObjectId(git: Git,
                                     objectId: ObjectId): List<String> {
        return try {
            val rawText = RawText(git.repository.open(objectId).bytes)
            (0 until rawText.size()).map { line -> rawText.getString(line) }
        } catch (e: MissingObjectException) {
            listOf()
        }
    }

    /**
     * Pairs every emitted item with the item emitted right after it.
     *
     * For a source emitting `a, b, c` the result emits `(a, b), (b, c)`.
     * Sources with fewer than two items produce no pairs.
     */
    private fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
        return this.map { emit -> Pair(emit, emit) }
            // Accumulate emits by prev-next pair.
            .scan { pairAccumulated, pairNext ->
                Pair(pairAccumulated.second, pairNext.second)
            }
            .skip(1) // Skip initial not paired emit.
    }
}

0 commit comments

Comments
 (0)