@@ -5,11 +5,9 @@ package app.hashers
55
66import app.Logger
77import app.api.Api
8- import app.config.Configurator
98import app.extractors.Extractor
109import app.model.Commit
1110import app.model.DiffContent
12- import app.model.DiffEdit
1311import app.model.DiffFile
1412import app.model.DiffRange
1513import app.model.LocalRepo
@@ -23,6 +21,7 @@ import org.eclipse.jgit.lib.Repository
2321import org.eclipse.jgit.revwalk.RevWalk
2422import java.nio.charset.Charset
2523import org.eclipse.jgit.diff.DiffFormatter
24+ import org.eclipse.jgit.diff.RawText
2625import org.eclipse.jgit.lib.ObjectId
2726import org.eclipse.jgit.errors.MissingObjectException
2827import org.eclipse.jgit.revwalk.RevCommit
@@ -58,13 +57,23 @@ class CommitHasher(private val localRepo: LocalRepo,
5857 || ! knownCommits.contains(new) }
5958 .filter { (new, _) -> emailFilter(new) } // Email filtering.
6059 .map { (new, old) -> // Mapping and stats extraction.
61- new.repo = repo
62- val diffFiles = getDiffFiles(new, old)
6360 Logger .debug(" Commit: ${new.raw?.name ? : " " } : "
6461 + new.raw?.shortMessage)
62+ new.repo = repo
63+
64+ val diffFiles = getDiffFiles(new, old)
6565 Logger .debug(" Diff: ${diffFiles.size} entries" )
6666 new.stats = Extractor ().extract(diffFiles)
6767 Logger .debug(" Stats: ${new.stats.size} entries" )
68+
69+ // Count lines on all non-binary files. These totals are kept
70+ // in addition to CommitStats because not all file extensions
71+ // may be supported.
72+ new.numLinesAdded = diffFiles.fold(0 ) { total, file ->
73+ total + file.getAllAdded().size }
74+ new.numLinesDeleted = diffFiles.fold(0 ) { total, file ->
75+ total + file.getAllDeleted().size }
76+
6877 new
6978 }
7079 .observeOn(Schedulers .io()) // Different thread for data sending.
@@ -81,18 +90,28 @@ class CommitHasher(private val localRepo: LocalRepo,
8190
8291 private fun getDiffFiles (commitNew : Commit ,
8392 commitOld : Commit ): List <DiffFile > {
84- // TODO(anatoly): Binary files.
8593 val revCommitNew: RevCommit ? = commitNew.raw
8694 val revCommitOld: RevCommit ? = commitOld.raw
8795
8896 return DiffFormatter (DisabledOutputStream .INSTANCE ).use { formatter ->
8997 formatter.setRepository(gitRepo)
98+ formatter.setDetectRenames(true )
9099 formatter.scan(revCommitOld?.tree, revCommitNew?.tree)
91100 // RENAME change type doesn't change file content.
92101 .filter { it.changeType != DiffEntry .ChangeType .RENAME }
102+ // Skip binary files.
103+ .filter {
104+ val id = if (it.changeType == DiffEntry .ChangeType .DELETE ) {
105+ it.oldId.toObjectId()
106+ } else {
107+ it.newId.toObjectId()
108+ }
109+ ! RawText .isBinary(gitRepo.open(id).openStream())
110+ }
93111 .map { diff ->
94112 val new = getContentByObjectId(diff.newId.toObjectId())
95113 val old = getContentByObjectId(diff.oldId.toObjectId())
114+
96115 val edits = formatter.toFileHeader(diff).toEditList()
97116 val path = when (diff.changeType) {
98117 DiffEntry .ChangeType .DELETE -> diff.oldPath
@@ -152,7 +171,7 @@ class CommitHasher(private val localRepo: LocalRepo,
152171 repo.emails.contains(email))
153172 }
154173
155- fun <T > Observable<T>.pairWithNext (): Observable <Pair <T , T >> {
174+ private fun <T > Observable<T>.pairWithNext (): Observable <Pair <T , T >> {
156175 return this .map { emit -> Pair (emit, emit) }
157176 // Accumulate emits by prev-next pair.
158177 .scan { pairAccumulated, pairNext ->
0 commit comments