Skip to content

Commit 50a5f01

Browse files
authored
fix: add number of lines stats per commit, add binary filter, other minor fixes (#31)
* chore: remove excess constants from extractors, add detailed comment about timestamp to proto
* wip: filter binary files hashing, add number of lines per commit stats
* chore: private function, remove excess imports
* fix: disable slf4j
* chore: detailed format
1 parent 78ca97b commit 50a5f01

File tree

5 files changed

+29
-10
lines changed

5 files changed

+29
-10
lines changed

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ dependencies {
6363
compile 'com.github.kittinunf.fuel:fuel-rxjava:1.9.0'
6464
compile group: 'org.eclipse.jgit', name: 'org.eclipse.jgit',
6565
version: '4.8.0.201706111038-r'
66+
compile "org.slf4j:slf4j-nop:1.7.2"
6667

6768
testCompile 'org.jetbrains.kotlin:kotlin-test'
6869
testCompile 'org.jetbrains.spek:spek-api:1.1.4'

src/main/kotlin/app/extractors/JavaExtractor.kt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ class JavaExtractor : ExtractorInterface {
1313
val LANGUAGE_NAME = "java"
1414
val FILE_EXTS = listOf("java")
1515
}
16-
val NAME = "Java"
1716

1817
val KEYWORDS = listOf("abstract", "continue", "for", "new", "switch",
1918
"assert", "default", "goto", "package", "synchronized", "boolean",
@@ -49,7 +48,7 @@ class JavaExtractor : ExtractorInterface {
4948
numLinesAdded = totalAdded,
5049
numLinesDeleted = totalDeleted,
5150
type = Extractor.TYPE_KEYWORD,
52-
tech = NAME + Extractor.SEPARATOR + keyword))
51+
tech = LANGUAGE_NAME + Extractor.SEPARATOR + keyword))
5352
}
5453
}
5554

src/main/kotlin/app/extractors/ObjectiveCExtractor.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import app.model.DiffFile
1010
class ObjectiveCExtractor : ExtractorInterface {
1111
companion object {
1212
val LANGUAGE_NAME = "objectivec"
13-
val FILE_EXTS = listOf("h", "m", "mm")
13+
val FILE_EXTS = listOf("m", "mm")
1414
}
1515

1616
override fun extract(files: List<DiffFile>): List<CommitStats> {

src/main/kotlin/app/hashers/CommitHasher.kt

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,9 @@ package app.hashers
55

66
import app.Logger
77
import app.api.Api
8-
import app.config.Configurator
98
import app.extractors.Extractor
109
import app.model.Commit
1110
import app.model.DiffContent
12-
import app.model.DiffEdit
1311
import app.model.DiffFile
1412
import app.model.DiffRange
1513
import app.model.LocalRepo
@@ -23,6 +21,7 @@ import org.eclipse.jgit.lib.Repository
2321
import org.eclipse.jgit.revwalk.RevWalk
2422
import java.nio.charset.Charset
2523
import org.eclipse.jgit.diff.DiffFormatter
24+
import org.eclipse.jgit.diff.RawText
2625
import org.eclipse.jgit.lib.ObjectId
2726
import org.eclipse.jgit.errors.MissingObjectException
2827
import org.eclipse.jgit.revwalk.RevCommit
@@ -58,13 +57,23 @@ class CommitHasher(private val localRepo: LocalRepo,
5857
|| !knownCommits.contains(new) }
5958
.filter { (new, _) -> emailFilter(new) } // Email filtering.
6059
.map { (new, old) -> // Mapping and stats extraction.
61-
new.repo = repo
62-
val diffFiles = getDiffFiles(new, old)
6360
Logger.debug("Commit: ${new.raw?.name ?: ""}: "
6461
+ new.raw?.shortMessage)
62+
new.repo = repo
63+
64+
val diffFiles = getDiffFiles(new, old)
6565
Logger.debug("Diff: ${diffFiles.size} entries")
6666
new.stats = Extractor().extract(diffFiles)
6767
Logger.debug("Stats: ${new.stats.size} entries")
68+
69+
// Count lines across all non-binary files. This is an additional
70+
// statistic alongside CommitStats, because not all file extensions
71+
// may be supported.
72+
new.numLinesAdded = diffFiles.fold(0) { total, file ->
73+
total + file.getAllAdded().size }
74+
new.numLinesDeleted = diffFiles.fold(0) { total, file ->
75+
total + file.getAllDeleted().size }
76+
6877
new
6978
}
7079
.observeOn(Schedulers.io()) // Different thread for data sending.
@@ -81,18 +90,28 @@ class CommitHasher(private val localRepo: LocalRepo,
8190

8291
private fun getDiffFiles(commitNew: Commit,
8392
commitOld: Commit): List<DiffFile> {
84-
// TODO(anatoly): Binary files.
8593
val revCommitNew:RevCommit? = commitNew.raw
8694
val revCommitOld:RevCommit? = commitOld.raw
8795

8896
return DiffFormatter(DisabledOutputStream.INSTANCE).use { formatter ->
8997
formatter.setRepository(gitRepo)
98+
formatter.setDetectRenames(true)
9099
formatter.scan(revCommitOld?.tree, revCommitNew?.tree)
91100
// RENAME change type doesn't change file content.
92101
.filter { it.changeType != DiffEntry.ChangeType.RENAME }
102+
// Skip binary files.
103+
.filter {
104+
val id = if (it.changeType == DiffEntry.ChangeType.DELETE) {
105+
it.oldId.toObjectId()
106+
} else {
107+
it.newId.toObjectId()
108+
}
109+
!RawText.isBinary(gitRepo.open(id).openStream())
110+
}
93111
.map { diff ->
94112
val new = getContentByObjectId(diff.newId.toObjectId())
95113
val old = getContentByObjectId(diff.oldId.toObjectId())
114+
96115
val edits = formatter.toFileHeader(diff).toEditList()
97116
val path = when (diff.changeType) {
98117
DiffEntry.ChangeType.DELETE -> diff.oldPath
@@ -152,7 +171,7 @@ class CommitHasher(private val localRepo: LocalRepo,
152171
repo.emails.contains(email))
153172
}
154173

155-
fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
174+
private fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
156175
return this.map { emit -> Pair(emit, emit) }
157176
// Accumulate emits by prev-next pair.
158177
.scan { pairAccumulated, pairNext ->

src/main/proto/sourcerer.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ message Commit {
2323
string author_name = 4;
2424
string author_email = 5;
2525

26-
// Timestamp of a commit creation.
26+
// Timestamp of commit creation, in seconds (UTC+0).
2727
uint32 date = 6;
2828

2929
// Is quality commit.

0 commit comments

Comments
 (0)