Skip to content

Commit 32b50d9

Browse files
Merge pull request #238 from sourcerer-io/develop
fix: different case email problem
2 parents a83fe48 + a1cf7b1 commit 32b50d9

File tree

3 files changed

+180
-1
lines changed

3 files changed

+180
-1
lines changed

src/main/kotlin/app/hashers/CommitCrawler.kt

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ import app.model.Repo
1313
import app.utils.EmptyRepoException
1414
import app.utils.FileHelper
1515
import io.reactivex.Observable
16+
import java.io.BufferedReader
17+
import java.io.FileReader
18+
import java.io.File
19+
import java.io.InputStreamReader
1620
import org.apache.commons.codec.digest.DigestUtils
1721
import org.eclipse.jgit.api.Git
1822
import org.eclipse.jgit.diff.DiffEntry
@@ -23,6 +27,8 @@ import org.eclipse.jgit.lib.ObjectId
2327
import org.eclipse.jgit.lib.Repository
2428
import org.eclipse.jgit.revwalk.RevCommit
2529
import org.eclipse.jgit.revwalk.RevWalk
30+
import org.eclipse.jgit.treewalk.filter.PathFilter
31+
import org.eclipse.jgit.treewalk.TreeWalk
2632
import org.eclipse.jgit.util.io.DisabledOutputStream
2733
import java.util.LinkedList
2834

@@ -39,6 +45,7 @@ object CommitCrawler {
3945
private const val LOCAL_HEAD = "HEAD"
4046
private val REFS = listOf(REMOTE_HEAD, REMOTE_MASTER_BRANCH,
4147
LOCAL_MASTER_BRANCH, LOCAL_HEAD)
48+
private val CONF_FILE_PATH = ".sourcerer-conf"
4249
private val MAX_DIFF_SIZE = 600000
4350

4451
fun getDefaultBranchHead(git: Git): ObjectId {
@@ -106,6 +113,18 @@ object CommitCrawler {
106113
df.setRepository(repo)
107114
df.isDetectRenames = true
108115

116+
val confTreeWalk = TreeWalk(repo)
117+
confTreeWalk.addTree(head.getTree())
118+
confTreeWalk.setFilter(PathFilter.create(CONF_FILE_PATH))
119+
120+
var ignoredPaths =
121+
if (confTreeWalk.next()) {
122+
getIgnoredPaths(repo, confTreeWalk.getObjectId(0))
123+
}
124+
else {
125+
listOf()
126+
}
127+
109128
var commitCount = 0
110129
revWalk.markStart(head)
111130
var commit: RevCommit? = revWalk.next() // Move the walker to the head.
@@ -167,6 +186,31 @@ object CommitCrawler {
167186
}
168187
stream != null && !RawText.isBinary(stream)
169188
}
189+
.filter { diff ->
190+
val filePath =
191+
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
192+
diff.getNewPath()
193+
} else {
194+
diff.getOldPath()
195+
}
196+
197+
// Update ignored paths list. The config file has retroactive
198+
// force, i.e. if it was added at this commit, then we presume
199+
// it is applied to all commits, preceding this commit.
200+
if (diff.getOldPath() == CONF_FILE_PATH) {
201+
ignoredPaths =
202+
getIgnoredPaths(repo, diff.getNewId().toObjectId())
203+
}
204+
205+
!ignoredPaths.any { path ->
206+
if (path.endsWith("/")) {
207+
filePath.startsWith(path)
208+
}
209+
else {
210+
path == filePath
211+
}
212+
}
213+
}
170214
.map { diff ->
171215
JgitDiff(diff, df.toFileHeader(diff).toEditList())
172216
}
@@ -264,4 +308,39 @@ object CommitCrawler {
264308
listOf()
265309
}
266310
}
311+
312+
/**
 * Returns the list of paths that should be ignored in commit analysis,
 * as listed in the configuration blob identified by [objectId].
 *
 * The blob is read line by line:
 *  - empty lines and lines starting with `#` are skipped;
 *  - a line starting with `[` is a section header, and only the section
 *    named exactly `[ignore]` is collected;
 *  - every other line inside the `[ignore]` section is returned verbatim.
 *    Lines are not trimmed; interpretation of the entries is left to the
 *    caller.
 *
 * Reading is best-effort: a config that cannot be read must not abort the
 * analysis, so any failure results in an empty list.
 *
 * @param repo repository the configuration blob is loaded from.
 * @param objectId id of the configuration blob, or null if there is none.
 * @return ignored paths in file order; an empty list if [objectId] is null
 * or the blob cannot be opened or read.
 */
private fun getIgnoredPaths(repo: Repository, objectId: ObjectId?): List<String> {
    if (objectId == null) {
        return listOf()
    }

    return try {
        // `use` closes the blob stream on every exit path, including when
        // reading fails half-way through.
        repo.open(objectId).openStream().use { stream ->
            val ignoredPaths = mutableListOf<String>()
            var collectIgnored = false
            // Decode explicitly as UTF-8 so that the parsed paths do not
            // depend on the default charset of the machine running the app.
            stream.bufferedReader(Charsets.UTF_8).useLines { lines ->
                for (line in lines) {
                    when {
                        // Blank line or comment: nothing to do.
                        line == "" || line.startsWith("#") -> {}
                        // Section header: collect only inside [ignore].
                        line.startsWith("[") ->
                            collectIgnored = (line == "[ignore]")
                        collectIgnored -> ignoredPaths.add(line)
                    }
                }
            }
            ignoredPaths
        }
    }
    catch (e: Exception) {
        // Deliberately best-effort: fall back to "nothing is ignored".
        listOf()
    }
}
267346
}

src/main/kotlin/app/model/Commit.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ data class Commit(
3434

3535
rehash = DigestUtils.sha256Hex(revCommit.id.name)
3636
author = Author(revCommit.authorIdent.name,
37-
revCommit.authorIdent.emailAddress)
37+
revCommit.authorIdent.emailAddress.toLowerCase())
3838
dateTimestamp = revCommit.authorIdent.getWhen().time / 1000
3939
dateTimeZoneOffset = revCommit.authorIdent.timeZoneOffset
4040
treeRehash = DigestUtils.sha256Hex(revCommit.tree.name)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright 2018 Sourcerer Inc. All Rights Reserved.
2+
// Author: Alexander Surkov ([email protected])
3+
4+
package test.tests.hashers
5+
6+
import app.api.MockApi
7+
import app.extractors.Extractor
8+
import app.hashers.CommitHasher
9+
import app.hashers.CommitCrawler
10+
import app.model.*
11+
import app.utils.RepoHelper
12+
import org.eclipse.jgit.api.Git
13+
import org.jetbrains.spek.api.Spek
14+
import org.jetbrains.spek.api.dsl.given
15+
import org.jetbrains.spek.api.dsl.it
16+
import test.utils.TestRepo
17+
import java.io.File
18+
import java.util.stream.StreamSupport.stream
19+
import kotlin.streams.toList
20+
import kotlin.test.assertEquals
21+
22+
/**
 * Checks that paths listed in the `[ignore]` section of the
 * `.sourcerer-conf` file are left out of the collected commit statistics.
 */
class IgnorePathsTest : Spek({
    // Runs the helper script that removes repos left over from test runs.
    fun purgeTestRepos() {
        val cleanup = Runtime.getRuntime().exec("src/test/delete_repo.sh")
        cleanup.waitFor()
    }

    val userName = "Contributor"
    val userEmail = "[email protected]"

    // Start from a clean state before the test repo is created.
    purgeTestRepos()

    given("commits with syntax stats") {
        // Python sources; several of the lines contain `map` calls.
        val pythonLines = listOf(
            "x = [i**2 for i range(9999)]",
            "def fn()",
            "x = 1",
            "x = map(lambda x: x**2, range(9999))",
            "x = map(lambda x: x**2, map(lambda x: x**3, range(10))",
            "x = map(lambda x: x**2, range(10))," +
                "map(lambda x: x**3, range(10)))"
        )

        val author = Author(userName, userEmail)
        val emails = hashSetOf(userEmail)

        val testRepo = TestRepo("../IgnorePaths_t1")
        val serverRepo = Repo(rehash = "rehash_IgnorePaths_t1")
        val mockApi = MockApi(mockRepo = serverRepo)
        val observable = CommitCrawler.getObservable(testRepo.git, serverRepo)

        it("t1") {
            testRepo.createFile("test.py", pythonLines)
            testRepo.commit(message = "commit1", author = author)

            testRepo.createFile("ignore.py", pythonLines)
            testRepo.commit(message = "commit2", author = author)

            // The config is added after ignore.py was committed; the
            // earlier ignore.py commit is still expected to be excluded
            // from the stats.
            val confLines = listOf("[ignore]", "ignore.py", "#test.py")
            testRepo.createFile(".sourcerer-conf", confLines)
            testRepo.commit(message = "commit3", author = author)

            // Replace the commented-out entry with a live "test.py" entry,
            // then delete test.py. The deletion is expected to be excluded
            // from the stats.
            testRepo.deleteLines(".sourcerer-conf", 1, 1)
            testRepo.insertLines(".sourcerer-conf", 1, listOf("test.py"))
            testRepo.commit(message = "commit4", author = author)

            testRepo.deleteFile("test.py")
            testRepo.commit(message = "commit5", author = author)

            val errors = mutableListOf<Throwable>()
            val hasher = CommitHasher(serverRepo, mockApi, listOf("rehashes"),
                                      emails)
            hasher.updateFromObservable(observable, { errors.add(it) })
            // Print the first failure to make a red run easier to diagnose.
            errors.firstOrNull()?.let { firstError ->
                println(firstError.message)
            }
            assertEquals(0, errors.size)

            // Gather the stats of every commit the mock API received.
            val collectedStats = mutableListOf<CommitStats>()
            for (commit in mockApi.receivedAddedCommits) {
                collectedStats.addAll(commit.stats)
            }
            val syntaxStats = collectedStats.filter { stat ->
                stat.type == Extractor.TYPE_SYNTAX
            }

            val mapStats = syntaxStats.filter { stat ->
                stat.tech == "python>map"
            }
            val linesAdded = mapStats.map { it.numLinesAdded }.sum()
            val linesDeleted = mapStats.map { it.numLinesDeleted }.sum()
            assertEquals(1, mapStats.size)
            assertEquals(5, linesAdded)
            assertEquals(0, linesDeleted)
        }

        afterGroup {
            testRepo.destroy()
        }
    }

    purgeTestRepos()
})

0 commit comments

Comments
 (0)