@@ -57,7 +57,7 @@ object CommitCrawler {
5757 }
5858
5959 fun fetchRehashesAndAuthors (git : Git ):
60- Pair <LinkedList <String >, HashSet <Author >> {
60+ Triple <LinkedList <String >, HashSet <Author >, HashMap < String , Int >> {
6161 val head: RevCommit = RevWalk (git.repository)
6262 .parseCommit(getDefaultBranchHead(git))
6363
@@ -67,6 +67,7 @@ object CommitCrawler {
6767 val commitsRehashes = LinkedList <String >()
6868 val emails = hashSetOf<String >()
6969 val names = hashMapOf<String , String >()
70+ val commitsCount = hashMapOf<String , Int >()
7071
7172 var commit: RevCommit ? = revWalk.next()
7273 while (commit != null ) {
@@ -81,6 +82,7 @@ object CommitCrawler {
8182 names[email] = name
8283 }
8384 }
85+ commitsCount[email] = commitsCount.getOrDefault(email, 0 ) + 1
8486
8587 commit.disposeBody()
8688 commit = revWalk.next()
@@ -90,11 +92,12 @@ object CommitCrawler {
9092 val authors = emails.map { email -> Author (names[email]!! , email) }
9193 .toHashSet()
9294
93- return Pair (commitsRehashes, authors)
95+ return Triple (commitsRehashes, authors, commitsCount )
9496 }
9597
9698 fun getJGitObservable (git : Git ,
9799 totalCommitCount : Int = 0,
100+ filteredEmails : HashSet <String >? = null,
98101 tail : RevCommit ? = null) : Observable <JgitPair > =
99102 Observable .create { subscriber ->
100103
@@ -138,8 +141,29 @@ object CommitCrawler {
138141 } else 0.0
139142 Logger .printCommit(commit.shortMessage, commit.name, perc)
140143
144+ val email = commit.authorIdent.emailAddress
145+ if (filteredEmails != null && ! filteredEmails.contains(email)) {
146+ commit = parentCommit
147+ continue
148+ }
149+
141150 val diffEntries = df.scan(parentCommit, commit)
142- val diffEdits = diffEntries.map { diff ->
151+ val diffEdits = diffEntries
152+ .filter { diff ->
153+ diff.changeType != DiffEntry .ChangeType .COPY
154+ }
155+ .filter { diff ->
156+ val fileId =
157+ if (diff.getNewPath() != DiffEntry .DEV_NULL ) {
158+ diff.getNewId().toObjectId()
159+ } else {
160+ diff.getOldId().toObjectId()
161+ }
162+ val stream = try { repo.open(fileId).openStream() }
163+ catch (e: Exception ) { null }
164+ stream != null && ! RawText .isBinary(stream)
165+ }
166+ .map { diff ->
143167 JgitDiff (diff, df.toFileHeader(diff).toEditList())
144168 }
145169 subscriber.onNext(JgitPair (commit, diffEdits))
@@ -180,18 +204,6 @@ object CommitCrawler {
180204 private fun getDiffFiles (jgitRepo : Repository ,
181205 jgitDiffs : List <JgitDiff >) : List <DiffFile > {
182206 return jgitDiffs
183- // Skip binary files.
184- .filter { (diff, _) ->
185- val fileId =
186- if (diff.getNewPath() != DiffEntry .DEV_NULL ) {
187- diff.getNewId().toObjectId()
188- } else {
189- diff.getOldId().toObjectId()
190- }
191- val stream = try { jgitRepo.open(fileId).openStream() }
192- catch (e: Exception ) { null }
193- stream != null && ! RawText .isBinary(stream)
194- }
195207 .map { (diff, edits) ->
196208 // TODO(anatoly): Can produce exception for large object.
197209 // Investigate for size.
0 commit comments