Skip to content

Commit ef581da

Browse files
authored
feat: refactor CodeLongevity to use CommitCrawler (APP-98) (#80)
1 parent 00d7c67 commit ef581da

File tree

5 files changed

+355
-353
lines changed

5 files changed

+355
-353
lines changed

src/main/kotlin/app/hashers/CodeLongevity.kt

Lines changed: 95 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright 2017 Sourcerer Inc. All Rights Reserved.
22
// Author: Alexander Surkov ([email protected])
3+
// Author: Anatoly Kislov ([email protected])
34

45
package app.hashers
56

@@ -11,7 +12,6 @@ import app.model.Repo
1112
import app.model.Fact
1213
import app.utils.FileHelper
1314
import io.reactivex.Observable
14-
import org.eclipse.jgit.diff.DiffFormatter
1515
import org.eclipse.jgit.diff.DiffEntry
1616
import org.eclipse.jgit.diff.RawText
1717
import org.eclipse.jgit.api.Git
@@ -20,7 +20,6 @@ import org.eclipse.jgit.lib.Repository
2020
import org.eclipse.jgit.revwalk.RevCommit
2121
import org.eclipse.jgit.revwalk.RevWalk
2222
import org.eclipse.jgit.treewalk.TreeWalk
23-
import org.eclipse.jgit.util.io.DisabledOutputStream
2423

2524
import java.io.FileInputStream
2625
import java.io.FileNotFoundException
@@ -154,7 +153,7 @@ class Colleagues {
154153
val month = SimpleDateFormat("yyyy-MM").format(line.editDate)
155154

156155
Logger.trace { "collected colleague, age: ${line.age}" }
157-
var vicinity = dates.getOrPut(month, { line.age })
156+
val vicinity = dates.getOrPut(month, { line.age })
158157
if (vicinity > line.age) {
159158
dates.put(month, line.age)
160159
}
@@ -184,7 +183,7 @@ class Colleagues {
184183
val colleagueEmail =
185184
if (email == pair.first) pair.second else pair.first
186185

187-
var list = mutableListOf<Triple<String, String, Long>>()
186+
val list = mutableListOf<Triple<String, String, Long>>()
188187
dates.forEach { month, vicinity ->
189188
list.add(Triple(colleagueEmail, month, vicinity))
190189
}
@@ -196,9 +195,10 @@ class Colleagues {
196195
/**
197196
* A data class used to store line age information.
198197
*/
199-
class CodeLineAges : Serializable {
198+
class CodeLineAges : Serializable, Cloneable {
200199
/**
201-
* A pair of (line age sum, line count) representing an aggregated line ages.
200+
* A pair of (line age sum, line count) representing aggregated line
201+
* ages.
202202
*/
203203
data class AggrAge(var sum: Long = 0L, var count: Int = 0) : Serializable
204204

@@ -217,37 +217,38 @@ class CodeLineAges : Serializable {
217217
* A map of existing code lines ids to their ages at the revision.
218218
*/
219219
var lastingLines: HashMap<String, LineInfo> = hashMapOf()
220+
221+
override public fun clone(): CodeLineAges {
222+
val clone = CodeLineAges()
223+
aggrAges.forEach { (email, age) ->
224+
clone.aggrAges[email] = age.copy() }
225+
lastingLines.forEach { (email, line) ->
226+
clone.lastingLines[email] = line.copy() }
227+
return clone
228+
}
220229
}
221230

222231
/**
223232
* Used to compute age of code lines in the repo.
224233
*/
225-
class CodeLongevity(private val serverRepo: Repo,
226-
private val emails: HashSet<String>,
227-
git: Git,
228-
private val onError: (Throwable) -> Unit) {
234+
class CodeLongevity(
235+
private val serverRepo: Repo,
236+
private val emails: HashSet<String>,
237+
private val git: Git) {
238+
229239
val repo: Repository = git.repository
230240
val revWalk = RevWalk(repo)
231241
val head: RevCommit =
232242
try { revWalk.parseCommit(CommitCrawler.getDefaultBranchHead(git)) }
233243
catch(e: Exception) { throw Exception("No branch") }
234244

235-
val df = DiffFormatter(DisabledOutputStream.INSTANCE)
236245
val dataPath = FileHelper.getPath(serverRepo.rehash, "longevity")
237246
val colleagues = Colleagues()
238247

239-
init {
240-
df.setRepository(repo)
241-
df.setDetectRenames(true)
242-
}
243-
244248
/**
245-
* Update code line age statistics on the server.
249+
* Updates code line age statistics on the server.
246250
*/
247-
fun updateStats(api: Api) {
248-
// If no changes, then nothing to update, return early.
249-
val ages = scan() ?: return
250-
251+
private fun calculateAndSendFacts(ages: CodeLineAges, api: Api) {
251252
var repoTotal = 0
252253
var repoSum: Long = 0
253254
val aggrAges : HashMap<String, CodeLineAges.AggrAge> = hashMapOf()
@@ -277,9 +278,8 @@ class CodeLongevity(private val serverRepo: Repo,
277278
code = FactCodes.LINE_LONGEVITY_REPO,
278279
value = repoAvg.toString()))
279280
val repoAvgDays = repoAvg / secondsInDay
280-
Logger.info {
281-
"Repo average code line age is $repoAvgDays days, lines total: $repoTotal"
282-
}
281+
Logger.info { "Repo average code line age is $repoAvgDays days, " +
282+
"lines total: $repoTotal" }
283283

284284
for (email in emails) {
285285
val aggrAge = aggrAges[email] ?: CodeLineAges.AggrAge()
@@ -296,13 +296,17 @@ class CodeLongevity(private val serverRepo: Repo,
296296
Logger.info { "Sent ${stats.size} facts to server" }
297297
}
298298

299-
colleagues.updateStats();
299+
colleagues.updateStats()
300300
}
301301

302302
/**
303303
* Scans the repo to extract code line ages.
304304
*/
305-
fun scan() : CodeLineAges? {
305+
fun updateFromObservable(diffObservable: Observable<JgitPair> =
306+
CommitCrawler.getJGitObservable(git),
307+
onError: (Throwable) -> Unit = {},
308+
api: Api,
309+
onDataComplete: (CodeLineAges) -> Unit = {}) {
306310
var storedHead: RevCommit? = null
307311
var ageData = CodeLineAges()
308312

@@ -315,20 +319,18 @@ class CodeLongevity(private val serverRepo: Repo,
315319
Logger.debug { "Stored repo head: $storedHeadId" }
316320
storedHead = revWalk.parseCommit(repo.resolve(storedHeadId))
317321
if (storedHead == head) {
318-
return null
322+
return // TODO(anatoly): Send saved stats in such case.
319323
}
320324
ageData = (iStream.readObject() ?: CodeLineAges()) as CodeLineAges
321325
}
322326
catch(e: FileNotFoundException) { }
323-
catch(e: Exception) {
324-
Logger.error(
325-
e,
326-
"Failed to read longevity data. CAUTION: data will be recomputed."
327-
)
328-
}
327+
catch(e: Exception) { Logger.error(e, "Failed to read longevity " +
328+
"data. CAUTION: data will be recomputed.") }
329329

330330
// Update ages.
331-
getLinesObservable(storedHead).blockingSubscribe { line ->
331+
getLinesObservable(storedHead, diffObservable, onError).subscribe({
332+
line ->
333+
Logger.trace { "Scanning: ${line}" }
332334
if (line.isDeleted) {
333335
if (ageData.lastingLines.contains(line.oldId)) {
334336
line.age += ageData.lastingLines.remove(line.oldId)!!.age
@@ -338,29 +340,30 @@ class CodeLongevity(private val serverRepo: Repo,
338340
aggrAge.sum += line.age
339341
aggrAge.count += 1
340342

341-
colleagues.collect(line);
343+
colleagues.collect(line)
342344
} else {
343345
var age = line.age
344346
if (ageData.lastingLines.contains(line.oldId)) {
345347
age += ageData.lastingLines.remove(line.oldId)!!.age
346348
}
347-
ageData.lastingLines.put(line.newId,
348-
CodeLineAges.LineInfo(age, line.authorEmail))
349+
ageData.lastingLines.put(line.newId, CodeLineAges.LineInfo(age,
350+
line.authorEmail))
349351
}
350-
}
351-
352-
// Store ages for subsequent runs.
353-
try {
354-
val file = dataPath.toFile()
355-
val oStream = ObjectOutputStream(FileOutputStream(file))
356-
oStream.writeUTF(head.getName())
357-
oStream.writeObject(ageData)
358-
}
359-
catch(e: Exception) {
360-
Logger.error(e, "Failed to save longevity data. CAUTION: data " +
361-
"will be recomputed on a next run.")
362-
}
363-
return ageData
352+
}, onError, {
353+
// Store ages for subsequent runs.
354+
try {
355+
val file = dataPath.toFile()
356+
val oStream = ObjectOutputStream(FileOutputStream(file))
357+
oStream.writeUTF(head.getName())
358+
oStream.writeObject(ageData)
359+
}
360+
catch(e: Exception) {
361+
Logger.error(e, "Failed to save longevity data. " +
362+
"CAUTION: data will be recomputed on a next run.")
363+
}
364+
onDataComplete(ageData)
365+
calculateAndSendFacts(ageData, api)
366+
})
364367
}
365368

366369
/**
@@ -374,10 +377,13 @@ class CodeLongevity(private val serverRepo: Repo,
374377
* Returns a list of code lines, both alive and deleted, between
375378
* the revisions of the repo.
376379
*/
377-
fun getLinesList(tail : RevCommit? = null) : List<CodeLine> {
380+
fun getLinesList(tail : RevCommit? = null,
381+
diffObservable: Observable<JgitPair> =
382+
CommitCrawler.getJGitObservable(git),
383+
onError: (Throwable) -> Unit = {}) : List<CodeLine> {
378384
val codeLines: MutableList<CodeLine> = mutableListOf()
379-
getLinesObservable(tail).blockingSubscribe { line ->
380-
codeLines.add(line)
385+
getLinesObservable(tail, diffObservable, onError).blockingSubscribe {
386+
line -> codeLines.add(line)
381387
}
382388
return codeLines
383389
}
@@ -386,7 +392,10 @@ class CodeLongevity(private val serverRepo: Repo,
386392
* Returns an observable for code lines, both alive and deleted, between
387393
* the revisions of the repo.
388394
*/
389-
fun getLinesObservable(tail : RevCommit? = null) : Observable<CodeLine> =
395+
fun getLinesObservable(tail : RevCommit? = null,
396+
diffObservable: Observable<JgitPair>,
397+
onError: (Throwable) -> Unit)
398+
: Observable<CodeLine> =
390399
Observable.create { subscriber ->
391400

392401
val headWalk = TreeWalk(repo)
@@ -414,12 +423,14 @@ class CodeLongevity(private val serverRepo: Repo,
414423
}
415424
}
416425

417-
getDiffsObservable(tail).blockingSubscribe( { (commit, diffs) ->
426+
diffObservable
427+
.takeWhile { (commit, _) -> commit != tail }
428+
.subscribe( { (commit, diffs) ->
418429
// A step back in commits history. Update the files map according
419430
// to the diff. Traverse the diffs backwards to handle double
420431
// renames properly.
421432
// TODO(alex): cover file renames by tests (see APP-132 issue).
422-
for (diff in diffs.asReversed()) {
433+
for ((diff, editList) in diffs.asReversed()) {
423434
val oldPath = diff.getOldPath()
424435
val oldId = diff.getOldId().toObjectId()
425436
val newPath = diff.getNewPath()
@@ -461,7 +472,6 @@ class CodeLongevity(private val serverRepo: Repo,
461472
// Update the lines array according to diff insertions.
462473
// Traverse the edit list backwards to keep indices of
463474
// the edit list and the lines array in sync.
464-
val editList = df.toFileHeader(diff).toEditList()
465475
for (edit in editList.asReversed()) {
466476
// Insertion case: track the lines.
467477
val insCount = edit.getLengthB()
@@ -480,8 +490,9 @@ class CodeLongevity(private val serverRepo: Repo,
480490
subscriber.onNext(cl)
481491
}
482492
catch(e: IndexOutOfBoundsException) {
483-
Logger.error(e,
484-
"No line at ${idx}; commit: ${commit.getName()}; '${commit.getShortMessage()}'")
493+
Logger.error(e, "No line at ${idx}; commit: " +
494+
"${commit.getName()}; " +
495+
"'${commit.getShortMessage()}'")
485496
throw e
486497
}
487498
}
@@ -513,71 +524,32 @@ class CodeLongevity(private val serverRepo: Repo,
513524
files.set(oldPath, files.remove(newPath)!!)
514525
}
515526
}
516-
}, onError)
517-
518-
// If a tail revision was given then the map has to contain unclaimed
519-
// code lines, i.e. the lines added before the tail revision. Push
520-
// them all into the result lines list, so the caller can update their
521-
// ages properly.
522-
if (tail != null) {
523-
val tailWalk = TreeWalk(repo)
524-
tailWalk.setRecursive(true)
525-
tailWalk.addTree(tail.getTree())
526-
527-
while (tailWalk.next()) {
528-
val filePath = tailWalk.getPathString()
529-
val lines = files.get(filePath)
530-
if (lines != null) {
531-
val fileId = tailWalk.getObjectId(0)
532-
for (idx in 0 .. lines.size - 1) {
533-
val from = RevCommitLine(tail, fileId,
534-
filePath, idx, false)
535-
val cl = CodeLine(repo, from, lines[idx])
536-
Logger.trace { "Collected (tail): $cl" }
537-
subscriber.onNext(cl)
538-
}
539-
}
540-
}
541-
}
542-
543-
subscriber.onComplete()
544-
}
545-
546-
/**
547-
* Iterates over the diffs between commits in the repo's history.
548-
*/
549-
private fun getDiffsObservable(tail : RevCommit?) :
550-
Observable<Pair<RevCommit, List<DiffEntry>>> =
551-
Observable.create { subscriber ->
552-
553-
revWalk.markStart(head)
554-
var commit: RevCommit? = revWalk.next() // Move the walker to the head.
555-
while (commit != null && commit != tail) {
556-
val parentCommit: RevCommit? = revWalk.next()
557-
558-
// Smart casts are not yet supported for a mutable variable captured
559-
// in an inline lambda, see
560-
// https://youtrack.jetbrains.com/issue/KT-7186.
561-
if (Logger.isDebug) {
562-
val commitName = commit.getName()
563-
val commitMsg = commit.getShortMessage()
564-
Logger.debug { "commit: $commitName; '$commitMsg'" }
565-
if (parentCommit != null) {
566-
val parentCommitName = parentCommit.getName()
567-
val parentCommitMsg = parentCommit.getShortMessage()
568-
Logger.debug {
569-
"parent commit: ${parentCommitName}; '${parentCommitMsg}'"
527+
}, onError, {
528+
// If a tail revision was given then the map has to contain
529+
// unclaimed code lines, i.e. the lines added before the tail
530+
// revision. Push them all into the result lines list, so the
531+
// caller can update their ages properly.
532+
if (tail != null) {
533+
val tailWalk = TreeWalk(repo)
534+
tailWalk.setRecursive(true)
535+
tailWalk.addTree(tail.getTree())
536+
537+
while (tailWalk.next()) {
538+
val filePath = tailWalk.getPathString()
539+
val lines = files.get(filePath)
540+
if (lines != null) {
541+
val fileId = tailWalk.getObjectId(0)
542+
for (idx in 0 .. lines.size - 1) {
543+
val from = RevCommitLine(tail, fileId,
544+
filePath, idx, false)
545+
val cl = CodeLine(repo, from, lines[idx])
546+
Logger.trace { "Collected (tail): $cl" }
547+
subscriber.onNext(cl)
548+
}
570549
}
571550
}
572-
else {
573-
Logger.debug { "parent commit: null" }
574-
}
575551
}
576-
577-
subscriber.onNext(Pair(commit, df.scan(parentCommit, commit)))
578-
commit = parentCommit
579-
}
580-
581-
subscriber.onComplete()
552+
subscriber.onComplete()
553+
})
582554
}
583555
}

0 commit comments

Comments
 (0)