Skip to content

Commit b19f6cf

Browse files
feat: add filter of file by extensions (#221)
* feat: add filter of file by extensions
* refactor: move code
* chore: fix indent
1 parent d41e664 commit b19f6cf

File tree

4 files changed

+41
-7
lines changed

4 files changed

+41
-7
lines changed

src/main/kotlin/app/Main.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package app
55

66
import app.api.ServerApi
77
import app.config.FileConfigurator
8+
import app.extractors.Extractor
89
import app.model.LocalRepo
910
import app.ui.ConsoleUi
1011
import app.utils.CommandConfig

src/main/kotlin/app/extractors/Extractor.kt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,26 @@ class Extractor : ExtractorInterface {
1515
val TYPE_SYNTAX = 4
1616
val SEPARATOR = ">"
1717
val RESTRICTED_EXTS = listOf(".min.js")
18+
19+
fun getAllExtensions(): HashSet<String> {
20+
val set =
21+
CommonExtractor.FILE_EXTS_MAP.value.keys +
22+
CExtractor.FILE_EXTS +
23+
CppExtractor.FILE_EXTS +
24+
CSharpExtractor.FILE_EXTS +
25+
CssExtractor.FILE_EXTS +
26+
GoExtractor.FILE_EXTS +
27+
JavaExtractor.FILE_EXTS +
28+
JavascriptExtractor.FILE_EXTS +
29+
KotlinExtractor.FILE_EXTS +
30+
ObjectiveCExtractor.FILE_EXTS +
31+
PhpExtractor.FILE_EXTS +
32+
PythonExtractor.FILE_EXTS +
33+
RubyExtractor.FILE_EXTS +
34+
SwiftExtractor.FILE_EXTS
35+
36+
return set.toHashSet()
37+
}
1838
}
1939

2040
fun create(extension: String): ExtractorInterface {

src/main/kotlin/app/hashers/CommitCrawler.kt

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import app.model.DiffFile
1111
import app.model.DiffRange
1212
import app.model.Repo
1313
import app.utils.EmptyRepoException
14+
import app.utils.FileHelper
1415
import io.reactivex.Observable
1516
import org.apache.commons.codec.digest.DigestUtils
1617
import org.eclipse.jgit.api.Git
@@ -91,9 +92,9 @@ object CommitCrawler {
9192
fun getJGitObservable(git: Git,
9293
totalCommitCount: Int = 0,
9394
filteredEmails: HashSet<String>? = null,
94-
tail : RevCommit? = null) : Observable<JgitPair> =
95-
Observable.create { subscriber ->
96-
95+
tail : RevCommit? = null,
96+
allowedExts: HashSet<String>? = null) :
97+
Observable<JgitPair> = Observable.create { subscriber ->
9798
val repo: Repository = git.repository
9899
val revWalk = RevWalk(repo)
99100
val head: RevCommit =
@@ -146,14 +147,23 @@ object CommitCrawler {
146147
diff.changeType != DiffEntry.ChangeType.COPY
147148
}
148149
.filter { diff ->
150+
val path = diff.newPath
151+
val ext = FileHelper.getFileExtension(path)
152+
if (allowedExts != null && !allowedExts.contains(ext)) {
153+
return@filter false
154+
}
155+
149156
val fileId =
150-
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
157+
if (path != DiffEntry.DEV_NULL) {
151158
diff.getNewId().toObjectId()
152159
} else {
153160
diff.getOldId().toObjectId()
154161
}
155-
val stream = try { repo.open(fileId).openStream() }
156-
catch (e: Exception) { null }
162+
val stream = try {
163+
repo.open(fileId).openStream()
164+
} catch (e: Exception) {
165+
null
166+
}
157167
stream != null && !RawText.isBinary(stream)
158168
}
159169
.map { diff ->

src/main/kotlin/app/hashers/RepoHasher.kt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import app.BuildConfig
77
import app.Logger
88
import app.api.Api
99
import app.config.Configurator
10+
import app.extractors.Extractor
1011
import app.model.Author
1112
import app.model.LocalRepo
1213
import app.model.ProcessEntry
@@ -69,7 +70,9 @@ class RepoHasher(private val api: Api,
6970
filteredEmails
7071
} else null
7172
val jgitObservable = CommitCrawler.getJGitObservable(git,
72-
rehashes.size, crawlerEmails).publish()
73+
rehashes.size, crawlerEmails,
74+
allowedExts = Extractor.getAllExtensions()
75+
).publish()
7376
val observable = CommitCrawler.getObservable(git,
7477
jgitObservable, serverRepo)
7578

0 commit comments

Comments
 (0)