Skip to content

Commit a2f98cc

Browse files
asurkov authored and anatolystansler committed
feat: borrow heuristics from linguist (#274)
* feat: borrow heuristics from linguist
* address comments
* fix
1 parent f85a069 commit a2f98cc

16 files changed: 560 additions & 133 deletions

src/main/kotlin/app/extractors/CExtractor.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import app.model.DiffFile
1010
class CExtractor : ExtractorInterface {
1111
companion object {
1212
const val LANGUAGE_NAME = "c"
13-
val FILE_EXTS = listOf("c")
1413
val evaluator by lazy {
1514
ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME)
1615
}

src/main/kotlin/app/extractors/CSharpExtractor.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import app.model.DiffFile
1010
class CSharpExtractor : ExtractorInterface {
1111
companion object {
1212
val LANGUAGE_NAME = "csharp"
13-
val FILE_EXTS = listOf("cs")
1413
val LIBRARIES = ExtractorInterface.getLibraries("cs")
1514
val evaluator by lazy {
1615
ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME)

src/main/kotlin/app/extractors/CommonExtractor.kt

Lines changed: 2 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -6,86 +6,9 @@ package app.extractors
66
import app.model.CommitStats
77
import app.model.DiffFile
88

9-
class CommonExtractor : ExtractorInterface {
10-
companion object {
11-
val FILE_EXTS_MAP = lazy {
12-
val reversedMap = mutableMapOf<String, List<String>>()
13-
reversedMap["actionscript"] = listOf("as")
14-
reversedMap["arduino"] = listOf("ino")
15-
reversedMap["assembly"] = listOf("asm", "s", "S")
16-
reversedMap["clojure"] = listOf("clj", "cljs", "cljc", "edn")
17-
reversedMap["cobol"] = listOf("cbl", "cob", "cpy")
18-
reversedMap["coffeescript"] = listOf("coffee", "litcoffee")
19-
reversedMap["cuda"] = listOf("cu", "cuh")
20-
reversedMap["d"] = listOf("d")
21-
reversedMap["dosbatch"] = listOf("bat")
22-
reversedMap["emacslisp"] = listOf("el", "elc")
23-
reversedMap["erlang"] = listOf("erl", "hrl")
24-
reversedMap["elixir"] = listOf("ex", "exs")
25-
reversedMap["elm"] = listOf("elm")
26-
reversedMap["factor"] = listOf("factor")
27-
reversedMap["forth"] = listOf("forth", "4TH")
28-
reversedMap["fortran"] = listOf("f", "for", "f90", "f95", "f03",
29-
"f08", "f15")
30-
reversedMap["gradle"] = listOf("gradle")
31-
reversedMap["groovy"] = listOf("groovy")
32-
reversedMap["haskell"] = listOf("hs", "lhs")
33-
reversedMap["haxe"] = listOf("hx")
34-
reversedMap["html"] = listOf("html", "htm")
35-
reversedMap["hy"] = listOf("hy")
36-
reversedMap["j"] = listOf("ijs")
37-
reversedMap["julia"] = listOf("jl")
38-
reversedMap["lisp"] = listOf("lisp", "lsp", "l")
39-
reversedMap["lua"] = listOf("lua")
40-
reversedMap["makefile"] = listOf("makefile")
41-
reversedMap["matlab"] = listOf("m", "mlx")
42-
reversedMap["maven"] = listOf("pom")
43-
reversedMap["ocaml"] = listOf("ml", "mli")
44-
reversedMap["oxygene"] = listOf("oxygene")
45-
reversedMap["pascal"] = listOf("pas")
46-
reversedMap["perl"] = listOf("pl", "PL")
47-
reversedMap["powershell"] = listOf("ps1", "psm1", "psd1")
48-
reversedMap["processing"] = listOf("pde")
49-
reversedMap["prolog"] = listOf("P")
50-
reversedMap["puppet"] = listOf("pp")
51-
reversedMap["qml"] = listOf("qml")
52-
reversedMap["r"] = listOf("r", "R")
53-
reversedMap["rust"] = listOf("rs")
54-
reversedMap["sas"] = listOf("sas")
55-
reversedMap["scala"] = listOf("scala", "sc")
56-
reversedMap["scheme"] = listOf("scm", "ss")
57-
reversedMap["shell"] = listOf("sh")
58-
reversedMap["smalltalk"] = listOf("st")
59-
reversedMap["sql"] = listOf("sql")
60-
reversedMap["tcl"] = listOf("tcl")
61-
reversedMap["tex"] = listOf("tex")
62-
reversedMap["typescript"] = listOf("ts", "tsx")
63-
reversedMap["verilog"] = listOf("v")
64-
reversedMap["vhdl"] = listOf("vhdl")
65-
reversedMap["viml"] = listOf("vim")
66-
reversedMap["visualbasic"] = listOf("bas")
67-
reversedMap["visualbasicforapps"] = listOf("vba")
68-
reversedMap["vue"] = listOf("vue")
69-
reversedMap["wolframlanguage"] = listOf("nb","m")
70-
reversedMap["xtend"] = listOf("xtend")
71-
72-
val map = hashMapOf<String, String>()
73-
reversedMap.forEach({ lang, exts ->
74-
exts.forEach { ext -> map.put(ext, lang)}
75-
})
76-
map
77-
}
78-
}
79-
9+
class CommonExtractor(val languageName: String) : ExtractorInterface {
8010
override fun extract(files: List<DiffFile>): List<CommitStats> {
81-
files.mapNotNull { file ->
82-
val lang = FILE_EXTS_MAP.value[file.extension]
83-
if (lang != null) {
84-
file.language = lang
85-
file
86-
} else null
87-
}
88-
11+
files.map { file -> file.language = languageName }
8912
return super.extract(files)
9013
}
9114
}

src/main/kotlin/app/extractors/CppExtractor.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import app.model.DiffFile
1010
class CppExtractor : ExtractorInterface {
1111
companion object {
1212
val LANGUAGE_NAME = "cpp"
13-
val FILE_EXTS = listOf("cc", "cpp", "cxx", "c++")
1413
val evaluator by lazy {
1514
ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME)
1615
}

src/main/kotlin/app/extractors/Extractor.kt

Lines changed: 10 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -17,51 +17,20 @@ class Extractor : ExtractorInterface {
1717
val RESTRICTED_EXTS = listOf(".min.js")
1818

1919
fun getAllExtensions(): HashSet<String> {
20-
val set =
21-
CommonExtractor.FILE_EXTS_MAP.value.keys +
22-
CExtractor.FILE_EXTS +
23-
CppExtractor.FILE_EXTS +
24-
CSharpExtractor.FILE_EXTS +
25-
CssExtractor.FILE_EXTS +
26-
FSharpExtractor.FILE_EXTS +
27-
GoExtractor.FILE_EXTS +
28-
JavaExtractor.FILE_EXTS +
29-
JavascriptExtractor.FILE_EXTS +
30-
KotlinExtractor.FILE_EXTS +
31-
ObjectiveCExtractor.FILE_EXTS +
32-
PhpExtractor.FILE_EXTS +
33-
PythonExtractor.FILE_EXTS +
34-
RubyExtractor.FILE_EXTS +
35-
SwiftExtractor.FILE_EXTS
36-
37-
return set.toHashSet()
38-
}
39-
}
40-
41-
fun create(extension: String): ExtractorInterface {
42-
return when (extension) {
43-
in JavascriptExtractor.FILE_EXTS -> JavascriptExtractor()
44-
in JavaExtractor.FILE_EXTS -> JavaExtractor()
45-
in PythonExtractor.FILE_EXTS -> PythonExtractor()
46-
in RubyExtractor.FILE_EXTS -> RubyExtractor()
47-
in PhpExtractor.FILE_EXTS -> PhpExtractor()
48-
in CExtractor.FILE_EXTS -> CExtractor()
49-
in CppExtractor.FILE_EXTS -> CppExtractor()
50-
in CSharpExtractor.FILE_EXTS -> CSharpExtractor()
51-
in FSharpExtractor.FILE_EXTS -> FSharpExtractor()
52-
in GoExtractor.FILE_EXTS -> GoExtractor()
53-
in ObjectiveCExtractor.FILE_EXTS -> ObjectiveCExtractor()
54-
in SwiftExtractor.FILE_EXTS -> SwiftExtractor()
55-
in KotlinExtractor.FILE_EXTS -> KotlinExtractor()
56-
in CssExtractor.FILE_EXTS -> CssExtractor()
57-
else -> CommonExtractor()
20+
return Heuristics
21+
.map { (ext, _) -> ext }
22+
.toHashSet()
5823
}
5924
}
6025

6126
override fun extract(files: List<DiffFile>): List<CommitStats> {
62-
return files.groupBy { file -> file.extension }
63-
.filter { (extension, _) -> !RESTRICTED_EXTS.contains(extension) }
64-
.map { (extension, files) -> create(extension).extract(files) }
27+
return files
28+
.filter { file -> !RESTRICTED_EXTS.contains(file.extension) }
29+
.mapNotNull { file ->
30+
val extractor = Heuristics.get(file.extension)
31+
if (extractor != null) extractor(file.new.content)?.extract(listOf(file))
32+
else null
33+
}
6534
.fold(mutableListOf()) { accStats, stats ->
6635
accStats.addAll(stats)
6736
accStats

src/main/kotlin/app/extractors/FSharpExtractor.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import app.model.DiffFile
1010
class FSharpExtractor : ExtractorInterface {
1111
companion object {
1212
val LANGUAGE_NAME = "fsharp"
13-
val FILE_EXTS = listOf("fs", "fsx")
1413
// The behaviour of csharp library classifier is the same as for csharp.
1514
val LIBRARIES = ExtractorInterface.getLibraries("cs")
1615
val evaluator by lazy {

src/main/kotlin/app/extractors/GoExtractor.kt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import app.model.DiffFile
1010
class GoExtractor : ExtractorInterface {
1111
companion object {
1212
val LANGUAGE_NAME = "go"
13-
val FILE_EXTS = listOf("go")
1413
val evaluator by lazy {
1514
ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME)
1615
}

0 commit comments

Comments
 (0)