Skip to content

Commit 198dc19

Browse files
committed
feat(analysis): adjust calculation of rloc for python #4092
1 parent a40d3db commit 198dc19

File tree

9 files changed

+146
-26
lines changed

9 files changed

+146
-26
lines changed

analysis/analysers/parsers/UnifiedParser/src/main/kotlin/de/maibornwolff/codecharta/analysers/parsers/unified/metriccollectors/MetricCollector.kt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ import org.treesitter.TSTreeCursor
1313
import java.io.File
1414

1515
abstract class MetricCollector(
16-
private val treeSitterLanguage: TSLanguage,
17-
private val queryProvider: MetricQueries
16+
protected val treeSitterLanguage: TSLanguage,
17+
protected val queryProvider: MetricQueries
1818
) {
1919
private val cursor = TSQueryCursor()
2020
private val parser = TSParser()
21-
private var lastCountedLine = -1
21+
protected var lastCountedLine = -1
22+
protected var rootNodeType: String = ""
2223

2324
private val metricToCalculation by lazy {
2425
mapOf(
@@ -86,21 +87,23 @@ abstract class MetricCollector(
8687
/**
 * Counts the real lines of code in the syntax tree below [root].
 *
 * The type of [root] is remembered in `rootNodeType` so that the tree walk does not count the
 * root node itself, and the node types extracted from the comment-lines query are handed to the
 * walk as the types to leave out.
 *
 * @param root root node of the parsed syntax tree
 * @return the number of counted lines; 0 when [root] has no children
 */
open fun getRealLinesOfCode(root: TSNode): Int {
    val hasChildren = root.childCount != 0
    if (!hasChildren) return 0

    rootNodeType = root.type
    val typesToSkip = getTypesFromQuery(queryProvider.commentLinesQuery)
    val treeCursor = TSTreeCursor(root)
    return walkTree(treeCursor, typesToSkip)
}
9294

9395
/**
 * Extracts the node patterns that are followed by a capture (`@name`) in a tree-sitter [query].
 *
 * For every `(...) @` occurrence the text between the outermost parentheses is returned, so a
 * nested pattern such as `(expression_statement (string)) @c` yields `expression_statement (string)`.
 * A pattern's text cannot span a line break because `.` does not match newlines here.
 *
 * @param query tree-sitter query source
 * @return the captured pattern texts in order of appearance; empty when nothing matches
 */
private fun getTypesFromQuery(query: String): List<String> =
    Regex("""\((.*?)\)\s*@""")
        .findAll(query)
        .map { it.groupValues[1] }
        .toList()
97100

98101
private fun walkTree(cursor: TSTreeCursor, commentTypes: List<String>): Int {
99102
var realLinesOfCode = 0
100103
val currentNode = cursor.currentNode()
101104

102105
if (!commentTypes.contains(currentNode.type)) {
103-
if (currentNode.startPoint.row > lastCountedLine) {
106+
if (currentNode.startPoint.row > lastCountedLine && currentNode.type != rootNodeType) {
104107
lastCountedLine = currentNode.startPoint.row
105108
realLinesOfCode++
106109
}
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,88 @@
11
package de.maibornwolff.codecharta.analysers.parsers.unified.metriccollectors
22

33
import de.maibornwolff.codecharta.analysers.parsers.unified.metricqueries.PythonQueries
4+
import org.treesitter.TSNode
5+
import org.treesitter.TSTreeCursor
46
import org.treesitter.TreeSitterPython
57

68
/**
 * Metric collector for Python sources.
 *
 * Overrides the real-lines-of-code (rloc) calculation: besides plain comment nodes, a comment
 * pattern may name a parent type with a single child type (e.g. `expression_statement (string)`),
 * and the additional lines spanned by multi-line childless nodes are counted as code.
 */
class PythonCollector : MetricCollector(
    treeSitterLanguage = TreeSitterPython(),
    queryProvider = PythonQueries()
) {
    /**
     * Counts the real lines of code in the syntax tree below [root].
     *
     * @param root root node of the parsed syntax tree
     * @return the number of counted lines; 0 when [root] has no children
     */
    override fun getRealLinesOfCode(root: TSNode): Int {
        if (root.childCount == 0) return 0

        // NOTE(review): lastCountedLine is not reset here; presumably the base class resets it
        // before each file is processed - confirm.
        rootNodeType = root.type
        val commentTypes = getParentAndChildNodeTypesFromQuery(queryProvider.commentLinesQuery)
        return walkTree(TSTreeCursor(root), commentTypes)
    }

    /**
     * Counts the lines of the node under [cursor] and of all its following siblings, then moves
     * the cursor back to their parent.
     *
     * Siblings are visited in a loop instead of by recursion so that the call depth is bounded by
     * the depth of the tree rather than by the number of nodes.
     */
    private fun walkTree(cursor: TSTreeCursor, commentTypes: List<Pair<String, String?>>): Int {
        var realLinesOfCode = 0
        do {
            realLinesOfCode += countLinesOfCurrentNode(cursor, commentTypes)
        } while (cursor.gotoNextSibling())

        // Leave the cursor on the parent so the caller can continue with the parent's siblings.
        cursor.gotoParent()
        return realLinesOfCode
    }

    /**
     * Counts the lines contributed by the node under [cursor] and, for multi-line nodes with
     * children, by its subtree. Comment nodes contribute nothing and are not descended into.
     */
    private fun countLinesOfCurrentNode(cursor: TSTreeCursor, commentTypes: List<Pair<String, String?>>): Int {
        val currentNode = cursor.currentNode()
        if (isCommentNode(currentNode, commentTypes)) return 0

        var realLinesOfCode = 0
        val startRow = currentNode.startPoint.row
        val endRow = currentNode.endPoint.row

        // Count the start line once, unless this is the root or the line only holds comment children.
        if (startRow > lastCountedLine &&
            currentNode.type != rootNodeType &&
            !areAllChildrenInLineCommentNodes(currentNode, startRow, commentTypes)
        ) {
            lastCountedLine = startRow
            realLinesOfCode++
        }

        if (currentNode.childCount == 0) {
            // A childless node spanning several lines adds every line after its first one.
            if (endRow > lastCountedLine) {
                realLinesOfCode += endRow - startRow
                lastCountedLine = endRow
            }
        } else if (endRow > startRow && cursor.gotoFirstChild()) {
            // Single-line nodes are fully accounted for above, so only multi-line nodes are descended into.
            realLinesOfCode += walkTree(cursor, commentTypes)
        }
        return realLinesOfCode
    }

    /**
     * Parses the comment patterns of [query] into pairs of parent node type and optional child
     * node type.
     *
     * `(comment) @c` yields `"comment" to null`;
     * `(expression_statement (string)) @c` yields `"expression_statement" to "string"`.
     */
    private fun getParentAndChildNodeTypesFromQuery(query: String): List<Pair<String, String?>> =
        capturedPatternRegex.findAll(query)
            .mapNotNull { parentChildRegex.find(it.groupValues[1]) }
            // An unmatched optional group is reported as an empty string; expose it as null.
            .map { match -> match.groupValues[1] to match.groupValues[2].takeIf { it.isNotEmpty() } }
            .toList()

    /**
     * Returns true when [node] matches one of [commentTypes]: either its type equals a pattern
     * without child type, or its type equals the parent type and its only child has the child type.
     */
    private fun isCommentNode(node: TSNode, commentTypes: List<Pair<String, String?>>): Boolean =
        commentTypes.any { (parentType, childType) ->
            node.type == parentType &&
                (childType.isNullOrBlank() || (node.childCount == 1 && node.getChild(0).type == childType))
        }

    /**
     * Returns true when every direct child of [node] that starts on [line] is a comment node.
     * Children are inspected in order and the check stops at the first child starting on a later line.
     *
     * NOTE(review): a node without children yields true, so the caller never counts the start
     * line of a childless node through this check - confirm that this is intended.
     *
     * @throws IllegalArgumentException if a child starts before [line]
     */
    private fun areAllChildrenInLineCommentNodes(
        node: TSNode,
        line: Int,
        commentTypes: List<Pair<String, String?>>
    ): Boolean {
        val lookAheadCursor = TSTreeCursor(node)
        if (!lookAheadCursor.gotoFirstChild()) return true

        do {
            val child = lookAheadCursor.currentNode()
            val childRow = child.startPoint.row
            require(childRow >= line) {
                "Malformed tree detected, child node start line comes before parent node start line!"
            }
            if (childRow > line) return true
            if (!isCommentNode(child, commentTypes)) return false
        } while (lookAheadCursor.gotoNextSibling())
        return true
    }

    companion object {
        // Text between the outermost parentheses of a pattern that is followed by a capture (`@name`).
        private val capturedPatternRegex = Regex("""\((.*?)\)\s*@""")

        // Parent node type, optionally followed by one child node type in parentheses.
        private val parentChildRegex = Regex("""(\w+)(?:\s*\((\w+)\))?""")
    }
}

analysis/analysers/parsers/UnifiedParser/src/main/kotlin/de/maibornwolff/codecharta/analysers/parsers/unified/metricqueries/PythonQueries.kt

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package de.maibornwolff.codecharta.analysers.parsers.unified.metricqueries
33
class PythonQueries : MetricQueries {
44
companion object {
55
private val complexityNodes = listOf(
6-
//if
6+
// if
77
"if_statement",
88
"elif_clause",
99
"if_clause",
@@ -13,20 +13,22 @@ class PythonQueries : MetricQueries {
1313
"for_in_clause",
1414
// conditional
1515
"conditional_expression",
16-
"list", //in MG deactivated TODO: warum?
16+
"list",
1717
"boolean_operator",
18-
//logical binary
19-
//case label
18+
// logical binary
19+
// case label
2020
"case_pattern",
21-
//catch block
21+
// catch block
2222
"except_clause",
23-
//function
23+
// function
2424
"function_definition",
25-
"lambda",
25+
"lambda"
2626
)
2727

28+
// In Python, unassigned string literals are used as block comments; they are parsed as an expression_statement whose only child is a string
2829
private val commentNodes = listOf(
29-
"comment"
30+
"comment",
31+
"expression_statement (string)"
3032
)
3133
}
3234

analysis/analysers/parsers/UnifiedParser/src/test/kotlin/de/maibornwolff/codecharta/analysers/parsers/unified/UnifiedParserTest.kt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class UnifiedParserTest {
4747
fun `Should produce correct output for a single source file of each supported language`(language: String, fileExtension: String) {
4848
// given
4949
val pipedProject = ""
50-
val inputFilePath = "${testResourceBaseFolder}${language}Sample${fileExtension}"
50+
val inputFilePath = "${testResourceBaseFolder}${language}Sample$fileExtension"
5151
val expectedResultFile = File("${testResourceBaseFolder}${language}Sample.cc.json")
5252

5353
// when
@@ -132,13 +132,13 @@ class UnifiedParserTest {
132132
val inputFilePath = "${testResourceBaseFolder}sampleproject"
133133
val ignoredFiles = listOf(
134134
".whatever/something.kt",
135-
"bar/something.strange",
136-
"foo.py"
135+
"bar/something.strange"
137136
)
138137
val parsedFiles = listOf(
139138
"bar/hello.kt",
140139
"bar/foo.kt",
141140
"foo.kt",
141+
"foo.py",
142142
"whenCase.kt",
143143
"helloWorld.ts"
144144
)

analysis/analysers/parsers/UnifiedParser/src/test/resources/includeAll.cc.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"checksum": "35d382e2141d3ad820923f92d3964ffb",
2+
"checksum": "2e59a5a836cc7dc4d23481bb8d3213ec",
33
"data": {
44
"projectName": "",
55
"nodes": [
@@ -73,6 +73,18 @@
7373
"link": "",
7474
"children": []
7575
},
76+
{
77+
"name": "foo.py",
78+
"type": "File",
79+
"attributes": {
80+
"complexity": 2,
81+
"comment_lines": 4,
82+
"loc": 14,
83+
"rloc": 9
84+
},
85+
"link": "",
86+
"children": []
87+
},
7688
{
7789
"name": "helloWorld.ts",
7890
"type": "File",

analysis/analysers/parsers/UnifiedParser/src/test/resources/mergeResult.cc.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"checksum": "6e89384da8cd75847e14644bebfa2e60",
2+
"checksum": "4c2c1b4d1165c6e1010c827cefa9b3d7",
33
"data": {
44
"projectName": "",
55
"nodes": [
@@ -71,6 +71,18 @@
7171
"link": "",
7272
"children": []
7373
},
74+
{
75+
"name": "foo.py",
76+
"type": "File",
77+
"attributes": {
78+
"complexity": 2,
79+
"comment_lines": 4,
80+
"loc": 14,
81+
"rloc": 9
82+
},
83+
"link": "",
84+
"children": []
85+
},
7486
{
7587
"name": "helloWorld.ts",
7688
"type": "File",

analysis/analysers/parsers/UnifiedParser/src/test/resources/pythonSample.cc.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"checksum": "89dc48a5569a49c48d520b6e88b13f09",
2+
"checksum": "d39de639c38fb69dc5e7e5220d3b54a9",
33
"data": {
44
"projectName": "",
55
"nodes": [
@@ -14,9 +14,9 @@
1414
"type": "File",
1515
"attributes": {
1616
"complexity": 12,
17-
"comment_lines": 1,
17+
"comment_lines": 61,
1818
"loc": 104,
19-
"rloc": 92
19+
"rloc": 32
2020
},
2121
"link": "",
2222
"children": []

analysis/analysers/parsers/UnifiedParser/src/test/resources/sampleProject.cc.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"checksum": "f5e93df9e55abfe832fa428121bd1d6a",
2+
"checksum": "320b420c79ad382ecc31f23e8118fcec",
33
"data": {
44
"projectName": "",
55
"nodes": [
@@ -53,6 +53,18 @@
5353
"link": "",
5454
"children": []
5555
},
56+
{
57+
"name": "foo.py",
58+
"type": "File",
59+
"attributes": {
60+
"complexity": 2,
61+
"comment_lines": 4,
62+
"loc": 14,
63+
"rloc": 9
64+
},
65+
"link": "",
66+
"children": []
67+
},
5668
{
5769
"name": "helloWorld.ts",
5870
"type": "File",

analysis/analysers/parsers/UnifiedParser/src/test/resources/typescriptSample.cc.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"checksum": "b693a0f06900613229ce4db5db9f29f1",
2+
"checksum": "bd1dc239a00b5fc494324112744118d4",
33
"data": {
44
"projectName": "",
55
"nodes": [
@@ -16,7 +16,7 @@
1616
"complexity": 32,
1717
"comment_lines": 60,
1818
"loc": 200,
19-
"rloc": 115
19+
"rloc": 114
2020
},
2121
"link": "",
2222
"children": []

0 commit comments

Comments
 (0)