Skip to content

Commit 88d78f1

Browse files
authored
Merge pull request #30 from ARTEMKOPIK/bolt-optimize-template-matcher-5482039880792567779
⚡ Bolt: Optimize TemplateMatcher performance and fix width bug
2 parents 2eaf150 + 4bfc7f2 commit 88d78f1

File tree

2 files changed

+62
-53
lines changed

2 files changed

+62
-53
lines changed

.Jules/bolt.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Bolt's Journal - Critical Learnings
2+
3+
## 2026-01-25 - Template Matching Performance
4+
**Learning:** Sliding-window template matching with Normalized Cross-Correlation (NCC) is extremely expensive: O(N*M*n*m) for an N×M source and an n×m template. Naive implementations often recompute the template's mean and variance at every window position even though they never change, and sometimes derive the source width incorrectly (e.g., taking `sqrt` of the array size, which is only right for square images and otherwise corrupts the coordinate mapping).
5+
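**Action:** Always pre-calculate template statistics once, pass the actual image dimensions instead of inferring them heuristically, and use the single-pass algebraic formula for NCC so each window is traversed once instead of twice. Because the algebraic form is subject to floating-point rounding, treat a non-positive variance term as "no match" and clamp the resulting NCC to [-1.0, 1.0].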

app/src/main/java/com/autoclicker/app/util/TemplateMatcher.kt

Lines changed: 57 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,40 @@ class TemplateMatcher {
4444
}
4545

4646
try {
47+
val sourceWidth = source.width
48+
val sourceHeight = source.height
49+
val templateWidth = template.width
50+
val templateHeight = template.height
51+
4752
// Convert to grayscale for faster matching
4853
val sourceGray = toGrayscale(source)
4954
val templateGray = toGrayscale(template)
5055

56+
// Pre-calculate template statistics to avoid redundant calculations in the sliding window
57+
val n = templateWidth * templateHeight
58+
var templateSum = 0.0
59+
var templateSumSq = 0.0
60+
for (gray in templateGray) {
61+
val g = gray.toDouble()
62+
templateSum += g
63+
templateSumSq += g * g
64+
}
65+
val templateMean = templateSum / n
66+
val templateSumSqDiff = templateSumSq - n * templateMean * templateMean
67+
5168
// Perform normalized cross-correlation
5269
val matches = mutableListOf<MatchResult>()
53-
val searchWidth = source.width - template.width + 1
54-
val searchHeight = source.height - template.height + 1
70+
val searchWidth = sourceWidth - templateWidth + 1
71+
val searchHeight = sourceHeight - templateHeight + 1
5572

5673
// Sliding window search
5774
for (y in 0 until searchHeight) {
5875
for (x in 0 until searchWidth) {
5976
val confidence = computeNCC(
60-
sourceGray, templateGray,
77+
sourceGray, sourceWidth,
78+
templateGray, templateWidth, templateHeight,
6179
x, y,
62-
template.width, template.height
80+
templateMean, templateSumSqDiff
6381
)
6482

6583
if (confidence >= threshold) {
@@ -88,92 +106,78 @@ class TemplateMatcher {
88106

89107
/**
90108
* Convert bitmap to grayscale array for faster processing.
109+
* Optimized using bitwise operations and fixed-point arithmetic.
91110
*/
92111
private fun toGrayscale(bitmap: Bitmap): IntArray {
93112
val width = bitmap.width
94113
val height = bitmap.height
95-
val pixels = IntArray(width * height)
114+
val size = width * height
115+
val pixels = IntArray(size)
96116
bitmap.getPixels(pixels, 0, width, 0, 0, width, height)
97117

98-
val gray = IntArray(width * height)
99-
for (i in pixels.indices) {
118+
val gray = IntArray(size)
119+
for (i in 0 until size) {
100120
val pixel = pixels[i]
101-
val r = Color.red(pixel)
102-
val g = Color.green(pixel)
103-
val b = Color.blue(pixel)
104-
// Standard grayscale conversion formula
105-
gray[i] = (0.299 * r + 0.587 * g + 0.114 * b).toInt()
121+
val r = (pixel shr 16) and 0xFF
122+
val g = (pixel shr 8) and 0xFF
123+
val b = pixel and 0xFF
124+
// Optimized formula: (77*R + 151*G + 28*B) / 256
125+
// This is faster than floating point math and avoids function calls
126+
gray[i] = (r * 77 + g * 151 + b * 28) shr 8
106127
}
107128

108129
return gray
109130
}
110131

111132
/**
112133
* Compute Normalized Cross-Correlation between template and source region.
113-
*
114-
* NCC formula: sum((source[i] - mean_source) * (template[i] - mean_template)) /
115-
* (std_source * std_template * N)
134+
* Optimized to perform all calculations in a single pass using the algebraic formula:
135+
* sum((s - s_mean)(t - t_mean)) = sum(s*t) - n * s_mean * t_mean
116136
*
117137
* @return Confidence in the range 0.0-1.0: the raw NCC (-1.0 to 1.0) rescaled as (ncc + 1) / 2, or 0 when either region has no variance
118138
*/
119139
private fun computeNCC(
120140
source: IntArray,
141+
sourceWidth: Int,
121142
template: IntArray,
143+
templateWidth: Int,
144+
templateHeight: Int,
122145
startX: Int,
123146
startY: Int,
124-
templateWidth: Int,
125-
templateHeight: Int
147+
templateMean: Double,
148+
templateSumSqDiff: Double
126149
): Float {
127-
val sourceWidth = kotlin.math.sqrt(source.size.toDouble()).toInt()
128150
val n = templateWidth * templateHeight
129151

130-
// Compute means
131-
var sourceMean = 0.0
132-
var templateMean = 0.0
152+
var dotProduct = 0.0
153+
var sourceSum = 0.0
154+
var sourceSumSq = 0.0
133155

134156
for (ty in 0 until templateHeight) {
157+
val sOffset = (startY + ty) * sourceWidth + startX
158+
val tOffset = ty * templateWidth
135159
for (tx in 0 until templateWidth) {
136-
val sx = startX + tx
137-
val sy = startY + ty
138-
val sourceIdx = sy * sourceWidth + sx
139-
val templateIdx = ty * templateWidth + tx
160+
val s = source[sOffset + tx].toDouble()
161+
val t = template[tOffset + tx].toDouble()
140162

141-
sourceMean += source[sourceIdx]
142-
templateMean += template[templateIdx]
163+
dotProduct += s * t
164+
sourceSum += s
165+
sourceSumSq += s * s
143166
}
144167
}
145168

146-
sourceMean /= n
147-
templateMean /= n
148-
149-
// Compute standard deviations and cross-correlation
150-
var numerator = 0.0
151-
var sourceVar = 0.0
152-
var templateVar = 0.0
153-
154-
for (ty in 0 until templateHeight) {
155-
for (tx in 0 until templateWidth) {
156-
val sx = startX + tx
157-
val sy = startY + ty
158-
val sourceIdx = sy * sourceWidth + sx
159-
val templateIdx = ty * templateWidth + tx
160-
161-
val sourceDiff = source[sourceIdx] - sourceMean
162-
val templateDiff = template[templateIdx] - templateMean
163-
164-
numerator += sourceDiff * templateDiff
165-
sourceVar += sourceDiff * sourceDiff
166-
templateVar += templateDiff * templateDiff
167-
}
168-
}
169+
val sourceMean = sourceSum / n
170+
val sourceSumSqDiff = sourceSumSq - n * sourceMean * sourceMean
169171

170172
// Avoid division by zero
171-
if (sourceVar == 0.0 || templateVar == 0.0) {
173+
if (sourceSumSqDiff <= 0.0 || templateSumSqDiff <= 0.0) {
172174
return 0f
173175
}
174176

175-
val denominator = sqrt(sourceVar * templateVar)
176-
val ncc = numerator / denominator
177+
val numerator = dotProduct - n * sourceMean * templateMean
178+
val denominator = sqrt(sourceSumSqDiff * templateSumSqDiff)
179+
180+
val ncc = (numerator / denominator).coerceIn(-1.0, 1.0)
177181

178182
// Normalize to 0.0-1.0 range (NCC is in -1.0 to 1.0)
179183
return ((ncc + 1.0) / 2.0).toFloat()

0 commit comments

Comments
 (0)