Skip to content

Commit 7c842af

Browse files
committed
feat(web): 新增截图识别定位文字功能
- 添加 ML Kit 中文文字识别依赖 - 实现 TextRecognitionChineseLocator 工具类 - 支持全图或指定区域文字识别 - 提供识别结果位置信息及耗时统计 - 在 ASJavascriptInterfaceAsync 中集成识别接口 - 新增 recognizeTextInScreenshot 调用方法 - 支持目标文字在截图中的坐标定位 - 自动处理截图区域裁剪与坐标转换 - 兼容 Android R 及以上版本的截图权限 - 添加识别失败时的错误处理与提示信息
1 parent e523a09 commit 7c842af

File tree

4 files changed

+346
-0
lines changed

4 files changed

+346
-0
lines changed

assists-web/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ dependencies {
4242
implementation project(':assists-mp')
4343
implementation 'com.journeyapps:zxing-android-embedded:4.3.0'
4444
implementation 'com.squareup.okhttp3:okhttp:4.10.0'
45+
api 'com.google.mlkit:text-recognition-chinese:16.0.1'
4546
}
4647
task sourcesJar(type: Jar) {
4748
archiveClassifier.set('sources')

assists-web/src/main/java/com/ven/assists/web/ASJavascriptInterfaceAsync.kt

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import com.ven.assists.web.databinding.WebFloatingWindowBinding
5353
import com.ven.assists.window.AssistsWindowManager
5454
import com.ven.assists.window.AssistsWindowManager.overlayToast
5555
import com.ven.assists.window.AssistsWindowWrapper
56+
import com.ven.assists.web.utils.TextRecognitionChineseLocator
5657
import kotlinx.coroutines.CoroutineScope
5758
import kotlinx.coroutines.Dispatchers
5859
import kotlinx.coroutines.delay
@@ -202,6 +203,101 @@ class ASJavascriptInterfaceAsync(val webView: WebView) {
202203
response
203204
}
204205

206+
CallMethod.recognizeTextInScreenshot -> {
    // Takes a screenshot, runs Chinese text recognition on it (optionally limited to a
    // region) and answers with the full recognized text plus the bounding boxes of every
    // occurrence of "targetText".
    //
    // Request arguments (all optional):
    //   targetText                          phrase to locate, defaults to ""
    //   rotationDegrees                     forwarded to the recognizer, defaults to 0
    //   overlayHiddenScreenshotDelayMillis  wait before capturing, defaults to 250
    //   restoreOverlay                      hide overlays before and show them after, defaults to true
    //   region                              {left, top, right|width, bottom|height}
    //
    // Fully-qualified kotlinx/kotlin names are used below so this branch does not depend
    // on imports that are not visible from here.
    if (Build.VERSION.SDK_INT < Build.VERSION_CODES.R) {
        request.createResponse(-1, message = "Screenshot recognition requires Android R or above", data = false)
    } else {
        val targetText = request.arguments?.get("targetText")?.asString ?: ""
        val rotationDegrees = request.arguments?.get("rotationDegrees")?.asInt ?: 0
        val overlayHiddenDelay = request.arguments?.get("overlayHiddenScreenshotDelayMillis")?.asLong ?: 250L
        val restoreOverlay = request.arguments?.get("restoreOverlay")?.asBoolean ?: true

        // An explicit right/bottom wins over width/height. A missing or degenerate
        // rectangle resolves to null, which makes the locator scan the whole screenshot.
        val regionRect = request.arguments?.get("region")?.asJsonObject?.let { json ->
            val left = json.get("left")?.asInt ?: 0
            val top = json.get("top")?.asInt ?: 0
            val right = json.get("right")?.asInt ?: json.get("width")?.asInt?.let { left + it }
            val bottom = json.get("bottom")?.asInt ?: json.get("height")?.asInt?.let { top + it }
            if (right == null || bottom == null || right <= left || bottom <= top) {
                null
            } else {
                Rect(left, top, right, bottom)
            }
        }

        if (restoreOverlay) {
            AssistsWindowManager.hideAll()
        }
        val recognitionResult = try {
            // Inside the try so that a cancellation while waiting still restores the overlays.
            delay(overlayHiddenDelay)
            runCatching {
                TextRecognitionChineseLocator.findWordPositionsInScreenshotRegion(
                    region = regionRect,
                    targetText = targetText,
                    rotationDegrees = rotationDegrees
                )
            }.onFailure { error ->
                // runCatching also traps CancellationException. Re-throw it when the calling
                // coroutine itself was cancelled; a cancellation that only concerns the
                // recognition task is still reported to the caller as a failure below.
                val callerCancelled =
                    kotlin.coroutines.coroutineContext[kotlinx.coroutines.Job]?.isActive == false
                if (error is kotlinx.coroutines.CancellationException && callerCancelled) {
                    throw error
                }
                LogUtils.e(error)
            }
        } finally {
            if (restoreOverlay) {
                // NonCancellable keeps the restore working even after a cancellation.
                kotlinx.coroutines.withContext(kotlinx.coroutines.NonCancellable) {
                    AssistsWindowManager.showTop()
                }
            }
        }

        recognitionResult.fold(
            onSuccess = { result ->
                val positionsArray = JsonArray().apply {
                    result.targetPositions.forEach { position ->
                        add(JsonObject().apply {
                            addProperty("text", position.text)
                            addProperty("left", position.left)
                            addProperty("top", position.top)
                            addProperty("right", position.right)
                            addProperty("bottom", position.bottom)
                            addProperty("width", position.width)
                            addProperty("height", position.height)
                        })
                    }
                }

                val data = JsonObject().apply {
                    addProperty("fullText", result.fullText)
                    addProperty("processingTimeMillis", result.processingTimeMillis)
                    add("positions", positionsArray)
                }

                request.createResponse(0, data = data)
            },
            onFailure = { error ->
                request.createResponse(
                    -1,
                    message = error.message ?: "Recognition failed",
                    data = ""
                )
            }
        )
    }
}
300+
205301
CallMethod.getDeviceInfo -> {
206302
val uniqueDeviceId = DeviceUtils.getUniqueDeviceId()
207303
val androidID = DeviceUtils.getAndroidID()

assists-web/src/main/java/com/ven/assists/web/CallMethod.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,6 @@ object CallMethod {
6767
const val download = "download"
6868
const val audioPlayFromFile = "audioPlayFromFile"
6969
const val audioStop = "audioStop"
70+
const val recognizeTextInScreenshot = "recognizeTextInScreenshot"
7071

7172
}
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
package com.ven.assists.web.utils
2+
3+
import android.content.Context
4+
import android.graphics.Bitmap
5+
import android.graphics.Rect
6+
import android.os.Build
7+
import android.net.Uri
8+
import androidx.annotation.RequiresApi
9+
import com.google.android.gms.tasks.Task
10+
import com.google.mlkit.vision.common.InputImage
11+
import com.google.mlkit.vision.text.Text
12+
import com.google.mlkit.vision.text.TextRecognition
13+
import com.google.mlkit.vision.text.chinese.ChineseTextRecognizerOptions
14+
import com.ven.assists.AssistsCore
15+
import kotlinx.coroutines.Dispatchers
16+
import kotlinx.coroutines.suspendCancellableCoroutine
17+
import kotlinx.coroutines.withContext
18+
import kotlin.coroutines.resume
19+
import kotlin.coroutines.resumeWithException
20+
21+
/**
 * Locates a phrase inside an image using ML Kit Chinese text recognition.
 *
 * All lookups return a [RecognitionResult] holding the full recognized text, the bounding
 * boxes of every occurrence of the requested phrase, and the time spent processing.
 * The underlying recognizer is created on first use and can be released with [close];
 * it is re-created transparently the next time a lookup is made.
 */
object TextRecognitionChineseLocator {

    /**
     * Bounding box of one occurrence of the target phrase.
     *
     * @property text the matched text with all whitespace removed
     * @property left left edge of the box
     * @property top top edge of the box
     * @property right right edge of the box
     * @property bottom bottom edge of the box
     */
    data class WordPosition(
        val text: String,
        val left: Int,
        val top: Int,
        val right: Int,
        val bottom: Int
    ) {
        /** Horizontal extent of the box (`right - left`). */
        val width: Int get() = right - left

        /** Vertical extent of the box (`bottom - top`). */
        val height: Int get() = bottom - top
    }

    /**
     * Outcome of one recognition run.
     *
     * @property fullText everything the recognizer read from the image
     * @property targetPositions one entry per occurrence of the target phrase; empty when
     *   the phrase is blank or was not found
     * @property processingTimeMillis time spent on recognition plus phrase matching
     */
    data class RecognitionResult(
        val fullText: String,
        val targetPositions: List<WordPosition>,
        val processingTimeMillis: Long
    )

    /** Compiled once; used to strip whitespace from both the target and the recognized text. */
    private val WHITESPACE_REGEX = Regex("\\s+")

    /** Guards creation and release of [recognizerInstance]. */
    private val recognizerLock = Any()

    /** Current recognizer, or null when none has been created yet or it has been closed. */
    @Volatile
    private var recognizerInstance: com.google.mlkit.vision.text.TextRecognizer? = null

    /**
     * Returns the shared recognizer, creating it when necessary.
     *
     * A plain `by lazy` would keep handing out a closed client after [close]; holding the
     * instance in a nullable field lets it be rebuilt on the next use instead.
     */
    private val recognizer: com.google.mlkit.vision.text.TextRecognizer
        get() = recognizerInstance ?: synchronized(recognizerLock) {
            recognizerInstance
                ?: TextRecognition.getClient(ChineseTextRecognizerOptions.Builder().build())
                    .also { recognizerInstance = it }
        }

    /**
     * Finds [targetText] in [bitmap].
     *
     * @param bitmap image to scan; it is not recycled by this function
     * @param targetText phrase to locate; whitespace inside it is ignored
     * @param rotationDegrees rotation handed to [InputImage.fromBitmap]
     * @return the recognition result, with positions in the recognizer's coordinate space
     */
    suspend fun findWordPositions(
        bitmap: Bitmap,
        targetText: String,
        rotationDegrees: Int = 0
    ): RecognitionResult {
        val image = InputImage.fromBitmap(bitmap, rotationDegrees)
        return processImage(image, targetText)
    }

    /**
     * Finds [targetText] inside [region] of [bitmap] and reports positions translated back
     * into the coordinates of the full [bitmap].
     *
     * NOTE(review): the translation simply adds the region's left/top offset, which
     * presumably only holds for `rotationDegrees == 0` - confirm before using other rotations.
     *
     * @param bitmap source image; it is never recycled by this function
     * @param region area to scan, must be non-empty and lie inside the bitmap
     * @param targetText phrase to locate; whitespace inside it is ignored
     * @param rotationDegrees rotation handed to [InputImage.fromBitmap]
     * @throws IllegalArgumentException if [region] is empty or not inside the bitmap bounds
     */
    suspend fun findWordPositionsInRegion(
        bitmap: Bitmap,
        region: Rect,
        targetText: String,
        rotationDegrees: Int = 0
    ): RecognitionResult {
        require(!region.isEmpty) { "Region must not be empty" }
        val bounds = Rect(0, 0, bitmap.width, bitmap.height)
        require(bounds.contains(region)) { "Region must be inside bitmap bounds" }

        val croppedBitmap = Bitmap.createBitmap(
            bitmap,
            region.left,
            region.top,
            region.width(),
            region.height()
        )

        return try {
            val recognition = findWordPositions(croppedBitmap, targetText, rotationDegrees)
            if (recognition.targetPositions.isEmpty()) {
                recognition
            } else {
                val adjusted = recognition.targetPositions.map { position ->
                    position.copy(
                        left = position.left + region.left,
                        top = position.top + region.top,
                        right = position.right + region.left,
                        bottom = position.bottom + region.top
                    )
                }
                recognition.copy(targetPositions = adjusted)
            }
        } finally {
            // Bitmap.createBitmap may hand back the source object itself (for example when the
            // region covers the whole bitmap); recycling it would destroy the caller's bitmap.
            if (croppedBitmap !== bitmap && !croppedBitmap.isRecycled) {
                croppedBitmap.recycle()
            }
        }
    }

    /**
     * Finds [targetText] in the image referenced by [imageUri].
     *
     * @param context context used to resolve [imageUri]
     * @param imageUri location of the image to scan
     * @param targetText phrase to locate; whitespace inside it is ignored
     */
    suspend fun findWordPositions(
        context: Context,
        imageUri: Uri,
        targetText: String
    ): RecognitionResult {
        // Loading the image reads and decodes a file, so keep it off the caller's thread.
        val image = withContext(Dispatchers.IO) {
            InputImage.fromFilePath(context, imageUri)
        }
        return processImage(image, targetText)
    }

    /**
     * Captures a screenshot and finds [targetText] in it.
     *
     * @param region area of the screenshot to scan; null or empty scans the whole screenshot
     * @param targetText phrase to locate; whitespace inside it is ignored
     * @param rotationDegrees rotation handed to [InputImage.fromBitmap]
     * @throws IllegalStateException if no screenshot could be captured
     * @throws IllegalArgumentException if a non-empty [region] is not inside the screenshot
     */
    @RequiresApi(Build.VERSION_CODES.R)
    suspend fun findWordPositionsInScreenshotRegion(
        region: Rect? = null,
        targetText: String,
        rotationDegrees: Int = 0
    ): RecognitionResult {
        val screenshot = AssistsCore.takeScreenshot()
            ?: throw IllegalStateException("Screenshot capture failed")
        return try {
            if (region == null || region.isEmpty) {
                findWordPositions(screenshot, targetText, rotationDegrees)
            } else {
                findWordPositionsInRegion(screenshot, region, targetText, rotationDegrees)
            }
        } finally {
            // The screenshot is owned by this function, so release it once recognition is done.
            if (!screenshot.isRecycled) {
                screenshot.recycle()
            }
        }
    }

    /**
     * Releases the recognizer if one has been created.
     *
     * Safe to call repeatedly; a later lookup creates a fresh recognizer.
     */
    fun close() {
        synchronized(recognizerLock) {
            recognizerInstance?.close()
            recognizerInstance = null
        }
    }

    /**
     * Runs recognition on [image] and collects the positions of [targetText].
     *
     * A blank [targetText] skips matching and yields an empty position list.
     */
    private suspend fun processImage(
        image: InputImage,
        targetText: String
    ): RecognitionResult = withContext(Dispatchers.IO) {
        // nanoTime is monotonic, so the measured duration is immune to wall-clock adjustments.
        val startNanos = System.nanoTime()
        val recognizedText = recognizer.process(image).await()
        val positions = if (targetText.isBlank()) {
            emptyList()
        } else {
            findTargetPositions(recognizedText, targetText)
        }
        val duration = (System.nanoTime() - startNanos) / 1_000_000
        RecognitionResult(
            fullText = recognizedText.text,
            targetPositions = positions,
            processingTimeMillis = duration
        )
    }

    /**
     * Collects every occurrence of [targetText] in [recognizedText].
     *
     * Matching is done per recognized line with whitespace removed on both sides, so a
     * phrase that is split across two lines is not found. Results keep block/line order.
     */
    private fun findTargetPositions(
        recognizedText: Text,
        targetText: String
    ): List<WordPosition> {
        val normalizedTarget = targetText.replace(WHITESPACE_REGEX, "")
        if (normalizedTarget.isEmpty()) return emptyList()

        return recognizedText.textBlocks
            .flatMap { block -> block.lines }
            .flatMap { line -> findMatchesInLine(line, normalizedTarget) }
    }

    /**
     * Finds all (possibly overlapping) occurrences of [normalizedTarget] in a single line.
     *
     * Each reported box is the union of the boxes of every element the match touches, so it
     * can be wider than the matched characters when an element is only partly matched.
     */
    private fun findMatchesInLine(
        line: Text.Line,
        normalizedTarget: String
    ): List<WordPosition> {
        // Concatenate the whitespace-free element texts and remember which character range
        // of the concatenation each element's bounding box corresponds to.
        val joined = StringBuilder()
        val elementSpans = mutableListOf<Pair<IntRange, Rect>>()
        for (element in line.elements) {
            val boundingBox = element.boundingBox ?: continue
            val sanitized = element.text.replace(WHITESPACE_REGEX, "")
            if (sanitized.isEmpty()) continue
            val start = joined.length
            joined.append(sanitized)
            elementSpans += (start until joined.length) to Rect(boundingBox)
        }
        if (elementSpans.isEmpty()) return emptyList()

        val joinedLine = joined.toString()
        val matches = mutableListOf<WordPosition>()
        var matchIndex = joinedLine.indexOf(normalizedTarget)
        while (matchIndex != -1) {
            val matchEnd = matchIndex + normalizedTarget.length
            val includedRects = elementSpans
                .filter { (range, _) -> range.first < matchEnd && range.last >= matchIndex }
                .map { (_, rect) -> rect }

            if (includedRects.isNotEmpty()) {
                matches += WordPosition(
                    text = joinedLine.substring(matchIndex, matchEnd),
                    left = includedRects.minOf { it.left },
                    top = includedRects.minOf { it.top },
                    right = includedRects.maxOf { it.right },
                    bottom = includedRects.maxOf { it.bottom }
                )
            }
            // Advance by one character only, so overlapping occurrences are reported too.
            matchIndex = joinedLine.indexOf(normalizedTarget, startIndex = matchIndex + 1)
        }
        return matches
    }

    /**
     * Suspends until this [Task] finishes, returning its result or throwing its exception.
     * A cancelled task cancels the waiting coroutine.
     */
    private suspend fun <T> Task<T>.await(): T = suspendCancellableCoroutine { continuation ->
        addOnSuccessListener { result ->
            if (continuation.isActive) {
                continuation.resume(result)
            }
        }.addOnFailureListener { exception ->
            if (continuation.isActive) {
                continuation.resumeWithException(exception)
            }
        }.addOnCanceledListener {
            continuation.cancel()
        }
    }
}
248+

0 commit comments

Comments
 (0)