Skip to content

Commit ec2016c

Browse files
author
Ven
committed
feat(assists): 新增剪贴板内容获取功能和ML Kit文字识别能力
- 添加ClipboardActivity用于后台获取剪贴板内容 - 实现getClipboardText方法支持前后台剪贴板获取 - 集成ML Kit文字识别功能到WebView - 添加屏幕文字位置识别和区域文字识别能力 - 增加JavaScript接口支持文字识别调用 - 在overlay界面添加测试按钮验证功能 - 更新应用版本号从3.2.214到3.2.215
1 parent 198ba84 commit ec2016c

File tree

12 files changed

+725
-2
lines changed

12 files changed

+725
-2
lines changed

assists-web/src/main/java/com/ven/assists/web/ASWebView.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import com.ven.assists.web.gallery.GalleryJavascriptInterface
2626
import com.ven.assists.web.network.HttpJavascriptInterface
2727
import com.ven.assists.web.ime.ImeJavascriptInterface
2828
import com.ven.assists.web.imageutils.ImageUtilsJavascriptInterface
29+
import com.ven.assists.web.mlkit.MlkitJavascriptInterface
2930
import kotlinx.coroutines.CoroutineScope
3031
import kotlinx.coroutines.Dispatchers
3132
import kotlinx.coroutines.Job
@@ -122,6 +123,7 @@ open class ASWebView @JvmOverloads constructor(
122123
val httpJavascriptInterface = HttpJavascriptInterface(webView = this)
123124
val imeJavascriptInterface = ImeJavascriptInterface(webView = this)
124125
val imageUtilsJavascriptInterface = ImageUtilsJavascriptInterface(webView = this)
126+
val mlkitJavascriptInterface = MlkitJavascriptInterface(webView = this)
125127
val galleryJavascriptInterface = GalleryJavascriptInterface(webView = this)
126128

127129

@@ -222,6 +224,7 @@ open class ASWebView @JvmOverloads constructor(
222224
addJavascriptInterface(httpJavascriptInterface, "assistsxHttp")
223225
addJavascriptInterface(imeJavascriptInterface, "assistsxIme")
224226
addJavascriptInterface(imageUtilsJavascriptInterface, "assistsxImageUtils")
227+
addJavascriptInterface(mlkitJavascriptInterface, "assistsxMlkit")
225228
addJavascriptInterface(galleryJavascriptInterface, "assistsxGallery")
226229
AssistsService.listeners.add(assistsServiceListener)
227230
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package com.ven.assists.web.mlkit
2+
3+
/**
 * Method-name constants for the ML Kit text-recognition bridge.
 *
 * Each constant's value is the string compared against the `method` field of an
 * incoming call request. Two families exist: calls that return structured
 * position data, and `...AsJson` calls that return the result as a single JSON string.
 */
object MlkitCallMethod {

    // ---- Whole-screen text ----

    /**
     * Recognize all text on screen together with its position (based on the current screenshot).
     *
     * Arguments: optional region bounds `left`, `top`, `right`, `bottom`;
     * optional `rotationDegrees`, default 0.
     */
    const val getScreenTextPositions: String = "getScreenTextPositions"

    /**
     * Same recognition as [getScreenTextPositions], but the result is returned
     * directly as a JSON string (based on the current screenshot).
     *
     * Arguments: optional region bounds `left`, `top`, `right`, `bottom`;
     * optional `rotationDegrees`, default 0.
     */
    const val getScreenTextPositionsAsJson: String = "getScreenTextPositionsAsJson"

    // ---- Phrase lookup ----

    /**
     * Locate a given phrase on screen (based on the current screenshot).
     *
     * Arguments: required `targetText`; optional region bounds `left`, `top`,
     * `right`, `bottom`; optional `rotationDegrees`, default 0.
     */
    const val findPhrasePositions: String = "findPhrasePositions"

    /**
     * Same lookup as [findPhrasePositions], but the result is returned directly
     * as a JSON string (based on the current screenshot).
     *
     * Arguments: required `targetText`; optional region bounds `left`, `top`,
     * `right`, `bottom`; optional `rotationDegrees`, default 0.
     */
    const val findPhrasePositionsOnScreenAsJson: String = "findPhrasePositionsOnScreenAsJson"
}
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
package com.ven.assists.web.mlkit
2+
3+
import android.graphics.Rect
4+
import android.util.Base64
5+
import android.webkit.JavascriptInterface
6+
import android.webkit.WebView
7+
import com.blankj.utilcode.util.GsonUtils
8+
import com.blankj.utilcode.util.LogUtils
9+
import com.google.gson.JsonObject
10+
import com.google.gson.reflect.TypeToken
11+
import com.ven.assists.web.CallRequest
12+
import com.ven.assists.web.CallResponse
13+
import com.ven.assists.web.createResponse
14+
import com.ven.assists.window.AssistsWindowManager
15+
import kotlinx.coroutines.CoroutineScope
16+
import kotlinx.coroutines.Dispatchers
17+
import kotlinx.coroutines.delay
18+
import kotlinx.coroutines.launch
19+
import java.nio.charset.StandardCharsets
20+
21+
/**
 * JavaScript bridge for ML Kit text recognition.
 *
 * Page script invokes [call] with a JSON request; the immediate return value is only
 * an acknowledgement. The real result is delivered asynchronously by evaluating
 * `assistsxMlkitCallback('<base64 of the JSON response>')` in [webView].
 *
 * Supported methods are listed in [MlkitCallMethod]: locating a phrase on screen and
 * reading all on-screen text with positions, each also available in an "as JSON string" form.
 */
class MlkitJavascriptInterface(val webView: WebView) {
    private val coroutineScope = CoroutineScope(Dispatchers.Main)

    /**
     * Serializes [result] to JSON and forwards it to [callback] on the main dispatcher.
     *
     * Serialization or delivery failures are logged and otherwise ignored.
     */
    fun <T> callbackResponse(result: CallResponse<T>) {
        coroutineScope.launch {
            runCatching {
                callback(GsonUtils.toJson(result))
            }.onFailure {
                LogUtils.e(it)
            }
        }
    }

    /**
     * Delivers [result] to the page by calling the global `assistsxMlkitCallback` function.
     *
     * The payload is Base64-encoded (UTF-8, no line wraps) so it can be embedded in a
     * single-quoted JavaScript string literal without any escaping.
     */
    fun callback(result: String) {
        val encoded = Base64.encodeToString(result.toByteArray(StandardCharsets.UTF_8), Base64.NO_WRAP)
        webView.evaluateJavascript("javascript:assistsxMlkitCallback('$encoded')", null)
    }

    /**
     * Bridge entry point exposed to JavaScript.
     *
     * @param originJson JSON-encoded call request (method name plus optional arguments).
     * @return JSON of a `CallResponse` with `code = 0`, acknowledging that the request was
     * accepted for processing; the actual outcome arrives later through [callback].
     */
    @JavascriptInterface
    fun call(originJson: String): String {
        val result = GsonUtils.toJson(CallResponse<Any>(code = 0))
        // The scope already runs on Dispatchers.Main, so no dispatcher override is needed.
        coroutineScope.launch {
            processCall(originJson)
        }
        return result
    }

    /**
     * Parses [originJson], dispatches to the matching handler and sends the response back.
     *
     * A request that cannot be parsed (malformed JSON, or a JSON `null`) is logged and
     * dropped: without a parsed request there is nothing to build a response from, and
     * letting the exception escape would surface as an uncaught coroutine failure.
     */
    private suspend fun processCall(originJson: String) {
        val request: CallRequest<JsonObject>? = runCatching {
            GsonUtils.fromJson<CallRequest<JsonObject>>(
                originJson,
                object : TypeToken<CallRequest<JsonObject>>() {}.type
            )
        }.onFailure {
            LogUtils.e(it)
        }.getOrNull()

        if (request == null) {
            LogUtils.e("MlkitJavascriptInterface: unable to parse call request")
            return
        }

        runCatching {
            val response = when (request.method) {
                MlkitCallMethod.findPhrasePositions -> {
                    handleFindPhrasePositions(request)
                }

                MlkitCallMethod.getScreenTextPositions -> {
                    handleGetScreenTextPositions(request)
                }

                MlkitCallMethod.findPhrasePositionsOnScreenAsJson -> {
                    handleFindPhrasePositionsOnScreenAsJson(request)
                }

                MlkitCallMethod.getScreenTextPositionsAsJson -> {
                    handleGetScreenTextPositionsAsJson(request)
                }

                else -> {
                    request.createResponse(-1, message = "方法未支持: ${request.method}")
                }
            }
            callbackResponse(response)
        }.onFailure {
            // Cancellation is not an error: propagate it instead of reporting a failed call.
            if (it is java.util.concurrent.CancellationException) throw it
            LogUtils.e(it)
            callbackResponse(request.createResponse(-1, message = "执行失败: ${it.message}", data = null))
        }
    }

    /**
     * Runs [block] with the overlay windows hidden, restoring them afterwards.
     *
     * Request arguments read here:
     * - `restoreOverlay` (boolean, default `true`): when `true` the overlays are hidden
     *   before [block] and shown again after it, even if [block] throws; when `false`
     *   the overlays are not touched at all.
     * - `overlayHiddenScreenshotDelayMillis` (long, default 250): pause before [block]
     *   runs. The pause is applied regardless of `restoreOverlay`.
     *
     * @param request source of the two arguments above
     * @param block recognition logic producing the response
     */
    private suspend fun withOverlayHiddenForRecognition(
        request: CallRequest<JsonObject>,
        block: suspend () -> CallResponse<JsonObject>
    ): CallResponse<JsonObject> {
        val restoreOverlay = request.arguments.valueOrNull("restoreOverlay")?.asBoolean ?: true
        val delayMillis = request.arguments.valueOrNull("overlayHiddenScreenshotDelayMillis")?.asLong
            ?: DEFAULT_OVERLAY_HIDDEN_DELAY_MILLIS
        if (restoreOverlay) {
            AssistsWindowManager.hideAll()
        }
        delay(delayMillis)
        return try {
            block()
        } finally {
            if (restoreOverlay) {
                AssistsWindowManager.showTop()
            }
        }
    }

    /**
     * Returns the value stored under [key], or `null` when the receiver is `null`, the key
     * is missing, or the value is an explicit JSON `null`.
     *
     * Gson represents JSON `null` as a `JsonNull` element whose `asInt`/`asString`/...
     * accessors throw, so it has to be filtered out before reading an optional argument.
     */
    private fun JsonObject?.valueOrNull(key: String): com.google.gson.JsonElement? =
        this?.get(key)?.takeUnless { it.isJsonNull }

    /**
     * Parses the optional recognition region.
     *
     * The bounds may be supplied either nested as `region: { left, top, right, bottom }`
     * or as top-level `left`/`top`/`right`/`bottom` arguments; a nested `region` object
     * takes precedence when present.
     *
     * @return the region, or `null` when any bound is missing or the rectangle is empty
     */
    private fun parseRegion(arguments: JsonObject?): Rect? {
        val nested = arguments.valueOrNull("region")?.takeIf { it.isJsonObject }?.asJsonObject
        val source = nested ?: arguments
        val left = source.valueOrNull("left")?.asInt ?: return null
        val top = source.valueOrNull("top")?.asInt ?: return null
        val right = source.valueOrNull("right")?.asInt ?: return null
        val bottom = source.valueOrNull("bottom")?.asInt ?: return null
        val rect = Rect(left, top, right, bottom)
        return if (rect.isEmpty) null else rect
    }

    /** Reads the optional `rotationDegrees` argument, defaulting to 0. */
    private fun parseRotationDegrees(arguments: JsonObject?): Int =
        arguments.valueOrNull("rotationDegrees")?.asInt ?: 0

    /**
     * Handles [MlkitCallMethod.findPhrasePositions].
     *
     * On success the response data contains `fullText`, a `positions` array of
     * `{ text, left, top, right, bottom }` objects and `processingTimeMillis`.
     * A missing or blank `targetText` yields an error response.
     */
    private suspend fun handleFindPhrasePositions(request: CallRequest<JsonObject>): CallResponse<JsonObject> {
        // Validate and parse arguments first so an invalid request never hides the overlay
        // or waits for the screenshot delay.
        val targetText = request.arguments.valueOrNull("targetText")?.asString
        if (targetText.isNullOrBlank()) {
            return request.createResponse(-1, message = "targetText 不能为空", data = null)
        }
        val region = parseRegion(request.arguments)
        val rotationDegrees = parseRotationDegrees(request.arguments)

        return withOverlayHiddenForRecognition(request) {
            val result = MlkitScreenTextUtils.findPhrasePositionsOnScreen(
                targetText = targetText,
                region = region,
                rotationDegrees = rotationDegrees
            )

            result.fold(
                onSuccess = { recognition ->
                    val positionsArray = com.google.gson.JsonArray().apply {
                        recognition.targetPositions.forEach { pos ->
                            add(JsonObject().apply {
                                addProperty("text", pos.text)
                                addProperty("left", pos.left)
                                addProperty("top", pos.top)
                                addProperty("right", pos.right)
                                addProperty("bottom", pos.bottom)
                            })
                        }
                    }
                    val data = JsonObject().apply {
                        addProperty("fullText", recognition.fullText)
                        add("positions", positionsArray)
                        addProperty("processingTimeMillis", recognition.processingTimeMillis)
                    }
                    request.createResponse(0, data = data, message = "识别完成")
                },
                onFailure = { e ->
                    LogUtils.e(e)
                    request.createResponse(-1, message = "识别失败: ${e.message}", data = null)
                }
            )
        }
    }

    /**
     * Handles [MlkitCallMethod.getScreenTextPositions].
     *
     * On success the response data contains `fullText`, a `positions` array of
     * `{ text, left, top, right, bottom }` objects and `processingTimeMillis`.
     */
    private suspend fun handleGetScreenTextPositions(request: CallRequest<JsonObject>): CallResponse<JsonObject> {
        // Parse arguments before hiding the overlay so a bad argument cannot cause a flicker.
        val region = parseRegion(request.arguments)
        val rotationDegrees = parseRotationDegrees(request.arguments)

        return withOverlayHiddenForRecognition(request) {
            val result = MlkitScreenTextUtils.getScreenTextPositions(
                region = region,
                rotationDegrees = rotationDegrees
            )

            result.fold(
                onSuccess = { recognition ->
                    val positionsArray = com.google.gson.JsonArray().apply {
                        recognition.positions.forEach { pos ->
                            add(JsonObject().apply {
                                addProperty("text", pos.text)
                                addProperty("left", pos.left)
                                addProperty("top", pos.top)
                                addProperty("right", pos.right)
                                addProperty("bottom", pos.bottom)
                            })
                        }
                    }
                    val data = JsonObject().apply {
                        addProperty("fullText", recognition.fullText)
                        add("positions", positionsArray)
                        addProperty("processingTimeMillis", recognition.processingTimeMillis)
                    }
                    request.createResponse(0, data = data, message = "识别完成")
                },
                onFailure = { e ->
                    LogUtils.e(e)
                    request.createResponse(-1, message = "识别失败: ${e.message}", data = null)
                }
            )
        }
    }

    /**
     * Handles [MlkitCallMethod.findPhrasePositionsOnScreenAsJson].
     *
     * On success the response data holds the recognition output under `jsonResult`.
     * A missing or blank `targetText` yields an error response.
     */
    private suspend fun handleFindPhrasePositionsOnScreenAsJson(request: CallRequest<JsonObject>): CallResponse<JsonObject> {
        // Validate and parse arguments first so an invalid request never hides the overlay
        // or waits for the screenshot delay.
        val targetText = request.arguments.valueOrNull("targetText")?.asString
        if (targetText.isNullOrBlank()) {
            return request.createResponse(-1, message = "targetText 不能为空", data = null)
        }
        val region = parseRegion(request.arguments)
        val rotationDegrees = parseRotationDegrees(request.arguments)

        return withOverlayHiddenForRecognition(request) {
            val result = MlkitScreenTextUtils.findPhrasePositionsOnScreenAsJson(
                targetText = targetText,
                region = region,
                rotationDegrees = rotationDegrees
            )

            result.fold(
                onSuccess = { jsonStr ->
                    val data = JsonObject().apply {
                        addProperty("jsonResult", jsonStr)
                    }
                    request.createResponse(0, data = data, message = "识别完成")
                },
                onFailure = { e ->
                    LogUtils.e(e)
                    request.createResponse(-1, message = "识别失败: ${e.message}", data = null)
                }
            )
        }
    }

    /**
     * Handles [MlkitCallMethod.getScreenTextPositionsAsJson].
     *
     * On success the response data holds the recognition output under `jsonResult`.
     */
    private suspend fun handleGetScreenTextPositionsAsJson(request: CallRequest<JsonObject>): CallResponse<JsonObject> {
        // Parse arguments before hiding the overlay so a bad argument cannot cause a flicker.
        val region = parseRegion(request.arguments)
        val rotationDegrees = parseRotationDegrees(request.arguments)

        return withOverlayHiddenForRecognition(request) {
            val result = MlkitScreenTextUtils.getScreenTextPositionsAsJson(
                region = region,
                rotationDegrees = rotationDegrees
            )

            result.fold(
                onSuccess = { jsonStr ->
                    val data = JsonObject().apply {
                        addProperty("jsonResult", jsonStr)
                    }
                    request.createResponse(0, data = data, message = "识别完成")
                },
                onFailure = { e ->
                    LogUtils.e(e)
                    request.createResponse(-1, message = "识别失败: ${e.message}", data = null)
                }
            )
        }
    }

    private companion object {
        /** Default value of `overlayHiddenScreenshotDelayMillis` when the request omits it. */
        const val DEFAULT_OVERLAY_HIDDEN_DELAY_MILLIS = 250L
    }
}

0 commit comments

Comments
 (0)