Skip to content

Commit 21f3109

Browse files
feat: add text recognition module (#235)
* feat: added initial support for text recognition on iOS
* feat: added android support
* chore: clean up
* chore: improved API adding more properties
* feat: finished text recognition docs
* chore: removed console.log
* docs(changeset): Added first version of Text Recognition module
* chore: bump yarn lock after rebase
* chore: hide claude folders from git
* fix: restore some dependencies we needed
* fix: restore camera capabilities, remove image picker
* fix: clean up types in TS and swift
* fix: clean up types, add more debug to text recognition screen
* fix: remove nested scroll, simplify debug screen
* fix: remove nested scroll, simplify debug screen
* fix: formatting
* test: add placeholder test for now
* chore: update changeset to just be minor
* docs: update text recognition docs
* fix: use rnmlkitimage for android
* docs: update readme with link to text recognition
* docs: add link in docs folder as well

---------

Co-authored-by: Diogo Carmo <dccarmo@gmail.com>
1 parent 579ae06 commit 21f3109

File tree

32 files changed

+923
-5
lines changed

32 files changed

+923
-5
lines changed

.changeset/nine-ads-drum.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@infinitered/react-native-mlkit-text-recognition": minor
3+
"example-app": major
4+
---
5+
6+
Added first version of Text Recognition module

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,3 +609,6 @@ $RECYCLE.BIN/
609609
# .pnp.*
610610

611611
# End of https://www.toptal.com/developers/gitignore/api/intellij,reactnative,turbo,yarn,react,macos,windows,swift,java,kotlin,objective-c
612+
613+
# Do not version control .claude folders
614+
.claude/

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ For usage instructions see the readme in each module's directory.
9696

9797
- [react-native-mlkit-core](./modules/react-native-mlkit-core/README.md)
9898
- [react-native-mlkit-face-detection](./modules/react-native-mlkit-face-detection/README.md)
99+
- [react-native-mlkit-text-recognition](./modules/react-native-mlkit-text-recognition/README.md)
99100

100101
## Building
101102

apps/ExampleApp/app.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
"foregroundImage": "./assets/images/app-icon-android-adaptive-foreground.png",
2929
"backgroundImage": "./assets/images/app-icon-android-adaptive-background.png",
3030
"backgroundColor": "#F4F2F1"
31-
}
31+
},
32+
"permissions": [
33+
"android.permission.RECORD_AUDIO"
34+
]
3235
},
3336
"ios": {
3437
"icon": "./assets/images/app-icon-ios.png",
@@ -37,7 +40,8 @@
3740
"infoPlist": {
3841
"NSCameraUsageDescription": "This app uses the camera to take pictures to demo the machine learning algorithms. (Face detection, Object detection and Image Labeling).",
3942
"NSPhotoLibraryUsageDescription": "This app uses the photo library to select images for Machine Learning purposes. i.e. Object and Image detection."
40-
}
43+
},
44+
"appleTeamId": "L7YNDPLSEB"
4145
},
4246
"web": {
4347
"favicon": "./assets/images/app-icon-web-favicon.png",
@@ -64,6 +68,12 @@
6468
"backgroundColor": "#F4F2F1",
6569
"imageWidth": 250
6670
}
71+
],
72+
[
73+
"expo-image-picker",
74+
{
75+
"photosPermission": "This app uses the photo library to select images for Machine Learning purposes. i.e. Object and Image detection."
76+
}
6777
]
6878
],
6979
"experiments": {

apps/ExampleApp/app/navigators/AppNavigator.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ export type AppStackParamList = {
3434
ImageLabeling: Record<string, never>
3535
ObjectDetection: Record<string, never>
3636
DocumentScanner: Record<string, never>
37+
TextRecognition: Record<string, never>
3738
// IGNITE_GENERATOR_ANCHOR_APP_STACK_PARAM_LIST
3839
}
3940

@@ -61,6 +62,7 @@ const AppStack = observer(function AppStack() {
6162
<Stack.Screen name="ImageLabeling" component={Screens.ImageLabelingScreen} />
6263
<Stack.Screen name="ObjectDetection" component={Screens.ObjectDetectionScreen} />
6364
<Stack.Screen name="DocumentScanner" component={Screens.DocumentScannerScreen} />
65+
<Stack.Screen name="TextRecognition" component={Screens.TextRecognitionScreen} />
6466
{/* IGNITE_GENERATOR_ANCHOR_APP_STACK_SCREENS */}
6567
</Stack.Navigator>
6668
)

apps/ExampleApp/app/screens/HomeScreen/demoInfo.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export interface DemoInfo {
1111
const FACE_DETECTION = require("../../../assets/images/face-detection.jpg")
1212
const FACE_HOLDER = require("../../../assets/images/welcome-face.png")
1313
const DOCUMENT_SCANNER = require("../../../assets/images/doc-scanner.png")
14+
const TEXT_RECOGNITION = require("../../../assets/images/text-recognition.png")
1415

1516
const ANDROID_ONLY_DEMOS: DemoInfo[] = [
1617
{
@@ -46,5 +47,11 @@ export const DEMO_LIST: DemoInfo[] = [
4647
screen: "ImageLabeling",
4748
image: FACE_HOLDER,
4849
},
50+
{
51+
title: "Text recognition",
52+
description: "Recognize text in an image",
53+
screen: "TextRecognition",
54+
image: TEXT_RECOGNITION,
55+
},
4956
...PLATFORM_SPECIFIC_DEMOS,
5057
]
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import React, { FC, useState, useEffect, useCallback } from "react"
2+
import { observer } from "mobx-react-lite"
3+
import { ViewStyle, View, ImageStyle, TextStyle, ScrollView, Pressable } from "react-native"
4+
import { NativeStackScreenProps } from "@react-navigation/native-stack"
5+
import { AppStackScreenProps } from "../navigators"
6+
import { Text, Icon, ImageSelector, Screen } from "../components"
7+
import { useTypedNavigation } from "../navigators/useTypedNavigation"
8+
9+
import { recognizeText, Text as RecognizedText } from "@infinitered/react-native-mlkit-text-recognition"
10+
import { UseExampleImageStatus, SelectedImage } from "../utils/useExampleImage"
11+
12+
// Props for the Text Recognition demo screen (no props are actually read).
// FIX: `AppStackScreenProps<T>` is already a complete screen-props type in the
// Ignite boilerplate; the original wrapped it in `NativeStackScreenProps` again,
// passing a props type where a param list is expected. Use it directly.
type TextRecognitionScreenProps = AppStackScreenProps<"TextRecognition">
13+
14+
function DebugOutput({ data }: { data: unknown }) {
15+
const [expanded, setExpanded] = useState(false)
16+
17+
return (
18+
<View style={$debugContainer}>
19+
<Pressable onPress={() => setExpanded(!expanded)} style={$debugHeader}>
20+
<Text style={$debugTitle}>{expanded ? "▼" : "▶"} Debug Output</Text>
21+
</Pressable>
22+
{expanded && (
23+
<ScrollView style={$debugContent} horizontal>
24+
<ScrollView nestedScrollEnabled>
25+
<Text style={$debugText}>{JSON.stringify(data, null, 2)}</Text>
26+
</ScrollView>
27+
</ScrollView>
28+
)}
29+
</View>
30+
)
31+
}
32+
/**
 * Demo screen for the ML Kit text-recognition module: lets the user take or
 * select a photo, runs `recognizeText` on its URI, and renders the recognized
 * text plus a collapsible debug dump of the raw result.
 */
export const TextRecognitionScreen: FC<TextRecognitionScreenProps> = observer(
  function TextRecognitionScreen() {
    const navigation = useTypedNavigation<"TextRecognition">()

    type Status = "init" | "noPermissions" | "done" | "error" | "loading" | UseExampleImageStatus

    const [image, setImage] = useState<SelectedImage | null>(null)
    const [result, setResult] = useState<RecognizedText | null>(null)
    const [status, setStatus] = useState<Status>("init")

    const handleImageChange = useCallback((nextImage: SelectedImage) => {
      setImage(nextImage)
    }, [])

    const onStatusChange = useCallback((nextStatus: Status) => {
      setStatus(nextStatus)
    }, [])

    // FIX: the original reset the status inside the `statusMessage` useMemo,
    // i.e. called setState during render. Memo callbacks must be pure, so the
    // reset now lives in an effect that fires when the image is cleared.
    useEffect(() => {
      if (!image) setStatus("init")
    }, [image])

    // Run recognition whenever a new image is selected. The `cancelled` flag
    // stops a stale in-flight result from clobbering state if the image
    // changes (or the screen unmounts) before recognition finishes.
    useEffect(() => {
      if (!image?.uri) return undefined
      let cancelled = false

      const recognizeImage = async () => {
        setStatus("recognizing")
        try {
          const recognitionResult = await recognizeText(image.uri)
          if (cancelled) return
          setResult(recognitionResult)
          setStatus("done")
        } catch (error) {
          console.error("Error recognizing image:", error)
          if (!cancelled) setStatus("error")
        }
      }

      // FIX: `recognizeImage().then(() => null)` was a no-op continuation;
      // `void` makes the intentional fire-and-forget explicit (errors are
      // already handled inside the async function).
      void recognizeImage()

      return () => {
        cancelled = true
      }
    }, [image])

    // Human-readable caption for the current pipeline status. Pure, and only
    // depends on `status` (the original also listed the unused `result` and
    // `image` deps).
    const statusMessage = React.useMemo(() => {
      switch (status) {
        case "init":
          return "Take a photo or select one from your camera roll"
        case "noPermissions":
          return "You need to grant camera permissions to take a photo"
        case "takingPhoto":
          return "Taking photo..."
        case "selectingPhoto":
          return "Selecting photo..."
        case "done":
          return "Done!"
        case "error":
          return "Error during recognition!"
        case "recognizing":
          return "Recognizing Image..."
        case "loading":
          return "Loading Example Images..."
        default:
          // FIX: the original threw here, which would crash the screen for any
          // UseExampleImageStatus member it forgot to handle (e.g.
          // "classifying"). Fall back to echoing the raw status instead.
          return `${status}...`
      }
    }, [status])

    const clearResults = useCallback(() => {
      setResult(null)
    }, [])

    return (
      <Screen style={$root} preset="scroll" safeAreaEdges={["top", "bottom"]}>
        <View>
          <Icon icon={"back"} onPress={() => navigation.navigate("Home")} style={$backIcon} />
          <Text preset={"heading"} text="Text Recognition" />
          <Text style={$description}>Take a photo, and extract text from it.</Text>
        </View>
        <ImageSelector
          onImageChange={handleImageChange}
          onImageClear={clearResults}
          onStatusChange={onStatusChange}
          statusMessage={statusMessage}
          status={status}
          isLoading={false}
          images={{
            filter: "all",
            groupBy: "label",
          }}
        />

        {result && (
          <>
            <View style={$resultContainer}>
              <Text preset="subheading">Recognized Text</Text>
              <Text style={$resultText}>{result.text}</Text>
            </View>
            <DebugOutput data={result} />
          </>
        )}
      </Screen>
    )
  },
)
134+
135+
// ---------------------------------------------------------------------------
// Styles (file convention: `$`-prefixed module-level style constants)
// ---------------------------------------------------------------------------

// Screen container: fill available space, pad content, stack children.
const $root: ViewStyle = {
  flex: 1,
  padding: 16,
  display: "flex",
  flexDirection: "column",
}
// Back-navigation icon spacing above the heading.
const $backIcon: ImageStyle = { marginVertical: 8 }

// Subtitle text under the screen heading.
const $description: TextStyle = {
  marginVertical: 8,
  color: "rgba(0,0,0,0.6)",
}

// Bordered card that holds the recognized text output.
const $resultContainer: ViewStyle = {
  width: "100%",
  borderWidth: 1,
  borderColor: "rgba(0,0,0,0.2)",
  borderRadius: 8,
  padding: 12,
  marginVertical: 16,
}

const $resultText: TextStyle = {
  marginTop: 8,
}

// Outer frame of the collapsible DebugOutput panel. `overflow: "hidden"`
// keeps the header background clipped to the rounded corners.
const $debugContainer: ViewStyle = {
  width: "100%",
  borderWidth: 1,
  borderColor: "rgba(0,0,0,0.2)",
  borderRadius: 8,
  marginBottom: 24,
  overflow: "hidden",
}

// Tappable header row of the debug panel.
const $debugHeader: ViewStyle = {
  padding: 12,
  backgroundColor: "rgba(0,0,0,0.05)",
}

const $debugTitle: TextStyle = {
  fontWeight: "bold",
}

// Scrollable body of the debug panel, capped so it never dominates the screen.
const $debugContent: ViewStyle = {
  maxHeight: 300,
  padding: 12,
  backgroundColor: "rgba(0,0,0,0.02)",
}

// Monospace for the JSON dump so indentation lines up.
const $debugText: TextStyle = {
  fontFamily: "monospace",
  fontSize: 12,
}

apps/ExampleApp/app/screens/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ export * from "./ImageLabelingScreen"
77
export * from "./DocumentScannerScreen"
88
export { BOX_COLORS } from "./FaceDetectionScreen"
99
export * from "./ObjectDetectionScreen"
10+
export * from "./TextRecognitionScreen"

apps/ExampleApp/app/utils/useExampleImage/useExampleImage.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ export type UseExampleImageStatus =
2323
| "takingPhoto"
2424
| "selectingPhoto"
2525
| "classifying"
26+
| "recognizing"
2627
| "done"
2728
| "error"
2829
| "loading"
@@ -126,13 +127,13 @@ export function useExampleImage(predicates?: {
126127
return
127128
}
128129
setStatus("takingPhoto")
129-
const result = await launchCameraAsync(IMAGE_PICKER_OPTIONS)
130+
const result: ImagePickerResult = await launchCameraAsync(IMAGE_PICKER_OPTIONS)
130131
if (result.assets?.[0]) {
131132
setImage({ ...result.assets?.[0], localUri: result.assets?.[0].uri } as SelectedImage)
132133
} else {
133134
setImage(undefined)
134135
}
135-
}, [checkPermissions, setStatus]) // Note: Removed parentheses from launchCameraAsync
136+
}, [checkPermissions, setStatus])
136137

137138
const [currentIndexes, setCurrentIndexes] = useState<Record<string, number>>(
138139
{} as Record<string, number>,
625 KB
Loading

0 commit comments

Comments
 (0)