Skip to content

Commit 6d58cea

Browse files
code-c-lightmengqiantomsun28
authored
feat: support OCR handwriting recognition feature (#65)
Co-authored-by: mengqian <[email protected]> Co-authored-by: Tomsun28 <[email protected]>
1 parent b0f2e80 commit 6d58cea

File tree

12 files changed

+354
-1
lines changed

12 files changed

+354
-1
lines changed

core/src/main/java/ai/z/openapi/AbstractAiClient.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import ai.z.openapi.service.image.ImageServiceImpl;
2020
import ai.z.openapi.service.batches.BatchService;
2121
import ai.z.openapi.service.batches.BatchServiceImpl;
22+
import ai.z.openapi.service.ocr.HandwritingOcrService;
23+
import ai.z.openapi.service.ocr.HandwritingOcrServiceImpl;
2224
import ai.z.openapi.service.web_search.WebSearchService;
2325
import ai.z.openapi.service.web_search.WebSearchServiceImpl;
2426
import ai.z.openapi.service.web_reader.WebReaderService;
@@ -116,6 +118,9 @@ public abstract class AbstractAiClient extends AbstractClientBaseService {
116118
/** FileParsing service for fileParsing operations */
117119
private FileParsingService fileParsingService;
118120

121+
/** Handwriting OCR service for handwriting recognition operations */
122+
private HandwritingOcrService handwritingOcrService;
123+
119124
/** Moderation service for content safety detection */
120125
private ModerationService moderationService;
121126

@@ -295,6 +300,13 @@ public synchronized FileParsingService fileParsing() {
295300
return fileParsingService;
296301
}
297302

303+
public synchronized HandwritingOcrService handwriting() {
304+
if (handwritingOcrService == null) {
305+
this.handwritingOcrService = new HandwritingOcrServiceImpl(this);
306+
}
307+
return handwritingOcrService;
308+
}
309+
298310
/**
299311
* Returns the moderation service for content safety detection. This service handles
300312
* content moderation for text, image, video, and audio inputs.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package ai.z.openapi.api.ocr;
2+
3+
import ai.z.openapi.service.ocr.HandwritingOcrResult;
4+
import okhttp3.MultipartBody;
5+
import retrofit2.Call;
6+
import retrofit2.http.Body;
7+
import retrofit2.http.POST;
8+
9+
/**
 * Retrofit API definition for OCR handwriting recognition. Declares the endpoint used to
 * upload an image for handwriting recognition and retrieve the parsed result.
 */
13+
public interface HandwritingOcrApi {
14+
15+
/**
 * Creates the call that posts the multipart body to the {@code files/ocr} endpoint.
 * The request is sent when the returned call is executed by the caller.
 * @param multipartBody The multipart request body containing the image file and
 * metadata.
 * @return A call whose response body is the recognition result as a
 * {@link HandwritingOcrResult}.
 */
21+
@POST("files/ocr")
22+
Call<HandwritingOcrResult> recognize(@Body MultipartBody multipartBody);
23+
24+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import ai.z.openapi.core.model.ClientResponse;
4+
import ai.z.openapi.service.model.ChatError;
5+
import lombok.Data;
6+
7+
/**
 * Client-side response wrapper for a handwriting OCR request. Holds the status code,
 * message and success flag together with either the recognition result or the error
 * information.
 */
@Data
8+
public class HandwritingOcrResponse implements ClientResponse<HandwritingOcrResult> {
9+
10+
/**
 * Response status code.
 */
13+
private int code;
14+
15+
/**
 * Response message.
 */
18+
private String msg;
19+
20+
/**
 * Indicates whether the request was successful.
 */
23+
private boolean success;
24+
25+
/**
 * The handwriting OCR result data.
 */
28+
private HandwritingOcrResult data;
29+
30+
/**
 * Error information if the request failed.
 */
33+
private ChatError error;
34+
35+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
7+
import java.util.List;
8+
9+
/**
 * Result payload of a handwriting OCR request: task identifier, message, status and the
 * list of recognized text entries.
 *
 * NOTE(review): the field names are snake_case, presumably to mirror the JSON keys of the
 * API response - confirm before renaming, because the Lombok-generated accessors take
 * their names from these fields.
 */
@Data
10+
@AllArgsConstructor
11+
@NoArgsConstructor
12+
public class HandwritingOcrResult {
13+
14+
private String task_id; // Task ID or result ID
15+
16+
private String message; // Response message
17+
18+
private String status; // OCR task status (e.g., "succeeded")
19+
20+
private int words_result_num; // Number of recognition results
21+
22+
private List<WordsResult> words_result; // List of recognition results
23+
24+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
/**
 * Service contract for OCR handwriting recognition.
 */
public interface HandwritingOcrService {
4+
5+
/**
 * Executes a synchronous handwriting recognition operation.
 * @param request The OCR upload request (contains file path, tool type, language
 * type and the optional probability flag)
 * @return HandwritingOcrResponse containing the recognition result
 */
11+
HandwritingOcrResponse recognize(HandwritingOcrUploadReq request);
12+
13+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import ai.z.openapi.AbstractAiClient;
4+
import ai.z.openapi.api.ocr.HandwritingOcrApi;
5+
import ai.z.openapi.utils.RequestSupplier;
6+
import com.fasterxml.jackson.databind.ObjectMapper;
7+
import io.reactivex.rxjava3.core.Single;
8+
import okhttp3.MediaType;
9+
import okhttp3.MultipartBody;
10+
import okhttp3.RequestBody;
11+
import retrofit2.Response;
12+
13+
import java.io.File;
14+
import java.io.IOException;
15+
16+
/**
17+
* OCR handwriting recognition service implementation
18+
*/
19+
public class HandwritingOcrServiceImpl implements HandwritingOcrService {
20+
21+
private final AbstractAiClient zAiClient;
22+
23+
private final HandwritingOcrApi handwritingOcrApi;
24+
25+
public HandwritingOcrServiceImpl(AbstractAiClient zAiClient) {
26+
this.zAiClient = zAiClient;
27+
this.handwritingOcrApi = zAiClient.retrofit().create(HandwritingOcrApi.class);
28+
}
29+
30+
@Override
31+
public HandwritingOcrResponse recognize(HandwritingOcrUploadReq request) {
32+
if (request == null) {
33+
throw new IllegalArgumentException("request cannot be null");
34+
}
35+
if (request.getFilePath() == null) {
36+
throw new IllegalArgumentException("filePath cannot be null");
37+
}
38+
if (request.getToolType() == null) {
39+
throw new IllegalArgumentException("toolType cannot be null");
40+
}
41+
42+
RequestSupplier<HandwritingOcrUploadReq, HandwritingOcrResult> supplier = params -> {
43+
try {
44+
File file = new File(params.getFilePath());
45+
if (!file.exists()) {
46+
throw new RuntimeException("file not found at " + params.getFilePath());
47+
}
48+
String toolType = params.getToolType();
49+
String languageType = params.getLanguageType();
50+
Boolean probability = params.getProbability();
51+
52+
// Build multipart/form-data
53+
MultipartBody.Part filePart = MultipartBody.Part.createFormData("file", file.getName(),
54+
RequestBody.create(MediaType.parse("application/octet-stream"), file));
55+
MultipartBody.Builder formBodyBuilder = new MultipartBody.Builder().setType(MultipartBody.FORM);
56+
formBodyBuilder.addPart(filePart);
57+
formBodyBuilder.addFormDataPart("tool_type", toolType);
58+
if (languageType != null) {
59+
formBodyBuilder.addFormDataPart("language_type", languageType);
60+
}
61+
if (probability != null) {
62+
formBodyBuilder.addFormDataPart("probability", String.valueOf(probability));
63+
}
64+
65+
MultipartBody multipartBody = formBodyBuilder.build();
66+
67+
// Send POST request
68+
retrofit2.Call<HandwritingOcrResult> call = handwritingOcrApi.recognize(multipartBody);
69+
Response<HandwritingOcrResult> response = call.execute();
70+
if (!response.isSuccessful() || response.body() == null) {
71+
String errorJson = "";
72+
if (response.errorBody() != null) {
73+
errorJson = response.errorBody().string();
74+
}
75+
String msg = response.message();
76+
String msgFromBody = "";
77+
if (errorJson != null && errorJson.trim().startsWith("{")) {
78+
try {
79+
ObjectMapper mapper = new ObjectMapper();
80+
HandwritingOcrResult errorResult = mapper.readValue(errorJson, HandwritingOcrResult.class);
81+
msgFromBody = errorResult.getMessage();
82+
}
83+
catch (Exception e) {
84+
msgFromBody = errorJson;
85+
}
86+
}
87+
throw new IOException("Failed to recognize, code: " + response.code() + ", msg: " + msg
88+
+ (msgFromBody.isEmpty() ? "" : (", detail: " + msgFromBody)));
89+
}
90+
91+
return Single.just(response.body());
92+
93+
}
94+
catch (Exception e) {
95+
throw new RuntimeException(e);
96+
}
97+
};
98+
99+
return this.zAiClient.executeRequest(request, supplier, HandwritingOcrResponse.class);
100+
}
101+
102+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import ai.z.openapi.core.model.ClientRequest;
4+
import lombok.Data;
5+
6+
/**
 * Handwriting OCR upload request object. Carries the path of the local image file and
 * the form parameters sent along with the recognition request.
 */
9+
@Data
10+
public class HandwritingOcrUploadReq implements ClientRequest<HandwritingOcrUploadReq> {
11+
12+
private String filePath; // Path to the image file (required)
13+
14+
private String toolType; // Tool type, must be "hand_write" (required)
15+
16+
private String languageType; // Language type (optional)
17+
18+
// Optional flag. NOTE(review): presumably asks the service to return a confidence
// score for each recognized line of text - confirm against the API documentation.
private Boolean probability;
19+
20+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
7+
/**
 * Location information for a piece of detected text, given as left/top offsets plus a
 * width and a height.
 *
 * NOTE(review): units and coordinate origin are not stated in this file - presumably
 * pixels measured from the top-left corner of the image; confirm.
 */
@Data
8+
@AllArgsConstructor
9+
@NoArgsConstructor
10+
public class Location {
11+
12+
private int left; // Left offset of the text region
13+
14+
private int top; // Top offset of the text region
15+
16+
private int width; // Width of the text region
17+
18+
private int height; // Height of the text region
19+
20+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
7+
/**
 * Confidence statistics for one recognized line of text: average, variance and minimum.
 */
@Data
8+
@AllArgsConstructor
9+
@NoArgsConstructor
10+
public class Probability {
11+
12+
private Double average; // Average confidence of the line
13+
14+
private Double variance; // Confidence variance of the line
15+
16+
private Double min; // Minimum confidence of the line
17+
18+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package ai.z.openapi.service.ocr;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
7+
/**
 * A single recognition result entry: the recognized text, where it was detected and the
 * confidence statistics for it.
 */
@Data
8+
@AllArgsConstructor
9+
@NoArgsConstructor
10+
public class WordsResult {
11+
12+
private Location location; // Location information for detected text
13+
14+
private String words; // Recognized text
15+
16+
private Probability probability; // Confidence statistics for this line of recognized text
17+
18+
}

0 commit comments

Comments
 (0)