diff --git a/core/src/main/java/ai/z/openapi/AbstractAiClient.java b/core/src/main/java/ai/z/openapi/AbstractAiClient.java index 66e8261..b6de2d5 100644 --- a/core/src/main/java/ai/z/openapi/AbstractAiClient.java +++ b/core/src/main/java/ai/z/openapi/AbstractAiClient.java @@ -19,6 +19,8 @@ import ai.z.openapi.service.image.ImageServiceImpl; import ai.z.openapi.service.batches.BatchService; import ai.z.openapi.service.batches.BatchServiceImpl; +import ai.z.openapi.service.ocr.HandwritingOcrService; +import ai.z.openapi.service.ocr.HandwritingOcrServiceImpl; import ai.z.openapi.service.web_search.WebSearchService; import ai.z.openapi.service.web_search.WebSearchServiceImpl; import ai.z.openapi.service.web_reader.WebReaderService; @@ -116,6 +118,9 @@ public abstract class AbstractAiClient extends AbstractClientBaseService { /** FileParsing service for fileParsing operations */ private FileParsingService fileParsingService; + /** Handwriting OCR service, created lazily by handwriting() */ + private HandwritingOcrService handwritingOcrService; + /** Moderation service for content safety detection */ private ModerationService moderationService; @@ -295,6 +300,13 @@ public synchronized FileParsingService fileParsing() { return fileParsingService; } + public synchronized HandwritingOcrService handwriting() { + if (handwritingOcrService == null) { + this.handwritingOcrService = new HandwritingOcrServiceImpl(this); + } + return handwritingOcrService; + } + /** * Returns the moderation service for content safety detection. This service handles * content moderation for text, image, video, and audio inputs. 
diff --git a/core/src/main/java/ai/z/openapi/api/ocr/HandwritingOcrApi.java b/core/src/main/java/ai/z/openapi/api/ocr/HandwritingOcrApi.java new file mode 100644 index 0000000..e38623b --- /dev/null +++ b/core/src/main/java/ai/z/openapi/api/ocr/HandwritingOcrApi.java @@ -0,0 +1,24 @@ +package ai.z.openapi.api.ocr; + +import ai.z.openapi.service.ocr.HandwritingOcrResult; +import okhttp3.MultipartBody; +import retrofit2.Call; +import retrofit2.http.Body; +import retrofit2.http.POST; + +/** + * Handwriting OCR API. Provides functionality to upload an image for + * handwriting recognition and retrieve the parsed result. + */ +public interface HandwritingOcrApi { + + /** + * Creates the POST call to {@code files/ocr} for handwriting recognition. + * @param multipartBody The multipart request body containing the image file and + * metadata. + * @return The Retrofit call to execute; presumably yields a HandwritingOcrResult (confirm). + */ + @POST("files/ocr") + Call recognize(@Body MultipartBody multipartBody); + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResponse.java b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResponse.java new file mode 100644 index 0000000..ac5f0d1 --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResponse.java @@ -0,0 +1,35 @@ +package ai.z.openapi.service.ocr; + +import ai.z.openapi.core.model.ClientResponse; +import ai.z.openapi.service.model.ChatError; +import lombok.Data; + +@Data +public class HandwritingOcrResponse implements ClientResponse { + + /** + * Response status code. + */ + private int code; + + /** + * Response message. + */ + private String msg; + + /** + * Indicates whether the request was successful. + */ + private boolean success; + + /** + * The handwriting OCR result data. + */ + private HandwritingOcrResult data; + + /** + * Error information if the request failed. 
+ */ + private ChatError error; + +} diff --git a/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResult.java b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResult.java new file mode 100644 index 0000000..b0f47e6 --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrResult.java @@ -0,0 +1,24 @@ +package ai.z.openapi.service.ocr; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.List; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class HandwritingOcrResult { + + private String task_id; // Task ID or result ID + + private String message; // Response message + + private String status; // OCR task status (e.g., "succeeded") + + private int words_result_num; // Number of recognition results + + private List words_result; // Recognition results (presumably WordsResult entries; confirm) + +} diff --git a/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrService.java b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrService.java new file mode 100644 index 0000000..b467bcd --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrService.java @@ -0,0 +1,13 @@ +package ai.z.openapi.service.ocr; + +public interface HandwritingOcrService { + + /** + * Executes a synchronous handwriting recognition operation. 
+ * @param request The OCR upload request (file path, tool type, optional language + * type and probability flag) + * @return HandwritingOcrResponse containing the recognition result + */ + HandwritingOcrResponse recognize(HandwritingOcrUploadReq request); + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrServiceImpl.java b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrServiceImpl.java new file mode 100644 index 0000000..4def6a5 --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrServiceImpl.java @@ -0,0 +1,102 @@ +package ai.z.openapi.service.ocr; + +import ai.z.openapi.AbstractAiClient; +import ai.z.openapi.api.ocr.HandwritingOcrApi; +import ai.z.openapi.utils.RequestSupplier; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.reactivex.rxjava3.core.Single; +import okhttp3.MediaType; +import okhttp3.MultipartBody; +import okhttp3.RequestBody; +import retrofit2.Response; + +import java.io.File; +import java.io.IOException; + +/** + * Handwriting OCR service implementation that uploads a local image as multipart/form-data + */ +public class HandwritingOcrServiceImpl implements HandwritingOcrService { + + private final AbstractAiClient zAiClient; + + private final HandwritingOcrApi handwritingOcrApi; + + public HandwritingOcrServiceImpl(AbstractAiClient zAiClient) { + this.zAiClient = zAiClient; + this.handwritingOcrApi = zAiClient.retrofit().create(HandwritingOcrApi.class); + } + + @Override + public HandwritingOcrResponse recognize(HandwritingOcrUploadReq request) { + if (request == null) { + throw new IllegalArgumentException("request cannot be null"); + } + if (request.getFilePath() == null) { + throw new IllegalArgumentException("filePath cannot be null"); + } + if (request.getToolType() == null) { + throw new IllegalArgumentException("toolType cannot be null"); + } + + RequestSupplier supplier = params -> { + try { + File file = new File(params.getFilePath()); + if (!file.exists()) { + throw new RuntimeException("file 
not found at " + params.getFilePath()); + } + String toolType = params.getToolType(); + String languageType = params.getLanguageType(); + Boolean probability = params.getProbability(); + + // Build the multipart/form-data body: file, tool_type, optional language_type and probability + MultipartBody.Part filePart = MultipartBody.Part.createFormData("file", file.getName(), + RequestBody.create(MediaType.parse("application/octet-stream"), file)); + MultipartBody.Builder formBodyBuilder = new MultipartBody.Builder().setType(MultipartBody.FORM); + formBodyBuilder.addPart(filePart); + formBodyBuilder.addFormDataPart("tool_type", toolType); + if (languageType != null) { + formBodyBuilder.addFormDataPart("language_type", languageType); + } + if (probability != null) { + formBodyBuilder.addFormDataPart("probability", String.valueOf(probability)); + } + + MultipartBody multipartBody = formBodyBuilder.build(); + + // Execute the POST synchronously; an unsuccessful or empty response becomes an IOException + retrofit2.Call call = handwritingOcrApi.recognize(multipartBody); + Response response = call.execute(); + if (!response.isSuccessful() || response.body() == null) { + String errorJson = ""; + if (response.errorBody() != null) { + errorJson = response.errorBody().string(); + } + String msg = response.message(); + String msgFromBody = ""; + if (errorJson != null && errorJson.trim().startsWith("{")) { + try { + ObjectMapper mapper = new ObjectMapper(); + HandwritingOcrResult errorResult = mapper.readValue(errorJson, HandwritingOcrResult.class); + msgFromBody = errorResult.getMessage(); + } + catch (Exception e) { + msgFromBody = errorJson; + } + } + throw new IOException("Failed to recognize, code: " + response.code() + ", msg: " + msg + + (msgFromBody.isEmpty() ? 
"" : (", detail: " + msgFromBody))); + } + + return Single.just(response.body()); + + } + catch (Exception e) { + throw new RuntimeException(e); + } + }; + + return this.zAiClient.executeRequest(request, supplier, HandwritingOcrResponse.class); + } + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrUploadReq.java b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrUploadReq.java new file mode 100644 index 0000000..3ce7476 --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/HandwritingOcrUploadReq.java @@ -0,0 +1,20 @@ +package ai.z.openapi.service.ocr; + +import ai.z.openapi.core.model.ClientRequest; +import lombok.Data; + +/** + * Handwriting OCR upload request object + */ +@Data +public class HandwritingOcrUploadReq implements ClientRequest { + + private String filePath; // Path to the local image file to upload + + private String toolType; // Tool type, must be "hand_write" + + private String languageType; // Language type (optional), e.g. "CHN_ENG" + + private Boolean probability; // Optional flag sent as the "probability" form field; presumably requests per-line confidence (confirm) + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/Location.java b/core/src/main/java/ai/z/openapi/service/ocr/Location.java new file mode 100644 index 0000000..b1cc21d --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/Location.java @@ -0,0 +1,20 @@ +package ai.z.openapi.service.ocr; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class Location { + + private int left; + + private int top; + + private int width; + + private int height; + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/Probability.java b/core/src/main/java/ai/z/openapi/service/ocr/Probability.java new file mode 100644 index 0000000..48a147c --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/Probability.java @@ -0,0 +1,18 
@@ +package ai.z.openapi.service.ocr; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class Probability { + + private Double average; // Average confidence of the line + + private Double variance; // Confidence variance of the line + + private Double min; // Minimum confidence of the line + +} \ No newline at end of file diff --git a/core/src/main/java/ai/z/openapi/service/ocr/WordsResult.java b/core/src/main/java/ai/z/openapi/service/ocr/WordsResult.java new file mode 100644 index 0000000..6008eac --- /dev/null +++ b/core/src/main/java/ai/z/openapi/service/ocr/WordsResult.java @@ -0,0 +1,18 @@ +package ai.z.openapi.service.ocr; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class WordsResult { + + private Location location; // Location information for detected text + + private String words; // Recognized text + + private Probability probability; // Confidence statistics for this line (average, variance, min) + +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 243e11c..ca10bc2 100644 --- a/pom.xml +++ b/pom.xml @@ -45,7 +45,7 @@ - 0.1.0 + 0.1.1 8 UTF-8 UTF-8 diff --git a/samples/src/main/ai.z.openapi.samples/HandwritingOcrExample.java b/samples/src/main/ai.z.openapi.samples/HandwritingOcrExample.java new file mode 100644 index 0000000..52276b6 --- /dev/null +++ b/samples/src/main/ai.z.openapi.samples/HandwritingOcrExample.java @@ -0,0 +1,67 @@ +package ai.z.openapi.samples; + +import ai.z.openapi.ZaiClient; +import ai.z.openapi.service.ocr.HandwritingOcrResponse; +import ai.z.openapi.service.ocr.HandwritingOcrResult; +import ai.z.openapi.service.ocr.HandwritingOcrUploadReq; +import ai.z.openapi.service.ocr.WordsResult; + +public class HandwritingOcrExample { + + public static void main(String[] args) { + // It is recommended to set the API Key via 
environment variable + // export ZAI_API_KEY=your.api_key + // ZaiClient client = ZaiClient.builder().build(); + + // You can also set the API Key directly in the code for testing + ZaiClient client = ZaiClient.builder() + .apiKey("your-real-api-key") + .build(); + + try { + System.out.println("=== Handwriting OCR Example ==="); + + String filePath = ""; // Change to your own image path + HandwritingOcrResponse response = syncHandwritingOcrExample(client, filePath, "hand_write", "CHN_ENG", true); + if (response != null && response.getData() != null) { + System.out.println(response.getData()); + } else { + System.out.println("Recognition failed."); + } + } catch (Exception e) { + System.err.println("Exception occurred: " + e.getMessage()); + e.printStackTrace(); + } + } + + /** Example: upload an image and perform handwriting OCR recognition. + * @param client ZaiClient instance + * @param filePath Path of the image file + * @param toolType Type of recognition tool + * @param languageType Language type (optional) + * @param probability Optional flag copied onto the request + * @return OCR response object, or null if the path is blank or the call throws + */ + private static HandwritingOcrResponse syncHandwritingOcrExample(ZaiClient client, String filePath, String toolType, + String languageType, Boolean probability) { + if (filePath == null || filePath.trim().isEmpty()) { + System.err.println("Invalid file path."); + return null; + } + try { + HandwritingOcrUploadReq uploadReq = new HandwritingOcrUploadReq(); + uploadReq.setFilePath(filePath); + uploadReq.setToolType(toolType); // Must be "hand_write" + uploadReq.setLanguageType(languageType); // Can be "CHN_ENG", "ENG", etc. 
+ uploadReq.setProbability(probability); + System.out.println(uploadReq.toString()); + System.out.println("Uploading the image and performing handwriting recognition...calling API"); + return client.handwriting().recognize(uploadReq); + } + catch (Exception e) { + System.err.println("Handwriting recognition task error: " + e.getMessage()); + } + // Returning null indicates failure + return null; + } +} \ No newline at end of file