99import com .alibaba .dashscope .exception .ApiException ;
1010import com .alibaba .dashscope .exception .NoApiKeyException ;
1111import com .alibaba .dashscope .exception .UploadFileException ;
12+ import com .alibaba .dashscope .utils .JsonUtils ;
1213import com .google .gson .JsonObject ;
1314import io .reactivex .Flowable ;
1415
1819import java .util .Map ;
1920
2021public class MultiModalConversationQwenVLOcr {
21- private static final String modelName = "qwen-vl-ocr-2025-02-18 " ;
22+ private static final String modelName = "qwen-vl-ocr-2025-08-28 " ;
2223 public static void videoImageListSample () throws ApiException , NoApiKeyException , UploadFileException {
2324 MultiModalConversation conv = new MultiModalConversation ();
2425 MultiModalMessage systemMessage = MultiModalMessage .builder ()
@@ -28,20 +29,20 @@ public static void videoImageListSample() throws ApiException, NoApiKeyException
2829
2930 Map <String , Object > imageContent = new HashMap <>();
3031 imageContent .put ("type" , "image" );
31- imageContent .put ("image" , "http ://duguang-llm.oss-cn-hangzhou .aliyuncs.com/llm_data_keeper/public_data/POIE/test_subset/nf0986 .jpg" );
32- imageContent .put ("min_pixels" , "3136 " );
33- imageContent .put ("max_pixels" , "2007040 " );
32+ imageContent .put ("image" , "https ://help-static-aliyun-doc .aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun .jpg" );
33+ imageContent .put ("min_pixels" , "401408 " );
34+ imageContent .put ("max_pixels" , "6422528 " );
3435 imageContent .put ("enable_rotate" , false );
3536
3637 Map <String , Object > textContent = new HashMap <>();
3738 textContent .put ("type" , "text" );
38- textContent .put ("text" , "提取图像中的文字 。" );
39+ textContent .put ("text" , "定位所有的文字行,并且返回旋转矩形([cx, cy, width, height, angle])的坐标结果 。" );
3940
4041 JsonObject resultSchema = new JsonObject ();
4142 resultSchema .addProperty ("Calories" , "" );
4243
4344 OcrOptions ocrOptions = OcrOptions .builder ()
44- .task (OcrOptions .Task .KEY_INFORMATION_EXTRACTION )
45+ .task (OcrOptions .Task .ADVANCED_RECOGNITION )
4546 .taskConfig (OcrOptions .TaskConfig .builder ()
4647 .resultSchema (resultSchema )
4748 .build ())
@@ -65,6 +66,7 @@ public static void videoImageListSample() throws ApiException, NoApiKeyException
6566
6667 MultiModalConversationResult result = conv .call (param );
6768 System .out .println (result );
69+ System .out .println (JsonUtils .toJson (result ));
6870// Flowable<MultiModalConversationResult> result = conv.streamCall(param);
6971// result.blockingForEach(System.out::println);
7072 }
0 commit comments