
Commit 2d32266

[Cherry-Pick]add 'max_image_tokens' param for docbee serving (#3888)
1 parent: 9e547f0

File tree

4 files changed: +56 -2 lines changed

docs/pipeline_usage/tutorials/vlm_pipelines/doc_understanding.en.md

Lines changed: 7 additions & 0 deletions
@@ -399,6 +399,13 @@ Below is a basic service deployment API reference and multilingual service call
 <td>Optional</td>
 <td>false</td>
 </tr>
+<tr>
+<td><code>max_image_tokens</code></td>
+<td><code>int</code></td>
+<td>Maximum number of tokens for the input image</td>
+<td>Optional</td>
+<td>None</td>
+</tr>
 </tbody>
 </table>

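
Usage note: the doc-understanding pipeline is served through an OpenAI-compatible chat completions endpoint, so the new max_image_tokens field can be passed through the OpenAI Python client's extra_body. A minimal sketch follows; the base URL, model name, image URL, and the 2200-token budget are illustrative assumptions, not values taken from this commit.

# Hypothetical client-side sketch; base_url, model name, and the
# token budget below are assumptions for illustration.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:8080/v1",  # assumed serving address
    api_key="EMPTY",  # assumed: a local server that ignores the key
)

response = client.chat.completions.create(
    model="pp-docbee",  # assumed model name
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is the invoice total?"},
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/invoice.jpg"}},
            ],
        }
    ],
    # Non-OpenAI fields are forwarded in the request body, where the
    # server reads them via InferRequest.
    extra_body={"max_image_tokens": 2200},
)
print(response.choices[0].message.content)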

docs/pipeline_usage/tutorials/vlm_pipelines/doc_understanding.md

Lines changed: 7 additions & 0 deletions
@@ -399,6 +399,13 @@ for res in output:
 <td>No</td>
 <td>false</td>
 </tr>
+<tr>
+<td><code>max_image_tokens</code></td>
+<td><code>int</code></td>
+<td>Maximum number of input tokens for the image</td>
+<td>No</td>
+<td>None</td>
+</tr>
 </tbody>
 </table>


paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py

Lines changed: 41 additions & 2 deletions
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import math
 import time
 from typing import Any, List

+from .....utils import logging
 from .....utils.deps import function_requires_deps, is_dep_available
 from ...infra import utils as serving_utils
 from ...infra.config import AppConfig
@@ -35,9 +37,11 @@
     from openai.types.chat import ChatCompletion
     from openai.types.chat.chat_completion import Choice as ChatCompletionChoice
     from openai.types.chat.chat_completion_message import ChatCompletionMessage
+if is_dep_available("pillow"):
+    from PIL import Image


-@function_requires_deps("fastapi", "openai")
+@function_requires_deps("fastapi", "openai", "pillow")
 def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
     app, ctx = create_app(
         pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
@@ -55,6 +59,30 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
     )
     async def _infer(request: InferRequest) -> "ChatCompletion":
         pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        def _resize_image_with_token_limit(image, max_token_num=2200, tile_size=28):
+            image = Image.fromarray(image)
+            w0, h0 = image.width, image.height
+            tokens = math.ceil(w0 / tile_size) * math.ceil(h0 / tile_size)
+            if tokens <= max_token_num:
+                return image
+
+            k = math.sqrt(
+                max_token_num / (math.ceil(w0 / tile_size) * math.ceil(h0 / tile_size))
+            )
+            k = min(1.0, k)
+            w_new = max(int(w0 * k), tile_size)
+            h_new = max(int(h0 * k), tile_size)
+            new_size = (w_new, h_new)
+            resized_image = image.resize(new_size)
+            tokens_new = math.ceil(w_new / tile_size) * math.ceil(h_new / tile_size)
+            logging.info(
+                f"Resizing image from {w0}x{h0} to {w_new}x{h_new}, "
+                f"which will reduce the image tokens from {tokens} to {tokens_new}."
+            )
+
+            return resized_image

         def _process_messages(messages: List[Message]):
             system_message = ""
@@ -88,9 +116,20 @@ def _process_messages(messages: List[Message]):
             return system_message, user_message, image_url

         system_message, user_message, image_url = _process_messages(request.messages)
+        if request.max_image_tokens is not None:
+            if image_url.startswith("data:image"):
+                _, image_url = image_url.split(",", 1)
+            img_bytes = await serving_utils.get_raw_bytes_async(
+                image_url, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(img_bytes)
+            image = _resize_image_with_token_limit(image, request.max_image_tokens)
+        else:
+            image = image_url
+
         result = (
             await pipeline.infer(
-                {"image": image_url, "query": user_message},
+                {"image": image, "query": user_message},
             )
         )[0]

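
To make the resizing arithmetic concrete: the server estimates one visual token per 28x28 tile, i.e. tokens = ceil(w/28) * ceil(h/28), and when that exceeds the budget it scales both sides by k = sqrt(budget/tokens), which shrinks the tile count by roughly k^2. Below is a minimal standalone sketch of the same math, an illustration mirroring the patched helper rather than the committed code; note that the per-axis ceilings mean the result can land slightly above the budget.

import math

def estimate_tokens(width: int, height: int, tile_size: int = 28) -> int:
    # One visual token per tile_size x tile_size tile, rounded up per axis.
    return math.ceil(width / tile_size) * math.ceil(height / tile_size)

def target_size(width: int, height: int, max_token_num: int = 2200,
                tile_size: int = 28):
    # Keep the original size if already within budget.
    tokens = estimate_tokens(width, height, tile_size)
    if tokens <= max_token_num:
        return width, height
    # Scaling both sides by k scales the tile count by ~k**2,
    # so k = sqrt(budget / tokens) lands near the budget.
    k = min(1.0, math.sqrt(max_token_num / tokens))
    return max(int(width * k), tile_size), max(int(height * k), tile_size)

# Example: a 2480x3508 page (A4 at 300 DPI) is ~11214 tokens,
# well over a 2200-token budget.
w, h = target_size(2480, 3508)
print(w, h, estimate_tokens(w, h))  # 1098 1553 2240 (ceilings overshoot slightly)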

paddlex/inference/serving/schemas/doc_understanding.py

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ class InferRequest(BaseModel):
     temperature: Optional[float] = 0.1
     top_p: Optional[float] = 0.95
     stream: Optional[bool] = False
+    max_image_tokens: Optional[int] = None


 PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
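
Schema note: because the new field defaults to None, existing clients are unaffected and the server-side resize branch only runs when a request supplies an integer. A reduced pydantic sketch, with the field list trimmed to what is relevant here (the real InferRequest carries more fields):

from typing import Optional
from pydantic import BaseModel

class InferRequest(BaseModel):  # reduced illustration of the real schema
    stream: Optional[bool] = False
    max_image_tokens: Optional[int] = None

print(InferRequest().max_image_tokens)                       # None -> no resizing
print(InferRequest(max_image_tokens=2200).max_image_tokens)  # 2200 -> resize branch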
