4 changes: 2 additions & 2 deletions models/gemini/manifest.yaml
@@ -5,7 +5,7 @@ description:
zh_Hans: 谷歌提供的 Gemini 模型.
icon: icon_s_en.svg
label:
en_US: Gemini
en_US: Gemini dev
meta:
arch:
- amd64
@@ -34,4 +34,4 @@ resource:
tool:
enabled: true
type: plugin
version: 0.6.0
version: 0.6.1
22 changes: 4 additions & 18 deletions models/gemini/models/llm/gemini-3-pro-preview.yaml
@@ -15,23 +15,11 @@ features:
model_properties:
mode: chat
context_size: 1048576

# Documentation: https://ai.google.dev/gemini-api/docs/gemini-3?hl=zh-cn
parameter_rules:
- name: temperature
use_template: temperature
default: 1
min: 0
max: 2
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
# Force setting temperature = 1.0
# https://ai.google.dev/gemini-api/docs/gemini-3?hl=zh-cn&thinking=high#temperature
- name: max_output_tokens
use_template: max_tokens
default: 65536
@@ -96,8 +84,6 @@ parameter_rules:
en_US: Lets Gemini use code to solve complex tasks
zh_Hans: 让 Gemini 使用代码来解决复杂任务。
ja_JP: Gemini にコードを使って複雑なタスクを解決させましょう。
- name: json_schema
use_template: json_schema
# https://ai.google.dev/gemini-api/docs/pricing?hl=zh-cn#gemini-3-pro
pricing:
# $ > 200K tokens
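Note on the removed sampling rules above: the temperature, top_p, and top_k parameter rules are dropped in line with the "Force setting temperature = 1.0" comment and the linked Gemini 3 docs. A minimal sketch of how the provider side could pin that value, assuming the `types.GenerateContentConfig` from `google-genai` already used in `llm.py`; the prefix list and helper name are illustrative only and are not part of this PR:

```python
from google.genai import types

# Illustrative only: which model names count as Gemini 3 is an assumption here.
GEMINI_3_PREFIXES = ("gemini-3",)


def pin_gemini3_temperature(config: types.GenerateContentConfig, model_name: str) -> None:
    """Keep temperature at 1.0 for Gemini 3 models, per the comment in the YAML above."""
    if model_name.startswith(GEMINI_3_PREFIXES):
        # https://ai.google.dev/gemini-api/docs/gemini-3?hl=zh-cn&thinking=high#temperature
        config.temperature = 1.0


if __name__ == "__main__":
    cfg = types.GenerateContentConfig()
    pin_gemini3_temperature(cfg, "gemini-3-pro-preview")
    print(cfg.temperature)  # 1.0
```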
69 changes: 65 additions & 4 deletions models/gemini/models/llm/llm.py
@@ -8,6 +8,7 @@
from collections.abc import Generator, Iterator, Sequence
from contextlib import suppress
from typing import Optional, Union, Mapping, Any, Tuple, List, TypeVar
from loguru import logger

import requests
from dify_plugin.entities.model.llm import LLMResult, LLMResultChunk, LLMResultChunkDelta
@@ -267,6 +268,8 @@ def _set_chat_parameters(
config.temperature = model_parameters.get("temperature", None)
config.max_output_tokens = model_parameters.get("max_output_tokens", None)

# if config.temperature is None or not isinstance(config.temperature, float) or not isinstance(config.temperature, int):

@staticmethod
def _set_thinking_config(
*, config: types.GenerateContentConfig, model_parameters: Mapping[str, Any], model_name: str
@@ -581,7 +584,17 @@ def _handle_generate_response(
if model in IMAGE_GENERATION_MODELS:
assistant_prompt_message = self._parse_parts(response.candidates[0].content.parts)
else:
assistant_prompt_message = AssistantPromptMessage(content=response.text)
assistant_prompt_message = AssistantPromptMessage(
content=response.text,
name=base64.b64encode(
response.candidates[0].content.parts[-1].thought_signature
).decode(encoding="utf-8"),
)

for part in response.candidates[0].content.parts:
print(json.dumps(part.model_dump(mode="json"), indent=2, ensure_ascii=False))

print(f">> To dify message: {assistant_prompt_message}")

# calculate num tokens
prompt_tokens, completion_tokens = self._calculate_tokens_from_usage_metadata(
@@ -656,11 +669,24 @@ def _handle_generate_stream_response(
or not chunk.candidates[0].content.parts
):
continue

print(
json.dumps(
chunk.candidates[0].content.model_dump(mode="json"),
indent=2,
ensure_ascii=False,
)
)

candidate = chunk.candidates[0]
message = self._parse_parts(candidate.content.parts)

index += len(candidate.content.parts)

print(f">> To dify message -> {message=}")

print(f"------------------- [{candidate.finish_reason=}] ---------------------")

# if the stream is not finished, yield the chunk
if not candidate.finish_reason:
yield LLMResultChunk(
@@ -705,6 +731,7 @@ def _handle_generate_stream_response(

def _parse_parts(self, parts: Sequence[types.Part], /) -> AssistantPromptMessage:
"""
https://ai.google.dev/gemini-api/docs/gemini-3?hl=zh-cn&thinking=high#thought_signatures

Args:
parts: [
@@ -739,7 +766,11 @@ def _parse_parts(self, parts: Sequence[types.Part], /) -> AssistantPromptMessage
contents.append(TextPromptMessageContent(data="\n\n</think>"))
self.is_thinking = False

contents.append(TextPromptMessageContent(data=part.text))
contents.append(
TextPromptMessageContent(
data=part.text, thought_signature=part.thought_signature
)
)

# TODO:
# Upstream needs to provide a new type of PromptMessageContent for tracking the code executor's behavior.
@@ -749,11 +780,19 @@ def _parse_parts(self, parts: Sequence[types.Part], /) -> AssistantPromptMessage
code = part.executable_code.code
language = part.executable_code.language.lower()
code_block = f"\n```{language}\n{code}\n```\n"
contents.append(TextPromptMessageContent(data=code_block))
contents.append(
TextPromptMessageContent(
data=code_block, thought_signature=part.thought_signature
)
)
if part.code_execution_result:
with suppress(Exception):
result_tpl = f"\n```\n{part.code_execution_result.output}\n```\n"
contents.append(TextPromptMessageContent(data=result_tpl))
contents.append(
TextPromptMessageContent(
data=result_tpl, thought_signature=part.thought_signature
)
)

# A predicted [FunctionCall] returned from the model that contains a string
# representing the [FunctionDeclaration.name] with the parameters and their values.
@@ -796,11 +835,27 @@ def _parse_parts(self, parts: Sequence[types.Part], /) -> AssistantPromptMessage
base64_data=base64.b64encode(data).decode(),
mime_type=mime_type,
detail=ImagePromptMessageContent.DETAIL.HIGH,
thought_signature=part.thought_signature,
)
)
else:
raise InvokeError(f"unsupported mime_type {mime_type}")

# Hold interleaved thinking
if part.thought_signature:
thought_signature_base64 = base64.b64encode(part.thought_signature).decode(
encoding="utf-8"
)
# thought_signature_base64 = part.thought_signature
if contents:
contents[-1].thought_signature = thought_signature_base64
else:
contents.append(
TextPromptMessageContent(
data="", thought_signature=thought_signature_base64
)
)

# FIXME: This is a workaround to fix the typing issue in the dify_plugin
# https://github.com/langgenius/dify-plugin-sdks/issues/41
# fixed_contents = [content.model_dump(mode="json") for content in contents]
@@ -850,6 +905,10 @@ def _generate(
user: Optional[str] = None,
) -> Union[LLMResult, Generator[LLMResultChunk]]:

logger.debug(json.dumps(model_parameters, indent=2, ensure_ascii=False))
for p in prompt_messages:
print(f"{p=}")

# Validate and adjust feature compatibility
model_parameters = self._validate_feature_compatibility(model_parameters, tools)

@@ -905,6 +964,8 @@ def _generate(

# == InvokeModel == #

stream = False

if stream:
response = genai_client.models.generate_content_stream(
model=model, contents=contents, config=config
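Note on the thought_signature handling added in `_parse_parts` and `_handle_generate_response` above: the signature arrives as raw bytes on each `Part` and is base64-encoded before being stored on the prompt message content. A self-contained sketch of that round trip, using placeholder bytes in place of a real signature:

```python
import base64

# Placeholder bytes; real values come from part.thought_signature in the API response.
raw_signature = b"\x00\x01\x02\x03"

# Encode for storage on the message content, as done in _parse_parts...
encoded = base64.b64encode(raw_signature).decode("utf-8")

# ...and decode back to bytes when rebuilding the outgoing request parts.
restored = base64.b64decode(encoded)
assert restored == raw_signature
```

Keeping the stored form as a UTF-8 string avoids carrying raw bytes inside message content, which is what the `b64encode(...).decode(encoding="utf-8")` calls in the diff do.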
2 changes: 1 addition & 1 deletion models/gemini/provider/google.yaml
@@ -21,7 +21,7 @@ icon_large:
icon_small:
en_US: icon_s_en.svg
label:
en_US: Gemini
en_US: Gemini dev
models:
llm:
position: models/llm/_position.yaml