Commit 622780e

feat: add video understanding node and related components
1 parent 80c790b commit 622780e

23 files changed: +922 / -71 lines

apps/application/flow/step_node/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -32,13 +32,15 @@
 from .tool_node import *
 from .variable_assign_node import BaseVariableAssignNode
 from .variable_splitting_node import BaseVariableSplittingNode
+from .video_understand_step_node import BaseVideoUnderstandNode

 node_list = [BaseStartStepNode, BaseChatNode, BaseSearchKnowledgeNode, BaseQuestionNode,
              BaseConditionNode, BaseReplyNode,
              BaseToolNodeNode, BaseToolLibNodeNode, BaseRerankerNode, BaseApplicationNode,
              BaseDocumentExtractNode,
              BaseImageUnderstandNode, BaseFormNode, BaseSpeechToTextNode, BaseTextToSpeechNode,
              BaseImageGenerateNode, BaseVariableAssignNode, BaseMcpNode, BaseTextToVideoNode, BaseImageToVideoNode,
+             BaseVideoUnderstandNode,
              BaseIntentNode, BaseLoopNode, BaseLoopStartStepNode,
              BaseLoopContinueNode,
              BaseLoopBreakNode, BaseVariableSplittingNode]
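
Note: the new node follows the existing registration pattern, being re-exported from its package and appended to node_list. A minimal sketch (not part of this commit) of how a registered node class can be resolved from node_list by its declared `type`, assuming every Base*Node subclass exposes that attribute the way IVideoUnderstandNode does with type = 'video-understand-node'; resolve_node_class is a hypothetical helper for illustration only:

from application.flow.step_node import node_list

def resolve_node_class(node_type: str):
    # Return the first registered node class whose declared type matches.
    for node_class in node_list:
        if getattr(node_class, 'type', None) == node_type:
            return node_class
    raise ValueError(f'unknown node type: {node_type}')

video_node_class = resolve_node_class('video-understand-node')  # expected: BaseVideoUnderstandNode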

apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py

Lines changed: 10 additions & 7 deletions
@@ -169,13 +169,16 @@ def generate_message_list(self, image_model, system: str, prompt: str, history_m
         # process multiple images
         images = []
         for img in image:
-            file_id = img['file_id']
-            file = QuerySet(File).filter(id=file_id).first()
-            image_bytes = file.get_bytes()
-            base64_image = base64.b64encode(image_bytes).decode("utf-8")
-            image_format = what(None, image_bytes)
-            images.append(
-                {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
+            if isinstance(img, str) and img.startswith('http'):
+                images.append({'type': 'image_url', 'image_url': {'url': img}})
+            else:
+                file_id = img['file_id']
+                file = QuerySet(File).filter(id=file_id).first()
+                image_bytes = file.get_bytes()
+                base64_image = base64.b64encode(image_bytes).decode("utf-8")
+                image_format = what(None, image_bytes)
+                images.append(
+                    {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
         messages = [HumanMessage(
             content=[
                 {'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
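
Note: this change lets generate_message_list accept either a hosted URL (a string starting with http) or a stored file reference ({'file_id': ...}); URLs are passed through while stored files are still inlined as base64 data URLs. A simplified, standalone sketch of that branching, where load_file_bytes is a hypothetical stand-in for QuerySet(File).filter(id=file_id).first().get_bytes():

import base64
from imghdr import what

def to_image_part(img, load_file_bytes):
    if isinstance(img, str) and img.startswith('http'):
        # remote URL: pass it through untouched
        return {'type': 'image_url', 'image_url': {'url': img}}
    # stored file: inline it as a base64 data URL, guessing the format from the bytes
    image_bytes = load_file_bytes(img['file_id'])
    base64_image = base64.b64encode(image_bytes).decode('utf-8')
    image_format = what(None, image_bytes)
    return {'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}}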

apps/application/flow/step_node/start_node/impl/base_start_node.py

Lines changed: 3 additions & 0 deletions
@@ -48,6 +48,7 @@ def save_context(self, details, workflow_manage):
         self.context['document'] = details.get('document_list')
         self.context['image'] = details.get('image_list')
         self.context['audio'] = details.get('audio_list')
+        self.context['video'] = details.get('video_list')
         self.context['other'] = details.get('other_list')
         self.status = details.get('status')
         self.err_message = details.get('err_message')
@@ -73,6 +74,7 @@ def execute(self, question, **kwargs) -> NodeResult:
             'image': self.workflow_manage.image_list,
             'document': self.workflow_manage.document_list,
             'audio': self.workflow_manage.audio_list,
+            'video': self.workflow_manage.video_list,
             'other': self.workflow_manage.other_list,

         }
@@ -97,6 +99,7 @@ def get_details(self, index: int, **kwargs):
             'status': self.status,
             'err_message': self.err_message,
             'image_list': self.context.get('image'),
+            'video_list': self.context.get('video'),
             'document_list': self.context.get('document'),
             'audio_list': self.context.get('audio'),
             'other_list': self.context.get('other'),
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .impl import *
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+# coding=utf-8
+
+from typing import Type
+
+from rest_framework import serializers
+
+from application.flow.i_step_node import INode, NodeResult
+
+from django.utils.translation import gettext_lazy as _
+
+
+class VideoUnderstandNodeSerializer(serializers.Serializer):
+    model_id = serializers.CharField(required=True, label=_("Model id"))
+    system = serializers.CharField(required=False, allow_blank=True, allow_null=True,
+                                   label=_("Role Setting"))
+    prompt = serializers.CharField(required=True, label=_("Prompt word"))
+    # number of multi-turn dialogue rounds
+    dialogue_number = serializers.IntegerField(required=True, label=_("Number of multi-round conversations"))
+
+    dialogue_type = serializers.CharField(required=True, label=_("Conversation storage type"))
+
+    is_result = serializers.BooleanField(required=False,
+                                         label=_('Whether to return content'))
+
+    video_list = serializers.ListField(required=False, label=_("video"))
+
+    model_params_setting = serializers.JSONField(required=False, default=dict,
+                                                 label=_("Model parameter settings"))
+
+
+class IVideoUnderstandNode(INode):
+    type = 'video-understand-node'
+
+    def get_node_params_serializer_class(self) -> Type[serializers.Serializer]:
+        return VideoUnderstandNodeSerializer
+
+    def _run(self):
+        res = self.workflow_manage.get_reference_field(self.node_params_serializer.data.get('video_list')[0],
+                                                       self.node_params_serializer.data.get('video_list')[1:])
+        return self.execute(video=res, **self.node_params_serializer.data, **self.flow_params_serializer.data)
+
+    def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, history_chat_record, stream, chat_id,
+                model_params_setting,
+                chat_record_id,
+                video,
+                **kwargs) -> NodeResult:
+        pass
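
Note: given the fields above, a node-parameters payload that should validate against VideoUnderstandNodeSerializer looks roughly like the sketch below (run inside the Django app context; the model id is a placeholder and the video_list entries follow the node-reference form consumed by get_reference_field in _run, i.e. a node id followed by a field path):

from application.flow.step_node.video_understand_step_node.i_video_understand_node import VideoUnderstandNodeSerializer

node_params = {
    'model_id': '00000000-0000-0000-0000-000000000000',  # placeholder model id
    'system': 'You are a helpful video analyst.',
    'prompt': 'Describe what happens in the attached video.',
    'dialogue_number': 3,
    'dialogue_type': 'WORKFLOW',
    'is_result': True,
    'video_list': ['start-node', 'video'],  # hypothetical reference: node id + field path
    'model_params_setting': {},
}

serializer = VideoUnderstandNodeSerializer(data=node_params)
serializer.is_valid(raise_exception=True)
print(serializer.validated_data['dialogue_type'])  # 'WORKFLOW'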
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+# coding=utf-8
+
+from .base_video_understand_node import BaseVideoUnderstandNode
Lines changed: 229 additions & 0 deletions
@@ -0,0 +1,229 @@
+# coding=utf-8
+import base64
+import time
+from functools import reduce
+from imghdr import what
+from typing import List, Dict
+
+from django.db.models import QuerySet
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
+
+from application.flow.i_step_node import NodeResult, INode
+from application.flow.step_node.video_understand_step_node.i_video_understand_node import IVideoUnderstandNode
+from knowledge.models import File
+from models_provider.tools import get_model_instance_by_model_workspace_id
+
+
+def _write_context(node_variable: Dict, workflow_variable: Dict, node: INode, workflow, answer: str):
+    chat_model = node_variable.get('chat_model')
+    message_tokens = node_variable['usage_metadata']['output_tokens'] if 'usage_metadata' in node_variable else 0
+    answer_tokens = chat_model.get_num_tokens(answer)
+    node.context['message_tokens'] = message_tokens
+    node.context['answer_tokens'] = answer_tokens
+    node.context['answer'] = answer
+    node.context['history_message'] = node_variable['history_message']
+    node.context['question'] = node_variable['question']
+    node.context['run_time'] = time.time() - node.context['start_time']
+    if workflow.is_result(node, NodeResult(node_variable, workflow_variable)):
+        node.answer_text = answer
+
+
+def write_context_stream(node_variable: Dict, workflow_variable: Dict, node: INode, workflow):
+    """
+    Write context data (streaming)
+    @param node_variable:     node data
+    @param workflow_variable: global data
+    @param node:              node
+    @param workflow:          workflow manager
+    """
+    response = node_variable.get('result')
+    answer = ''
+    for chunk in response:
+        answer += chunk.content
+        yield chunk.content
+    _write_context(node_variable, workflow_variable, node, workflow, answer)
+
+
+def write_context(node_variable: Dict, workflow_variable: Dict, node: INode, workflow):
+    """
+    Write context data
+    @param node_variable:     node data
+    @param workflow_variable: global data
+    @param node:              node instance
+    @param workflow:          workflow manager
+    """
+    response = node_variable.get('result')
+    answer = response.content
+    _write_context(node_variable, workflow_variable, node, workflow, answer)
+
+
+def file_id_to_base64(file_id: str):
+    file = QuerySet(File).filter(id=file_id).first()
+    file_bytes = file.get_bytes()
+    base64_video = base64.b64encode(file_bytes).decode("utf-8")
+    return [base64_video, what(None, file_bytes)]
+
+
+class BaseVideoUnderstandNode(IVideoUnderstandNode):
+    def save_context(self, details, workflow_manage):
+        self.context['answer'] = details.get('answer')
+        self.context['question'] = details.get('question')
+        if self.node_params.get('is_result', False):
+            self.answer_text = details.get('answer')
+
+    def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, history_chat_record, stream, chat_id,
+                model_params_setting,
+                chat_record_id,
+                video,
+                **kwargs) -> NodeResult:
+        # handle invalid parameters
+        if video is None or not isinstance(video, list):
+            video = []
+        workspace_id = self.workflow_manage.get_body().get('workspace_id')
+        video_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
+                                                               **model_params_setting)
+        # history messages in the execution details do not need the video content
+        history_message = self.get_history_message_for_details(history_chat_record, dialogue_number)
+        self.context['history_message'] = history_message
+        question = self.generate_prompt_question(prompt)
+        self.context['question'] = question.content
+        # build the message list with the real history_message
+        message_list = self.generate_message_list(video_model, system, prompt,
+                                                  self.get_history_message(history_chat_record, dialogue_number), video)
+        self.context['message_list'] = message_list
+        self.context['video_list'] = video
+        self.context['dialogue_type'] = dialogue_type
+        if stream:
+            r = video_model.stream(message_list)
+            return NodeResult({'result': r, 'chat_model': video_model, 'message_list': message_list,
+                               'history_message': history_message, 'question': question.content}, {},
+                              _write_context=write_context_stream)
+        else:
+            r = video_model.invoke(message_list)
+            return NodeResult({'result': r, 'chat_model': video_model, 'message_list': message_list,
+                               'history_message': history_message, 'question': question.content}, {},
+                              _write_context=write_context)
+
+    def get_history_message_for_details(self, history_chat_record, dialogue_number):
+        start_index = len(history_chat_record) - dialogue_number
+        history_message = reduce(lambda x, y: [*x, *y], [
+            [self.generate_history_human_message_for_details(history_chat_record[index]),
+             self.generate_history_ai_message(history_chat_record[index])]
+            for index in
+            range(start_index if start_index > 0 else 0, len(history_chat_record))], [])
+        return history_message
+
+    def generate_history_ai_message(self, chat_record):
+        for val in chat_record.details.values():
+            if self.node.id == val['node_id'] and 'video_list' in val:
+                if val['dialogue_type'] == 'WORKFLOW':
+                    return chat_record.get_ai_message()
+                return AIMessage(content=val['answer'])
+        return chat_record.get_ai_message()
+
+    def generate_history_human_message_for_details(self, chat_record):
+        for data in chat_record.details.values():
+            if self.node.id == data['node_id'] and 'video_list' in data:
+                video_list = data['video_list']
+                if len(video_list) == 0 or data['dialogue_type'] == 'WORKFLOW':
+                    return HumanMessage(content=chat_record.problem_text)
+                file_id_list = [video.get('file_id') for video in video_list]
+                return HumanMessage(content=[
+                    {'type': 'text', 'text': data['question']},
+                    *[{'type': 'video_url', 'video_url': {'url': f'./oss/file/{file_id}'}} for file_id in file_id_list]
+
+                ])
+        return HumanMessage(content=chat_record.problem_text)
+
+    def get_history_message(self, history_chat_record, dialogue_number):
+        start_index = len(history_chat_record) - dialogue_number
+        history_message = reduce(lambda x, y: [*x, *y], [
+            [self.generate_history_human_message(history_chat_record[index]),
+             self.generate_history_ai_message(history_chat_record[index])]
+            for index in
+            range(start_index if start_index > 0 else 0, len(history_chat_record))], [])
+        return history_message
+
+    def generate_history_human_message(self, chat_record):
+
+        for data in chat_record.details.values():
+            if self.node.id == data['node_id'] and 'video_list' in data:
+                video_list = data['video_list']
+                if len(video_list) == 0 or data['dialogue_type'] == 'WORKFLOW':
+                    return HumanMessage(content=chat_record.problem_text)
+                video_base64_list = [file_id_to_base64(video.get('file_id')) for video in video_list]
+                return HumanMessage(
+                    content=[
+                        {'type': 'text', 'text': data['question']},
+                        *[{'type': 'video_url',
+                           'video_url': {'url': f'data:video/{base64_video[1]};base64,{base64_video[0]}'}} for
+                          base64_video in video_base64_list]
+                    ])
+        return HumanMessage(content=chat_record.problem_text)
+
+    def generate_prompt_question(self, prompt):
+        return HumanMessage(self.workflow_manage.generate_prompt(prompt))
+
+    def generate_message_list(self, video_model, system: str, prompt: str, history_message, video):
+        if video is not None and len(video) > 0:
+            # process multiple videos
+            videos = []
+            for img in video:
+                if isinstance(img, str) and img.startswith('http'):
+                    videos.append({'type': 'video_url', 'video_url': {'url': img}})
+                else:
+                    file_id = img['file_id']
+                    file = QuerySet(File).filter(id=file_id).first()
+                    video_bytes = file.get_bytes()
+                    base64_video = base64.b64encode(video_bytes).decode("utf-8")
+                    video_format = what(None, video_bytes)
+                    videos.append(
+                        {'type': 'video_url', 'video_url': {'url': f'data:video/{video_format};base64,{base64_video}'}})
+            messages = [HumanMessage(
+                content=[
+                    {'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
+                    *videos
+                ])]
+        else:
+            messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
+
+        if system is not None and len(system) > 0:
+            return [
+                SystemMessage(self.workflow_manage.generate_prompt(system)),
+                *history_message,
+                *messages
+            ]
+        else:
+            return [
+                *history_message,
+                *messages
+            ]
+
+    @staticmethod
+    def reset_message_list(message_list: List[BaseMessage], answer_text):
+        result = [{'role': 'user' if isinstance(message, HumanMessage) else 'ai', 'content': message.content} for
+                  message
+                  in
+                  message_list]
+        result.append({'role': 'ai', 'content': answer_text})
+        return result
+
+    def get_details(self, index: int, **kwargs):
+        return {
+            'name': self.node.properties.get('stepName'),
+            "index": index,
+            'run_time': self.context.get('run_time'),
+            'system': self.node_params.get('system'),
+            'history_message': [{'content': message.content, 'role': message.type} for message in
+                                (self.context.get('history_message') if self.context.get(
+                                    'history_message') is not None else [])],
+            'question': self.context.get('question'),
+            'answer': self.context.get('answer'),
+            'type': self.node.type,
+            'message_tokens': self.context.get('message_tokens'),
+            'answer_tokens': self.context.get('answer_tokens'),
+            'status': self.status,
+            'err_message': self.err_message,
+            'video_list': self.context.get('video_list'),
+            'dialogue_type': self.context.get('dialogue_type')
+        }
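
Note: generate_message_list mirrors the image-understand node: http(s) URLs are forwarded as video_url parts, stored files are inlined as data:video/...;base64 URLs, and an optional SystemMessage plus the reconstructed history are prepended. An illustrative sketch of the list shape for a single http video and a non-empty system prompt (all literal values are placeholders):

from langchain_core.messages import HumanMessage, SystemMessage

expected_message_list = [
    SystemMessage('You are a helpful video analyst.'),  # only present when system is non-empty
    # *history_message entries would be spliced in here
    HumanMessage(content=[
        {'type': 'text', 'text': 'Describe what happens in this clip.'},
        {'type': 'video_url', 'video_url': {'url': 'https://example.com/clip.mp4'}},
    ]),
]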

apps/application/flow/workflow_manage.py

Lines changed: 4 additions & 0 deletions
@@ -94,6 +94,7 @@ def __init__(self, flow: Workflow, params, work_flow_post_handler: WorkFlowPostH
                  base_to_response: BaseToResponse = SystemToResponse(), form_data=None, image_list=None,
                  document_list=None,
                  audio_list=None,
+                 video_list=None,
                  other_list=None,
                  start_node_id=None,
                  start_node_data=None, chat_record=None, child_node=None):
@@ -105,12 +106,15 @@ def __init__(self, flow: Workflow, params, work_flow_post_handler: WorkFlowPostH
             document_list = []
         if audio_list is None:
             audio_list = []
+        if video_list is None:
+            video_list = []
         if other_list is None:
             other_list = []
         self.start_node_id = start_node_id
         self.start_node = None
         self.form_data = form_data
         self.image_list = image_list
+        self.video_list = video_list
         self.document_list = document_list
         self.audio_list = audio_list
         self.other_list = other_list

apps/chat/serializers/chat.py

Lines changed: 2 additions & 0 deletions
@@ -375,6 +375,7 @@ def chat_work_flow(self, chat_info: ChatInfo, instance: dict, base_to_response):
         chat_user_type = self.data.get('chat_user_type')
         form_data = instance.get('form_data')
         image_list = instance.get('image_list')
+        video_list = instance.get('video_list')
         document_list = instance.get('document_list')
         audio_list = instance.get('audio_list')
         other_list = instance.get('other_list')
@@ -401,6 +402,7 @@ def chat_work_flow(self, chat_info: ChatInfo, instance: dict, base_to_response):
                                        'application_id': str(chat_info.application_id)},
                                       WorkFlowPostHandler(chat_info),
                                       base_to_response, form_data, image_list, document_list, audio_list,
+                                      video_list,
                                       other_list,
                                       instance.get('runtime_node_id'),
                                       instance.get('node_data'), chat_record, instance.get('child_node'))
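
Note: on the API side, chat_work_flow now also reads video_list from the request instance and threads it through to the workflow. A hypothetical instance fragment that exercises the new field, using only the keys visible in this diff (the file_id is a placeholder; the base node also accepts plain http URLs in place of file references):

instance = {
    'form_data': {},
    'image_list': [],
    'video_list': [{'file_id': '00000000-0000-0000-0000-000000000000'}],  # new in this commit
    'document_list': [],
    'audio_list': [],
    'other_list': [],
}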
