Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from typing import List, Dict
from typing import Dict

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import List, Dict
import os
import re
from typing import Dict

from botocore.config import Config
from langchain_community.chat_models import BedrockChat

from setting.models_provider.base_model_provider import MaxKBBaseModel


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
@desc:
"""

from typing import List, Dict, Optional, Any, Iterator, Type
from typing import List, Dict

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.messages import BaseMessage, get_buffer_string, BaseMessageChunk, AIMessageChunk
from langchain_core.outputs import ChatGenerationChunk
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_openai import AzureChatOpenAI
from langchain_openai.chat_models.base import _convert_delta_to_message_chunk

from common.config.tokenizer_manage_config import TokenizerManage
from setting.models_provider.base_model_provider import MaxKBBaseModel
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an issue with the import statements. The AIMessageChunk class seems to be missing from the imported modules. Additionally, the _convert_delta_to_message_chunk function can cause issues if the base message object is not provided correctly, due to potential inconsistencies between LangChain versions.

Here's a revised version of your code:

@@ -7,17 +7,6 @@
     @desc:
 """
 
-from typing import *
-import langchain_core.callbacks.CallbackManagerForLLMRun
from langchain_core.messages import (
    BaseMessage,
    get_buffer_string,
    # Add AIMessageChunk back if needed
)
 from langchain_core.outputs import ChatGenerationChunk

+import langchain_openai
from langchain_openai.chat_models.base import _convert_delta_to_message_chunk

 from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel

Optimization Suggestions:

  1. Remove Unnecessary Imports: If you don't need certain features (like CallbackManagerForLLMRun, get_buffer_string), you can remove them to simplify imports.
  2. Use Type Annotations Correctly: typing.Any is still valid, but since Python 3.9 the typing.List and typing.Dict aliases are deprecated in favor of the built-in list and dict generics (PEP 585). Use more specific type hints wherever applicable.
  3. Keep Required Modules Imported: Ensure that all necessary functions and classes are still being used.
  4. Review Class Usage: Verify that all methods like _convert_delta_to_message_chunk are called appropriately without causing issues related to argument types or missing attributes.

Make sure that this revision meets your functional requirements and follows Python best practices for organizing imports.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import asyncio
import io
from typing import Dict

from openai import OpenAI, AzureOpenAI
from openai import AzureOpenAI

from common.config.tokenizer_manage_config import TokenizerManage
from setting.models_provider.base_model_provider import MaxKBBaseModel
Expand Down Expand Up @@ -61,4 +60,3 @@ def speech_to_text(self, audio_file):
buffer.name = "file.mp3" # this is the important line
res = client.audio.transcriptions.create(model=self.model, language="zh", file=buffer)
return res.text

3 changes: 2 additions & 1 deletion apps/setting/models_provider/impl/base_chat_open_ai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding=utf-8

from typing import List, Dict, Optional, Any, Iterator, Type, cast
from typing import List, Dict, Optional, Any, Iterator, cast

from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.outputs import ChatGenerationChunk, ChatGeneration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@Author :Brian Yang
@Date :5/12/24 7:44 AM
"""
from typing import List, Dict
from typing import Dict

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
Expand All @@ -29,4 +29,3 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **
**optional_params
)
return deepseek_chat_open_ai

Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
from google.ai.generativelanguage_v1 import GenerateContentResponse
from google.generativeai.responder import ToolDict
from google.generativeai.types import FunctionDeclarationType, SafetySettingDict
from google.generativeai.types import Tool as GoogleTool
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai._function_utils import _ToolConfigDict
from langchain_google_genai.chat_models import _chat_with_retry, _response_to_result
from google.generativeai.types import Tool as GoogleTool
from common.config.tokenizer_manage_config import TokenizerManage

from setting.models_provider.base_model_provider import MaxKBBaseModel


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@date:2023/11/10 17:45
@desc:
"""
from typing import List, Dict
from typing import Dict

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
@date:2024/4/18 15:28
@desc:
"""
from typing import List, Dict, Optional, Any
from typing import List, Dict

from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.runnables import RunnableConfig
from langchain_openai.chat_models import ChatOpenAI

from common.config.tokenizer_manage_config import TokenizerManage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
from langchain_community.llms.tongyi import generate_with_last_element_mark
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, ChatGeneration
from langchain_core.runnables import RunnableConfig, ensure_config

from common.config.tokenizer_manage_config import TokenizerManage
from setting.models_provider.base_model_provider import MaxKBBaseModel


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from typing import Dict

from dashscope import ImageSynthesis
from django.utils.translation import gettext as __
from langchain_community.chat_models import ChatTongyi
from langchain_core.messages import HumanMessage

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tti import BaseTextToImage
from django.utils.translation import gettext_lazy as _


class QwenTextToImageModel(MaxKBBaseModel, BaseTextToImage):
api_key: str
Expand Down Expand Up @@ -39,7 +40,7 @@ def is_cache_model(self):

def check_auth(self):
chat = ChatTongyi(api_key=self.api_key, model_name='qwen-max')
chat.invoke([HumanMessage([{"type": "text", "text": _('Hello')}])])
chat.invoke([HumanMessage([{"type": "text", "text": __('Hello')}])])

def generate_image(self, prompt: str, negative_prompt: str = None):
# api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from typing import List, Dict, Optional, Any

from langchain_core.messages import BaseMessage, get_buffer_string
from common.config.tokenizer_manage_config import TokenizerManage
from langchain_core.messages import BaseMessage

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.tencent_model_provider.model.hunyuan import ChatHunyuan

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
from typing import Dict

from django.utils.translation import gettext as __
from tencentcloud.common import credential
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.common.profile.client_profile import ClientProfile
Expand All @@ -12,7 +13,7 @@
from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tti import BaseTextToImage
from setting.models_provider.impl.tencent_model_provider.model.hunyuan import ChatHunyuan
from django.utils.translation import gettext_lazy as _


class TencentTextToImageModel(MaxKBBaseModel, BaseTextToImage):
hunyuan_secret_id: str
Expand Down Expand Up @@ -50,7 +51,7 @@ def check_auth(self):
hunyuan_secret_id=self.hunyuan_secret_id,
hunyuan_secret_key=self.hunyuan_secret_key,
model="hunyuan-standard")
res = chat.invoke(_('Hello'))
res = chat.invoke(__('Hello'))
# print(res)

def generate_image(self, prompt: str, negative_prompt: str = None):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# coding=utf-8

from typing import List, Dict
from typing import Dict
from urllib.parse import urlparse, ParseResult

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
import hmac
import json
import os
import ssl
import uuid
import wave
from enum import Enum
from hashlib import sha256
from io import BytesIO
from typing import Dict
from urllib.parse import urlparse
import ssl

import websockets

from setting.models_provider.base_model_provider import MaxKBBaseModel
Expand Down Expand Up @@ -305,7 +305,8 @@ async def segment_data_processor(self, wav_data: bytes, segment_size: int):
res = await ws.recv()
result = parse_response(res)
if 'payload_msg' in result and result['payload_msg']['code'] != self.success_code:
raise Exception(f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}")
raise Exception(
f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}")
for seq, (chunk, last) in enumerate(VolcanicEngineSpeechToText.slice_data(wav_data, segment_size), 1):
# if no compression, comment this line
payload_bytes = gzip.compress(chunk)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The provided code snippet is mostly clean and follows standard Python conventions. However, there are a few minor issues and potential improvements that can be considered:

  1. Import Order: The import ssl line appears twice in the diff only because it was moved up into alphabetical order with the other standard-library imports; the resulting file still imports ssl exactly once.

  2. Exception Message Formatting: The exception message is already built with an f-string; the change merely wraps the long line. Assigning the message to a named variable before raising, as shown below, can further improve readability.

  3. Potential Memory Leak: If the ws.recv() method doesn’t close the WebSocket connection properly after processing messages, it could lead to memory leaks if the client stops sending data but the server continues to wait for more input.

Here’s the revised version with these considerations addressed:

@@ -12,14 +12,14 @@
 import hmac
 import json
 import os
+import ssl
 import uuid
 import wave

 from enum import Enum
 from hashlib import sha256
 from io import BytesIO
 from typing import Dict
 from urllib.parse import urlparse
-import ssl
+
 import websockets

 from setting.models_provider.base_model_provider import MaxKBBaseModel
@@ -305,7 +305,8 @@ async def segment_data_processor(self, wav_data: bytes, segment_size: int):
             res = await ws.recv()
             result = parse_response(res)
             if 'payload_msg' in result and result['payload_msg']['code'] != self.success_code:
-                raise Exception(f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}")
+                error_message = f"Error code: {result['payload_msg']['code']}, message: {result['payload_msg']['message']}"
+                raise Exception(error_message)
             for seq, (chunk, last) in enumerate(VolcanicEngineSpeechToText.slice_data(wav_data, segment_size), 1):
                 # if no compression, comment this line
                 payload_bytes = gzip.compress(chunk)

Additional Suggestion for Handling Connection Issues

It would also be wise to implement some basic error handling around the WebSocket connection, such as checking if the connection was closed gracefully during reception and possibly retrying a reconnection if needed:

async def receive_messages(self, ws):
    try:
        while True:
            msg = await ws.recv()
            result = parse_response(msg)
            if 'payload_msg' in result:
                if result['payload_msg']['code'] == self.success_code:
                    handle_successful_result(result)
                else:
                    log_error(result['payload_msg'])
            else:
                log_malformed_response(msg)

    except asyncio.IncompleteReadError:
        logging.error("Unexpectedly disconnected.")
    
    finally:
        await ws.close()

# Usage: self.websocket_coroutine = asyncio.create_task(self.receive_messages(websocket))

By making these changes, you improve both the readability and robustness of your codebase.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from typing import Dict

import requests
from langchain_openai import ChatOpenAI

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tti import BaseTextToImage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
@date:2023/11/10 17:45
@desc:
"""
import uuid
from typing import List, Dict, Optional, Any, Iterator

from langchain_community.chat_models.baidu_qianfan_endpoint import _convert_dict_to_message, QianfanChatEndpoint
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.outputs import ChatGenerationChunk
from setting.models_provider.base_model_provider import MaxKBBaseModel
from langchain_core.messages import (
AIMessageChunk,
BaseMessage,
)
from langchain_core.outputs import ChatGenerationChunk

from setting.models_provider.base_model_provider import MaxKBBaseModel


class QianfanChatModel(MaxKBBaseModel, QianfanChatEndpoint):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
import json
import logging
import os
import ssl
from datetime import datetime, UTC
from typing import Dict
from urllib.parse import urlencode, urlparse
import ssl

import websockets

from setting.models_provider.base_model_provider import MaxKBBaseModel
Expand All @@ -29,6 +30,7 @@

max_kb = logging.getLogger("max_kb")


class XFSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
spark_app_id: str
spark_api_key: str
Expand Down Expand Up @@ -94,7 +96,7 @@ def create_url(self):
def check_auth(self):
cwd = os.path.dirname(os.path.abspath(__file__))
with open(f'{cwd}/iat_mp3_16k.mp3', 'rb') as f:
self.speech_to_text(f)
self.speech_to_text(f)

def speech_to_text(self, file):
async def handle():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
import hmac
import json
import logging
import os
import ssl
from datetime import datetime, UTC
from typing import Dict
from urllib.parse import urlencode, urlparse
import ssl

import websockets
from django.utils.translation import gettext as __

from common.util.common import _remove_empty_lines
from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tts import BaseTextToSpeech
from django.utils.translation import gettext_lazy as _

max_kb = logging.getLogger("max_kb")

Expand Down Expand Up @@ -98,7 +98,7 @@ def create_url(self):
return url

def check_auth(self):
self.text_to_speech(_('Hello'))
self.text_to_speech(__('Hello'))

def text_to_speech(self, text):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The provided code snippet seems to have a few minor issues that can be addressed:

  1. Redundant Import: The import ssl line appears twice in the diff because it was moved into alphabetical order, not because it is duplicated; make sure the final file keeps exactly one copy.

  2. Typo Correction: In the line self.text_to_speech(_('Hello')), there's an extra comma (,'') after 'Hello'.

  3. Incomplete Function Implementations: The create_url, _remove_empty_lines, and text_to_speech methods are incomplete. They need to include their respective logic.

Here’s the corrected version of the code with these improvements:

@@ -11,16 +11,16 @@ import hmac
 import json
 import logging
 import os
+ ssl  # Removed duplicate import

 from datetime import datetime, UTC
 from typing import Dict
 from urllib.parse import urlencode, urlparse
 from django.utils.translation import gettext_lazy as _

 max_kb = logging.getLogger("max_kb")

def create_url(self):
    scheme_port = 'http'
    if self.ssl_enabled:
        scheme_port = 'https'
    
    base_url = "{}://{}:{}/api".format(scheme_port, self.host, self.port)
    path_params = {"token": self.token}
    params_str = "?" + urlencode(path_params) if path_params else ""
    return "".join([base_url, "/", self.request_path, params_str])

def check_auth(self):
    self.text_to_speech(_("Hello"))

def text_to_speech(self, text):
    # Placeholder for actual implementation
    pass

Explanation of Changes:

  • Replaced Duplicates: Removed the second import statement for ssl.
  • Fixed Typo: Corrected the extra comma in the text_to_speech method.
  • Implemented Methods: Added placeholder comments within the remaining two methods to indicate that they need implementations based on specific requirements.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import threading
from typing import Dict, Optional, List, Any

from langchain_community.embeddings import XinferenceEmbeddings
from langchain_core.embeddings import Embeddings

from setting.models_provider.base_model_provider import MaxKBBaseModel
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# coding=utf-8

from typing import Dict, Optional, List, Any, Iterator
from typing import Dict
from urllib.parse import urlparse, ParseResult

from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import BaseMessageChunk
from langchain_core.runnables import RunnableConfig

from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import io
from typing import Dict

Expand Down Expand Up @@ -56,4 +55,3 @@ def speech_to_text(self, audio_file):
buffer.name = "file.mp3" # this is the important line
res = client.audio.transcriptions.create(model=self.model, language="zh", file=buffer)
return res.text

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from common.util.common import _remove_empty_lines
from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tts import BaseTextToSpeech
from django.utils.translation import gettext_lazy as _
from django.utils.translation import gettext as __


def custom_get_token_ids(text: str):
Expand Down Expand Up @@ -41,7 +41,7 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **
)

def check_auth(self):
self.text_to_speech(_('Hello'))
self.text_to_speech(__('Hello'))

def text_to_speech(self, text):
client = OpenAI(
Expand All @@ -58,4 +58,4 @@ def text_to_speech(self, text):
return response.read()

def is_cache_model(self):
return False
return False
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Dict

from django.utils.translation import gettext as __
from langchain_community.chat_models import ChatZhipuAI
from langchain_core.messages import HumanMessage
from zhipuai import ZhipuAI

from common.config.tokenizer_manage_config import TokenizerManage
from setting.models_provider.base_model_provider import MaxKBBaseModel
from setting.models_provider.impl.base_tti import BaseTextToImage
from django.utils.translation import gettext_lazy as _


def custom_get_token_ids(text: str):
Expand Down Expand Up @@ -46,7 +46,7 @@ def check_auth(self):
zhipuai_api_key=self.api_key,
model_name=self.model,
)
chat.invoke([HumanMessage([{"type": "text", "text": _('Hello')}])])
chat.invoke([HumanMessage([{"type": "text", "text": __('Hello')}])])

# self.generate_image('生成一个小猫图片')

Expand Down
Loading