32
32
logger = logging .getLogger (__name__ )
33
33
34
34
35
+ # Note: we define both AsyncChat and Chat for Python type analysis.
36
+ class AsyncChat : # pylint: disable=too-few-public-methods
37
+ """The proxy class to direct to async chat completions."""
38
+
39
+ def __init__ (self , engine : weakref .ReferenceType ) -> None :
40
+ assert isinstance (engine (), AsyncMLCEngine )
41
+ self .completions = AsyncChatCompletion (engine )
42
+
43
+
35
44
class Chat : # pylint: disable=too-few-public-methods
36
45
"""The proxy class to direct to chat completions."""
37
46
38
47
def __init__ (self , engine : weakref .ReferenceType ) -> None :
39
- assert isinstance (engine (), (AsyncMLCEngine , MLCEngine ))
40
- self .completions = (
41
- AsyncChatCompletion (engine ) # type: ignore
42
- if isinstance (engine (), AsyncMLCEngine )
43
- else ChatCompletion (engine ) # type: ignore
44
- )
48
+ assert isinstance (engine (), MLCEngine )
49
+ self .completions = ChatCompletion (engine )
45
50
46
51
47
52
class AsyncChatCompletion : # pylint: disable=too-few-public-methods
@@ -151,7 +156,7 @@ async def create( # pylint: disable=too-many-arguments,too-many-locals
151
156
Extra debug options to pass to the request.
152
157
153
158
Returns
154
- ------
159
+ -------
155
160
response : ChatCompletionResponse
156
161
The chat completion response conforming to OpenAI API.
157
162
See mlc_llm/protocol/openai_api_protocol.py or
@@ -643,7 +648,7 @@ def create( # pylint: disable=too-many-arguments,too-many-locals
643
648
response_format : Optional [Dict [str , Any ]] = None ,
644
649
request_id : Optional [str ] = None ,
645
650
debug_config : Optional [Dict [str , Any ]] = None ,
646
- ) -> openai_api_protocol .CompletionResponse :
651
+ ) -> Iterator [ openai_api_protocol .CompletionResponse ] :
647
652
"""Synchronous streaming completion interface with OpenAI API compatibility.
648
653
The method streams back CompletionResponse that conforms to
649
654
OpenAI API one at a time via yield.
@@ -698,7 +703,7 @@ def create( # pylint: disable=too-many-arguments,too-many-locals
698
703
response_format : Optional [Dict [str , Any ]] = None ,
699
704
request_id : Optional [str ] = None ,
700
705
debug_config : Optional [Dict [str , Any ]] = None ,
701
- ) -> Iterator [ openai_api_protocol .CompletionResponse ] :
706
+ ) -> openai_api_protocol .CompletionResponse :
702
707
"""Synchronous non-streaming completion interface with OpenAI API compatibility.
703
708
704
709
See https://platform.openai.com/docs/api-reference/completions/create for specification.
@@ -714,7 +719,7 @@ def create( # pylint: disable=too-many-arguments,too-many-locals
714
719
Extra debug options to pass to the request.
715
720
716
721
Returns
717
- ------
722
+ -------
718
723
response : CompletionResponse
719
724
The completion response conforming to OpenAI API.
720
725
See mlc_llm/protocol/openai_api_protocol.py or
@@ -750,7 +755,10 @@ def create( # pylint: disable=too-many-arguments,too-many-locals
750
755
response_format : Optional [Dict [str , Any ]] = None ,
751
756
request_id : Optional [str ] = None ,
752
757
debug_config : Optional [Dict [str , Any ]] = None ,
753
- ) -> Iterator [openai_api_protocol .CompletionResponse ]:
758
+ ) -> Union [
759
+ Iterator [openai_api_protocol .CompletionResponse ],
760
+ openai_api_protocol .CompletionResponse ,
761
+ ]:
754
762
"""Synchronous completion interface with OpenAI API compatibility.
755
763
756
764
See https://platform.openai.com/docs/api-reference/completions/create for specification.
@@ -864,7 +872,7 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
864
872
engine_config = engine_config ,
865
873
enable_tracing = enable_tracing ,
866
874
)
867
- self .chat = Chat (weakref .ref (self ))
875
+ self .chat = AsyncChat (weakref .ref (self ))
868
876
self .completions = AsyncCompletion (weakref .ref (self ))
869
877
870
878
async def abort (self , request_id : str ) -> None :
@@ -1568,7 +1576,10 @@ def _completion( # pylint: disable=too-many-arguments,too-many-locals
1568
1576
response_format : Optional [Dict [str , Any ]] = None ,
1569
1577
request_id : Optional [str ] = None ,
1570
1578
debug_config : Optional [Dict [str , Any ]] = None ,
1571
- ) -> Iterator [openai_api_protocol .CompletionResponse ]:
1579
+ ) -> Union [
1580
+ Iterator [openai_api_protocol .CompletionResponse ],
1581
+ openai_api_protocol .CompletionResponse ,
1582
+ ]:
1572
1583
"""Synchronous completion internal interface with OpenAI API compatibility.
1573
1584
1574
1585
See https://platform.openai.com/docs/api-reference/completions/create for specification.
0 commit comments