@@ -657,6 +657,153 @@ async def test_acompletion(
 )


+@pytest.mark.parametrize("use_context_attributes", [False, True])
+async def test_acompletion_stream(
+    in_memory_span_exporter: InMemorySpanExporter,
+    setup_litellm_instrumentation: Any,
+    use_context_attributes: bool,
+    session_id: str,
+    user_id: str,
+    metadata: Dict[str, Any],
+    tags: List[str],
+    prompt_template: str,
+    prompt_template_version: str,
+    prompt_template_variables: Dict[str, Any],
+) -> None:
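+    # Reset the exporter so only spans from this test are captured.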
+    in_memory_span_exporter.clear()
+
+    input_messages = [{"content": "What's the capital of China?", "role": "user"}]
+    if use_context_attributes:
+        with using_attributes(
+            session_id=session_id,
+            user_id=user_id,
+            metadata=metadata,
+            tags=tags,
+            prompt_template=prompt_template,
+            prompt_template_version=prompt_template_version,
+            prompt_template_variables=prompt_template_variables,
+        ):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=input_messages,
+                mock_response="Beijing",
+                stream=True,
+            )
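+            # Drain the stream; the instrumentor finishes the span only after
+            # the last chunk has been consumed.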
+            async for chunk in response:
+                print(chunk)
+    else:
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=input_messages,
+            mock_response="Beijing",
+            stream=True,
+        )
+        async for chunk in response:
+            print(chunk)
+
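+    # Exactly one span should be exported for the streamed call.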
+    spans = in_memory_span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == "acompletion"
+    attributes = dict(cast(Mapping[str, AttributeValue], span.attributes))
+    assert attributes.get(SpanAttributes.LLM_MODEL_NAME) == "gpt-3.5-turbo"
+    assert attributes.get(SpanAttributes.INPUT_VALUE) == safe_json_dumps(
+        {"messages": input_messages}
+    )
+    assert attributes.get(SpanAttributes.INPUT_MIME_TYPE) == "application/json"
+
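+    # The streamed chunks are reassembled into the span's output value.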
+    assert "Beijing" == attributes.get(SpanAttributes.OUTPUT_VALUE)
+    assert span.status.status_code == StatusCode.OK
+
+    if use_context_attributes:
+        _check_context_attributes(
+            attributes,
+            session_id,
+            user_id,
+            metadata,
+            tags,
+            prompt_template,
+            prompt_template_version,
+            prompt_template_variables,
+        )
+
+
+@pytest.mark.parametrize("use_context_attributes", [False, True])
+async def test_acompletion_stream_token_count(
+    in_memory_span_exporter: InMemorySpanExporter,
+    setup_litellm_instrumentation: Any,
+    use_context_attributes: bool,
+    session_id: str,
+    user_id: str,
+    metadata: Dict[str, Any],
+    tags: List[str],
+    prompt_template: str,
+    prompt_template_version: str,
+    prompt_template_variables: Dict[str, Any],
+) -> None:
+    in_memory_span_exporter.clear()
+
+    input_messages = [{"content": "What's the capital of China?", "role": "user"}]
+    if use_context_attributes:
+        with using_attributes(
+            session_id=session_id,
+            user_id=user_id,
+            metadata=metadata,
+            tags=tags,
+            prompt_template=prompt_template,
+            prompt_template_version=prompt_template_version,
+            prompt_template_variables=prompt_template_variables,
+        ):
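+            # include_usage asks the backend to report token usage on the
+            # final stream chunk.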
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=input_messages,
+                mock_response="Beijing",
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+            async for chunk in response:
+                print(chunk)
+    else:
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=input_messages,
+            mock_response="Beijing",
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+        async for chunk in response:
+            print(chunk)
+
+    spans = in_memory_span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    span = spans[0]
+    assert span.name == "acompletion"
+    attributes = dict(cast(Mapping[str, AttributeValue], span.attributes))
+    assert attributes.get(SpanAttributes.LLM_MODEL_NAME) == "gpt-3.5-turbo"
+    assert attributes.get(SpanAttributes.INPUT_VALUE) == safe_json_dumps(
+        {"messages": input_messages}
+    )
+    assert attributes.get(SpanAttributes.INPUT_MIME_TYPE) == "application/json"
+
+    assert "Beijing" == attributes.get(SpanAttributes.OUTPUT_VALUE)
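+    # Usage reported on the final chunk surfaces as token-count span attributes.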
+    assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_PROMPT) == 14
+    assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION) == 2
+    assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL) == 16
+    assert span.status.status_code == StatusCode.OK
+
+    if use_context_attributes:
+        _check_context_attributes(
+            attributes,
+            session_id,
+            user_id,
+            metadata,
+            tags,
+            prompt_template,
+            prompt_template_version,
+            prompt_template_variables,
+        )
+
+
 async def test_acompletion_with_invalid_model_triggers_exception_event(
     in_memory_span_exporter: InMemorySpanExporter,
     setup_litellm_instrumentation: None,