@@ -890,6 +890,7 @@ async def proxy_request(request: Request):
         )
     provided_hash = hash_api_key(api_key)
 
+    # Prepare or load chat_history
     if history_enabled:
         if session_id is not None:
             # Retrieve or verify existing session
@@ -902,44 +903,33 @@ async def proxy_request(request: Request):
                             "error": "Unauthorized: API key does not match session owner"
                         },
                     )
-            chat_history = (
-                session_data["chat_history"]
-                if session_data["chat_history"]
-                else []
-            )
+                chat_history = session_data["chat_history"] or []
             else:
                 chat_history = []
                 create_chat_history(session_id, chat_history, provided_hash)
         else:
-            # No session_id provided but enable_history = True, so create a new session
+            # No session_id but enable_history = True, so create a new session
             session_id = str(uuid.uuid4())
             chat_history = []
             create_chat_history(session_id, chat_history, provided_hash)
-
-        # Merge incoming user messages into chat history
-        user_messages_this_round = [
-            m for m in data.get("messages", []) if m["role"] == "user"
-        ]
-        if user_messages_this_round:
-            chat_history.append(user_messages_this_round[-1])
-
-        # Overwrite data["messages"] with chat_history for the LLM request
-        data["messages"] = chat_history
     else:
-        # History is disabled and no valid session_id is provided.
-        # Pass messages through as-is.
+        # History not enabled: start with an empty history
         chat_history = []
 
-        # Merge incoming user messages into chat history
-        user_messages_this_round = [
-            m for m in data.get("messages", []) if m["role"] == "user"
-        ]
-        if user_messages_this_round:
-            chat_history.append(user_messages_this_round[-1])
+    # Merge incoming system/user messages into chat_history in original order
+    # (We generally skip adding "assistant" messages from the request side,
+    # because those come from the model, not from the user.)
+    new_messages = data.get("messages", [])
+    for msg in new_messages:
+        if msg["role"] in ["system", "user"]:
+            chat_history.append(msg)
 
+    # Now data["messages"] should be the entire conversation the model sees
     data["messages"] = chat_history
 
-    # Check for prompt ARN logic
+    # ---------------------------------------------------------------------
+    # Handle optional "Bedrock Prompt" logic
+    # ---------------------------------------------------------------------
     model_id = data.get("model")
     prompt_variables = data.pop("promptVariables", {})
     final_prompt_text = None
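
For reference, the replacement merge loop behaves like this minimal, self-contained sketch, shown outside the proxy for clarity (not part of the commit; merge_request_messages is a hypothetical name for logic the commit inlines directly in proxy_request):

# Minimal sketch of the merge step introduced above.
def merge_request_messages(chat_history, request_messages):
    # Append system/user messages in their original order; assistant
    # messages in the request body are skipped because they originate
    # from the model, not the caller.
    for msg in request_messages:
        if msg.get("role") in ("system", "user"):
            chat_history.append(msg)
    return chat_history

history = [{"role": "user", "content": "Hi"},
           {"role": "assistant", "content": "Hello!"}]
incoming = [{"role": "system", "content": "Be terse."},
            {"role": "user", "content": "Summarize our chat."},
            {"role": "assistant", "content": "ignored"}]  # skipped by the merge
merged = merge_request_messages(history, incoming)
# merged now has four entries: the stored pair plus the system and user turns.
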
@@ -968,15 +958,14 @@ async def proxy_request(request: Request):
     if final_prompt_text:
         data["messages"] = [{"role": "user", "content": final_prompt_text}]
 
-    # client = AsyncOpenAI(api_key=api_key, base_url=LITELLM_ENDPOINT)
-
+    # ---------------------------------------------------------------------
+    # Stream vs. Non-Stream logic
+    # ---------------------------------------------------------------------
     if is_streaming:
-        # print(f"streaming")
         return await get_chat_stream(
             api_key, data, session_id, chat_history, history_enabled
         )
     else:
-        # print(f"not streaming")
         headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {api_key}",
@@ -985,14 +974,14 @@ async def proxy_request(request: Request):
         async with session.post(
             f"{LITELLM_ENDPOINT}/v1/chat/completions",
             headers=headers,
-            json=data,  # Sending the data in the body
+            json=data,
         ) as resp:
-            # Parse the response JSON
             response_headers = dict(resp.headers)
-            response_headers.pop("Content-Length")
-            # print(response_headers)
+            # Avoid passing through invalid content-length
+            response_headers.pop("Content-Length", None)
             response_dict = await resp.json()
 
+            # If there's a response from the assistant, save it to history
             if response_dict.get("choices"):
                 assistant_message = response_dict["choices"][0]["message"]
                 if history_enabled:
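
The added default on pop above matters because the header may be absent entirely (e.g., chunked upstream responses carry no Content-Length), and the stale value would be wrong anyway once resp.json() re-serializes the body. A quick sketch of the difference:

# dict.pop without a default raises KeyError when the key is missing.
headers = {"Content-Type": "application/json"}  # no Content-Length present

headers.pop("Content-Length", None)  # safe: returns None when absent
try:
    headers.pop("Content-Length")    # pre-fix behavior
except KeyError:
    print("KeyError without a default")
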
@@ -1001,6 +990,7 @@ async def proxy_request(request: Request):
                 )
                 update_chat_history(session_id, chat_history)
 
+            # Return session_id in the response if we have one
             if session_id:
                 response_dict["session_id"] = session_id
 
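
Taken together, the session flow can be exercised end to end roughly like the client-side sketch below. The proxy's route path, the enable_history and session_id request fields, and the model id are assumptions inferred from the diff, not confirmed by it:

# Hypothetical client calls against the proxy.
import requests

PROXY_URL = "http://localhost:8000/v1/chat/completions"  # assumed route
HEADERS = {"Authorization": "Bearer sk-example"}          # placeholder key

# First turn: no session_id, so the proxy mints one and stores the history.
first = requests.post(PROXY_URL, headers=HEADERS, json={
    "model": "some-model-id",                             # placeholder
    "messages": [{"role": "user", "content": "Hello!"}],
    "enable_history": True,                               # assumed field
}).json()
session_id = first.get("session_id")  # echoed back by the proxy, per the diff

# Follow-up turn: reuse the session_id; the proxy replays the stored
# history, so only the new user message needs to be sent.
second = requests.post(PROXY_URL, headers=HEADERS, json={
    "model": "some-model-id",
    "messages": [{"role": "user", "content": "What did I just say?"}],
    "enable_history": True,
    "session_id": session_id,                             # assumed field
}).json()
print(second["choices"][0]["message"]["content"])
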