Description
Hi,
When running the following cell:
faqs = doc2cache.invoke({"doc": sample_doc})
it results in the following output:
16:17:24 openai._base_client INFO   Retrying request to /completions in 0.433632 seconds
16:17:25 openai._base_client INFO   Retrying request to /completions in 0.830750 seconds
RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:101, in map_httpcore_exceptions()
100 try:
--> 101     yield
102 except Exception as exc:
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:250, in HTTPTransport.handle_request(self, request)
249 with map_httpcore_exceptions():
--> 250     resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:256, in ConnectionPool.handle_request(self, request)
255     self._close_connections(closing)
--> 256     raise exc from None
258 # Return the response. Note that in this case we still have to manage
259 # the point at which the response is closed.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:236, in ConnectionPool.handle_request(self, request)
234 try:
235     # Send the request on the assigned connection.
--> 236     response = connection.handle_request(
237         pool_request.request
238     )
239 except ConnectionNotAvailable:
240     # In some cases a connection may initially be available to
241     # handle a request, but then become unavailable.
242     #
243     # In this case we clear the connection and try again.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection.py:103, in HTTPConnection.handle_request(self, request)
101     raise exc
--> 103 return self._connection.handle_request(request)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:136, in HTTP11Connection.handle_request(self, request)
135         self._response_closed()
--> 136 raise exc
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:106, in HTTP11Connection.handle_request(self, request)
97 with Trace(
98     "receive_response_headers", logger, request, kwargs
99 ) as trace:
100     (
101         http_version,
102         status,
103         reason_phrase,
104         headers,
105         trailing_data,
--> 106     ) = self._receive_response_headers(**kwargs)
107     trace.return_value = (
108         http_version,
109         status,
110         reason_phrase,
111         headers,
112     )
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
176 while True:
--> 177     event = self._receive_event(timeout=timeout)
178     if isinstance(event, h11.Response):
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:231, in HTTP11Connection._receive_event(self, timeout)
230     msg = "Server disconnected without sending a response."
--> 231     raise RemoteProtocolError(msg)
233 self._h11_state.receive_data(data)
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:989, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
988 try:
--> 989     response = self._client.send(
990         request,
991         stream=stream or self._should_stream_response_body(request=request),
992         **kwargs,
993     )
994 except httpx.TimeoutException as err:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
915     request,
916     auth=auth,
917     follow_redirects=follow_redirects,
918     history=[],
919 )
920 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
941 while True:
--> 942     response = self._send_handling_redirects(
943         request,
944         follow_redirects=follow_redirects,
945         history=history,
946     )
947     try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
977     hook(request)
--> 979 response = self._send_single_request(request)
980 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:1014, in Client._send_single_request(self, request)
1013 with request_context(request=request):
-> 1014     response = transport.handle_request(request)
1016 assert isinstance(response.stream, SyncByteStream)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:249, in HTTPTransport.handle_request(self, request)
237 req = httpcore.Request(
238     method=request.method,
239     url=httpcore.URL(
(...)    247     extensions=request.extensions,
248 )
--> 249 with map_httpcore_exceptions():
250     resp = self._pool.handle_request(req)
File C:\Program Files\Python313\Lib\contextlib.py:162, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
161 try:
--> 162     self.gen.throw(value)
163 except StopIteration as exc:
164     # Suppress StopIteration unless it's the same exception that
165     # was passed to throw().  This prevents a StopIteration
166     # raised inside the "with" statement from being suppressed.
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:118, in map_httpcore_exceptions()
117 message = str(exc)
--> 118 raise mapped_exc(message) from exc
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
APIConnectionError                        Traceback (most recent call last)
Cell In[11], line 1
----> 1 faqs = doc2cache.invoke({"doc": sample_doc})
File ~\litellm-qa-env\Lib\site-packages\langchain_core\runnables\base.py:3034, in RunnableSequence.invoke(self, input, config, **kwargs)
3032                 input = context.run(step.invoke, input, config, **kwargs)
3033             else:
-> 3034                 input = context.run(step.invoke, input, config)
3035 # finish the root run
3036 except BaseException as e:
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:387, in BaseLLM.invoke(self, input, config, stop, **kwargs)
376 @override
377 def invoke(
378     self,
(...)    383     **kwargs: Any,
384 ) -> str:
385     config = ensure_config(config)
386     return (
--> 387         self.generate_prompt(
388             [self._convert_input(input)],
389             stop=stop,
390             callbacks=config.get("callbacks"),
391             tags=config.get("tags"),
392             metadata=config.get("metadata"),
393             run_name=config.get("run_name"),
394             run_id=config.pop("run_id", None),
395             **kwargs,
396         )
397         .generations[0][0]
398         .text
399     )
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:764, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
755 @override
756 def generate_prompt(
757     self,
(...)    761     **kwargs: Any,
762 ) -> LLMResult:
763     prompt_strings = [p.to_string() for p in prompts]
--> 764     return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:971, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
956 if (self.cache is None and get_llm_cache() is None) or self.cache is False:
957     run_managers = [
958         callback_manager.on_llm_start(
959             self._serialized,
(...)    969         )
970     ]
--> 971     return self._generate_helper(
972         prompts,
973         stop,
974         run_managers,
975         new_arg_supported=bool(new_arg_supported),
976         **kwargs,
977     )
978 if len(missing_prompts) > 0:
979     run_managers = [
980         callback_managers[idx].on_llm_start(
981             self._serialized,
(...)    988         for idx in missing_prompt_idxs
989     ]
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:790, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
779 def _generate_helper(
780     self,
781     prompts: list[str],
(...)    786     **kwargs: Any,
787 ) -> LLMResult:
788     try:
789         output = (
--> 790             self._generate(
791                 prompts,
792                 stop=stop,
793                 # TODO: support multiple run managers
794                 run_manager=run_managers[0] if run_managers else None,
795                 **kwargs,
796             )
797             if new_arg_supported
798             else self._generate(prompts, stop=stop)
799         )
800     except BaseException as e:
801         for run_manager in run_managers:
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:463, in BaseOpenAI._generate(self, prompts, stop, run_manager, **kwargs)
451     choices.append(
452         {
453             "text": generation.text,
(...)    460         }
461     )
462 else:
--> 463     response = completion_with_retry(
464         self, prompt=_prompts, run_manager=run_manager, **params
465     )
466     if not isinstance(response, dict):
467         # V1 client returns the response in an PyDantic object instead of
468         # dict. For the transition period, we deep convert it to dict.
469         response = response.dict()
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:121, in completion_with_retry(llm, run_manager, **kwargs)
119 """Use tenacity to retry the completion call."""
120 if is_openai_v1():
--> 121     return llm.client.create(**kwargs)
123 retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
125 @retry_decorator
126 def _completion_with_retry(**kwargs: Any) -> Any:
File ~\litellm-qa-env\Lib\site-packages\openai\_utils\_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
277             msg = f"Missing required argument: {quote(missing[0])}"
278     raise TypeError(msg)
--> 279 return func(*args, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\openai\resources\completions.py:545, in Completions.create(self, model, prompt, best_of, echo, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, seed, stop, stream, stream_options, suffix, temperature, top_p, user, extra_headers, extra_query, extra_body, timeout)
516 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
517 def create(
518     self,
(...)    543     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
544 ) -> Completion | Stream[Completion]:
--> 545     return self._post(
546         "/completions",
547         body=maybe_transform(
548             {
549                 "model": model,
550                 "prompt": prompt,
551                 "best_of": best_of,
552                 "echo": echo,
553                 "frequency_penalty": frequency_penalty,
554                 "logit_bias": logit_bias,
555                 "logprobs": logprobs,
556                 "max_tokens": max_tokens,
557                 "n": n,
558                 "presence_penalty": presence_penalty,
559                 "seed": seed,
560                 "stop": stop,
561                 "stream": stream,
562                 "stream_options": stream_options,
563                 "suffix": suffix,
564                 "temperature": temperature,
565                 "top_p": top_p,
566                 "user": user,
567             },
568             completion_create_params.CompletionCreateParamsStreaming
569             if stream
570             else completion_create_params.CompletionCreateParamsNonStreaming,
571         ),
572         options=make_request_options(
573             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
574         ),
575         cast_to=Completion,
576         stream=stream or False,
577         stream_cls=Stream[Completion],
578     )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1276, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1262 def post(
1263     self,
1264     path: str,
(...)   1271     stream_cls: type[_StreamT] | None = None,
1272 ) -> ResponseT | _StreamT:
1273     opts = FinalRequestOptions.construct(
1274         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1275     )
-> 1276     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:949, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
946 else:
947     retries_taken = 0
--> 949 return self._request(
950     cast_to=cast_to,
951     options=options,
952     stream=stream,
953     stream_cls=stream_cls,
954     retries_taken=retries_taken,
955 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
1014         input_options,
1015         cast_to,
1016         retries_taken=retries_taken,
1017         stream=stream,
1018         stream_cls=stream_cls,
1019         response_headers=None,
1020     )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092     options=options,
1093     cast_to=cast_to,
1094     retries_taken=retries_taken + 1,
1095     stream=stream,
1096     stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
1014         input_options,
1015         cast_to,
1016         retries_taken=retries_taken,
1017         stream=stream,
1018         stream_cls=stream_cls,
1019         response_headers=None,
1020     )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092     options=options,
1093     cast_to=cast_to,
1094     retries_taken=retries_taken + 1,
1095     stream=stream,
1096     stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1013         return self._retry_request(
1014             input_options,
1015             cast_to,
(...)   1019             response_headers=None,
1020         )
1022     log.debug("Raising connection error")
-> 1023     raise APIConnectionError(request=request) from err
1025 log.debug(
1026     'HTTP Response: %s %s "%i %s" %s',
1027     request.method,
(...)   1031     response.headers,
1032 )
1033 log.debug("request_id: %s", response.headers.get("x-request-id"))
APIConnectionError: Connection error.
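
For context, the retries and the final APIConnectionError all happen inside the raw OpenAI v1 client call to /completions (see the Completions.create frame above), so the same failure should be reproducible with a bare client pointed at the endpoint doc2cache uses. Below is a minimal sketch of such a check; the base URL, API key, and model name are placeholders, not values from the original setup, and depend entirely on how the chain is configured.

# Minimal connectivity check against the same kind of endpoint the chain uses.
# Assumptions (all placeholders, not from the report above): an OpenAI-compatible
# endpoint such as a local LiteLLM proxy at http://localhost:4000, api_key
# "sk-1234", and a completions-style model named "gpt-3.5-turbo-instruct".
import openai

client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# Same /completions call the traceback shows failing; if this also raises
# APIConnectionError, the endpoint itself is unreachable or dropping connections,
# independently of the doc2cache chain.
resp = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="ping",
    max_tokens=5,
)
print(resp.choices[0].text)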