Description
Hi,
When running the following cell:
faqs = doc2cache.invoke({"doc": sample_doc})
it results in the following output:
16:17:24 openai._base_client INFO   Retrying request to /completions in 0.433632 seconds
16:17:25 openai._base_client INFO   Retrying request to /completions in 0.830750 seconds
RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:101, in map_httpcore_exceptions()
100 try:
--> 101     yield
102 except Exception as exc:
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:250, in HTTPTransport.handle_request(self, request)
249 with map_httpcore_exceptions():
--> 250     resp = self._pool.handle_request(req)
252 assert isinstance(resp.stream, typing.Iterable)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:256, in ConnectionPool.handle_request(self, request)
255     self._close_connections(closing)
--> 256     raise exc from None
258 # Return the response. Note that in this case we still have to manage
259 # the point at which the response is closed.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection_pool.py:236, in ConnectionPool.handle_request(self, request)
234 try:
235     # Send the request on the assigned connection.
--> 236     response = connection.handle_request(
237         pool_request.request
238     )
239 except ConnectionNotAvailable:
240     # In some cases a connection may initially be available to
241     # handle a request, but then become unavailable.
242     #
243     # In this case we clear the connection and try again.
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\connection.py:103, in HTTPConnection.handle_request(self, request)
101     raise exc
--> 103 return self._connection.handle_request(request)
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:136, in HTTP11Connection.handle_request(self, request)
135         self._response_closed()
--> 136 raise exc
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:106, in HTTP11Connection.handle_request(self, request)
97 with Trace(
98     "receive_response_headers", logger, request, kwargs
99 ) as trace:
100     (
101         http_version,
102         status,
103         reason_phrase,
104         headers,
105         trailing_data,
--> 106     ) = self._receive_response_headers(**kwargs)
107     trace.return_value = (
108         http_version,
109         status,
110         reason_phrase,
111         headers,
112     )
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:177, in HTTP11Connection._receive_response_headers(self, request)
176 while True:
--> 177     event = self._receive_event(timeout=timeout)
178     if isinstance(event, h11.Response):
File ~\litellm-qa-env\Lib\site-packages\httpcore\_sync\http11.py:231, in HTTP11Connection._receive_event(self, timeout)
230     msg = "Server disconnected without sending a response."
--> 231     raise RemoteProtocolError(msg)
233 self._h11_state.receive_data(data)
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
RemoteProtocolError                       Traceback (most recent call last)
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:989, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
988 try:
--> 989     response = self._client.send(
990         request,
991         stream=stream or self._should_stream_response_body(request=request),
992         **kwargs,
993     )
994 except httpx.TimeoutException as err:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:914, in Client.send(self, request, stream, auth, follow_redirects)
912 auth = self._build_request_auth(request, auth)
--> 914 response = self._send_handling_auth(
915     request,
916     auth=auth,
917     follow_redirects=follow_redirects,
918     history=[],
919 )
920 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:942, in Client._send_handling_auth(self, request, auth, follow_redirects, history)
941 while True:
--> 942     response = self._send_handling_redirects(
943         request,
944         follow_redirects=follow_redirects,
945         history=history,
946     )
947     try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:979, in Client._send_handling_redirects(self, request, follow_redirects, history)
977     hook(request)
--> 979 response = self._send_single_request(request)
980 try:
File ~\litellm-qa-env\Lib\site-packages\httpx\_client.py:1014, in Client._send_single_request(self, request)
1013 with request_context(request=request):
-> 1014     response = transport.handle_request(request)
1016 assert isinstance(response.stream, SyncByteStream)
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:249, in HTTPTransport.handle_request(self, request)
237 req = httpcore.Request(
238     method=request.method,
239     url=httpcore.URL(
(...)    247     extensions=request.extensions,
248 )
--> 249 with map_httpcore_exceptions():
250     resp = self._pool.handle_request(req)
File C:\Program Files\Python313\Lib\contextlib.py:162, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
161 try:
--> 162     self.gen.throw(value)
163 except StopIteration as exc:
164     # Suppress StopIteration unless it's the same exception that
165     # was passed to throw().  This prevents a StopIteration
166     # raised inside the "with" statement from being suppressed.
File ~\litellm-qa-env\Lib\site-packages\httpx\_transports\default.py:118, in map_httpcore_exceptions()
117 message = str(exc)
--> 118 raise mapped_exc(message) from exc
RemoteProtocolError: Server disconnected without sending a response.
The above exception was the direct cause of the following exception:
APIConnectionError                        Traceback (most recent call last)
Cell In[11], line 1
----> 1 faqs = doc2cache.invoke({"doc": sample_doc})
File ~\litellm-qa-env\Lib\site-packages\langchain_core\runnables\base.py:3034, in RunnableSequence.invoke(self, input, config, **kwargs)
3032                 input = context.run(step.invoke, input, config, **kwargs)
3033             else:
-> 3034                 input = context.run(step.invoke, input, config)
3035 # finish the root run
3036 except BaseException as e:
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:387, in BaseLLM.invoke(self, input, config, stop, **kwargs)
376 @override
377 def invoke(
378     self,
(...)    383     **kwargs: Any,
384 ) -> str:
385     config = ensure_config(config)
386     return (
--> 387         self.generate_prompt(
388             [self._convert_input(input)],
389             stop=stop,
390             callbacks=config.get("callbacks"),
391             tags=config.get("tags"),
392             metadata=config.get("metadata"),
393             run_name=config.get("run_name"),
394             run_id=config.pop("run_id", None),
395             **kwargs,
396         )
397         .generations[0][0]
398         .text
399     )
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:764, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
755 @override
756 def generate_prompt(
757     self,
(...)    761     **kwargs: Any,
762 ) -> LLMResult:
763     prompt_strings = [p.to_string() for p in prompts]
--> 764     return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:971, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
956 if (self.cache is None and get_llm_cache() is None) or self.cache is False:
957     run_managers = [
958         callback_manager.on_llm_start(
959             self._serialized,
(...)    969         )
970     ]
--> 971     return self._generate_helper(
972         prompts,
973         stop,
974         run_managers,
975         new_arg_supported=bool(new_arg_supported),
976         **kwargs,
977     )
978 if len(missing_prompts) > 0:
979     run_managers = [
980         callback_managers[idx].on_llm_start(
981             self._serialized,
(...)    988         for idx in missing_prompt_idxs
989     ]
File ~\litellm-qa-env\Lib\site-packages\langchain_core\language_models\llms.py:790, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
779 def _generate_helper(
780     self,
781     prompts: list[str],
(...)    786     **kwargs: Any,
787 ) -> LLMResult:
788     try:
789         output = (
--> 790             self._generate(
791                 prompts,
792                 stop=stop,
793                 # TODO: support multiple run managers
794                 run_manager=run_managers[0] if run_managers else None,
795                 **kwargs,
796             )
797             if new_arg_supported
798             else self._generate(prompts, stop=stop)
799         )
800     except BaseException as e:
801         for run_manager in run_managers:
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:463, in BaseOpenAI._generate(self, prompts, stop, run_manager, **kwargs)
451     choices.append(
452         {
453             "text": generation.text,
(...)    460         }
461     )
462 else:
--> 463     response = completion_with_retry(
464         self, prompt=_prompts, run_manager=run_manager, **params
465     )
466     if not isinstance(response, dict):
467         # V1 client returns the response in an PyDantic object instead of
468         # dict. For the transition period, we deep convert it to dict.
469         response = response.dict()
File ~\litellm-qa-env\Lib\site-packages\langchain_community\llms\openai.py:121, in completion_with_retry(llm, run_manager, **kwargs)
119 """Use tenacity to retry the completion call."""
120 if is_openai_v1():
--> 121     return llm.client.create(**kwargs)
123 retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
125 @retry_decorator
126 def _completion_with_retry(**kwargs: Any) -> Any:
File ~\litellm-qa-env\Lib\site-packages\openai\_utils\_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
277             msg = f"Missing required argument: {quote(missing[0])}"
278     raise TypeError(msg)
--> 279 return func(*args, **kwargs)
File ~\litellm-qa-env\Lib\site-packages\openai\resources\completions.py:545, in Completions.create(self, model, prompt, best_of, echo, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, seed, stop, stream, stream_options, suffix, temperature, top_p, user, extra_headers, extra_query, extra_body, timeout)
516 @required_args(["model", "prompt"], ["model", "prompt", "stream"])
517 def create(
518     self,
(...)    543     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
544 ) -> Completion | Stream[Completion]:
--> 545     return self._post(
546         "/completions",
547         body=maybe_transform(
548             {
549                 "model": model,
550                 "prompt": prompt,
551                 "best_of": best_of,
552                 "echo": echo,
553                 "frequency_penalty": frequency_penalty,
554                 "logit_bias": logit_bias,
555                 "logprobs": logprobs,
556                 "max_tokens": max_tokens,
557                 "n": n,
558                 "presence_penalty": presence_penalty,
559                 "seed": seed,
560                 "stop": stop,
561                 "stream": stream,
562                 "stream_options": stream_options,
563                 "suffix": suffix,
564                 "temperature": temperature,
565                 "top_p": top_p,
566                 "user": user,
567             },
568             completion_create_params.CompletionCreateParamsStreaming
569             if stream
570             else completion_create_params.CompletionCreateParamsNonStreaming,
571         ),
572         options=make_request_options(
573             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
574         ),
575         cast_to=Completion,
576         stream=stream or False,
577         stream_cls=Stream[Completion],
578     )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1276, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1262 def post(
1263     self,
1264     path: str,
(...)   1271     stream_cls: type[_StreamT] | None = None,
1272 ) -> ResponseT | _StreamT:
1273     opts = FinalRequestOptions.construct(
1274         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1275     )
-> 1276     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:949, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
946 else:
947     retries_taken = 0
--> 949 return self._request(
950     cast_to=cast_to,
951     options=options,
952     stream=stream,
953     stream_cls=stream_cls,
954     retries_taken=retries_taken,
955 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
1014         input_options,
1015         cast_to,
1016         retries_taken=retries_taken,
1017         stream=stream,
1018         stream_cls=stream_cls,
1019         response_headers=None,
1020     )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092     options=options,
1093     cast_to=cast_to,
1094     retries_taken=retries_taken + 1,
1095     stream=stream,
1096     stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1013, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1010 log.debug("Encountered Exception", exc_info=True)
1012 if remaining_retries > 0:
-> 1013     return self._retry_request(
1014         input_options,
1015         cast_to,
1016         retries_taken=retries_taken,
1017         stream=stream,
1018         stream_cls=stream_cls,
1019         response_headers=None,
1020     )
1022 log.debug("Raising connection error")
1023 raise APIConnectionError(request=request) from err
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1091, in SyncAPIClient._retry_request(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)
1087 # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
1088 # different thread if necessary.
1089 time.sleep(timeout)
-> 1091 return self._request(
1092     options=options,
1093     cast_to=cast_to,
1094     retries_taken=retries_taken + 1,
1095     stream=stream,
1096     stream_cls=stream_cls,
1097 )
File ~\litellm-qa-env\Lib\site-packages\openai\_base_client.py:1023, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1013         return self._retry_request(
1014             input_options,
1015             cast_to,
(...)   1019             response_headers=None,
1020         )
1022     log.debug("Raising connection error")
-> 1023     raise APIConnectionError(request=request) from err
1025 log.debug(
1026     'HTTP Response: %s %s "%i %s" %s',
1027     request.method,
(...)   1031     response.headers,
1032 )
1033 log.debug("request_id: %s", response.headers.get("x-request-id"))
APIConnectionError: Connection error.
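
For context, the retries and the final APIConnectionError all happen inside the raw OpenAI v1 client call to /completions (see the Completions.create frame above), so the same failure should be reproducible with a bare client pointed at the endpoint doc2cache uses. Below is a minimal sketch of such a check; the base URL, API key, and model name are placeholders, not values from the original setup, and depend entirely on how the chain is configured.

# Minimal connectivity check against the same kind of endpoint the chain uses.
# Assumptions (all placeholders, not from the report above): an OpenAI-compatible
# endpoint such as a local LiteLLM proxy at http://localhost:4000, api_key
# "sk-1234", and a completions-style model named "gpt-3.5-turbo-instruct".
import openai

client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# Same /completions call the traceback shows failing; if this also raises
# APIConnectionError, the endpoint itself is unreachable or dropping connections,
# independently of the doc2cache chain.
resp = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="ping",
    max_tokens=5,
)
print(resp.choices[0].text)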