gpt4 #15245
To resolve the issue, configure the Azure OpenAI settings and build the service context as follows:

```python
import openai
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceInstructEmbeddings
from llama_index import LangchainEmbedding, LLMPredictor, PromptHelper, ServiceContext

openai.api_type = "azure"
openai.api_version = "2022-12-01"
openai.api_base = "your_api_base"
openai.api_key = "your_api_key"

deployment_name = "gpt-35-turbo"
llm = ChatOpenAI(model_name=deployment_name)
llm_predictor = LLMPredictor(llm=llm)

embedding_model = LangchainEmbedding(HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": "cuda:1"},
))

# Define the prompt helper (max_input_size, num_output, and CHUNK_OVERLAP_LLM
# must be defined earlier in your script)
prompt_helper = PromptHelper(
    max_input_size=max_input_size,
    num_output=num_output,
    max_chunk_overlap=CHUNK_OVERLAP_LLM,
    chunk_size_limit=max_input_size,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    embed_model=embedding_model,
)
```

This setup should resolve the issue. If you encounter a similar issue with the GPT-4 model, make sure you provide the 'engine' or 'deployment_id' parameter, as shown in the example below:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import AzureOpenAI

llm = AzureOpenAI(deployment_name="gpt-4", model_name="gpt-4", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
    "temperature": 0,
    "max_tokens": num_outputs,
})
llm_predictor = LLMPredictor(llm=llm)
embedding_llm = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))

# max_input_size, num_outputs, max_chunk_overlap, and chunk_size_limit
# must be defined earlier in your script
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap,
                             chunk_size_limit=chunk_size_limit)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
    prompt_helper=prompt_helper,
    chunk_size_limit=chunk_size_limit,
)
```

This should ensure compatibility and resolve the error [1][2].
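For newer versions of LlamaIndex, there is also a native Azure wrapper that takes the deployment name directly instead of going through langchain. A minimal sketch, assuming llama-index >= 0.10 with the llama-index-llms-azure-openai package installed (the deployment and endpoint values below are placeholders):

```python
from llama_index.llms.azure_openai import AzureOpenAI

# `engine` is the Azure *deployment* name, which may differ from the model name.
llm = AzureOpenAI(
    engine="my-gpt4-deployment",  # placeholder: your Azure deployment name
    model="gpt-4",
    api_key="your_api_key",
    azure_endpoint="https://your-resource.openai.azure.com/",  # placeholder endpoint
    api_version="2023-07-01-preview",
)
```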
@dosu
```python
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.vector_stores.types import VectorStoreQueryMode

default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID,
    similarity_top_k=3,
)
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    streaming=True,
    structured_answer_filtering=True,
)
query_engine = RetrieverQueryEngine(
    retriever=default_retriever,
    response_synthesizer=response_synthesizer,
)
response = query_engine.query("hello")
print(response)
print(response.source_nodes)
```
```
{
"name": "BadRequestError",
"message": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}",
"stack": "---------------------------------------------------------------------------
BadRequestError Traceback (most recent call last)
<ipython-input-...> in <module>
1 response = query_engine.query("hello",)
----> 2 print(response)
3 print(response.source_nodes)
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\base\response\schema.py in __str__(self)
122 if self.response_txt is None and self.response_gen is not None:
123 response_txt = ""
--> 124 for text in self.response_gen:
125 response_txt += text
126 self.response_txt = response_txt
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\llms\llm.py in gen()
114
115 def gen() -> TokenGen:
--> 116 for response in chat_response_gen:
117 yield response.delta or ""
118
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\llms\callbacks.py in wrapped_gen()
183 last_response = None
184 try:
--> 185 for x in f_return_val:
186 dispatcher.event(
187 LLMChatInProgressEvent(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\llms\openai\base.py in gen()
479
480 is_function = False
--> 481 for response in client.chat.completions.create(
482 messages=message_dicts,
483 stream=True,
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_utils\_utils.py in wrapper(*args, **kwargs)
275 msg = f"Missing required argument: {quote(missing[0])}"
276 raise TypeError(msg)
--> 277 return func(*args, **kwargs)
278
279 return wrapper # type: ignore
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\resources\chat\completions.py in create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, parallel_tool_calls, presence_penalty, response_format, seed, service_tier, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
644 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
645 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 646 return self._post(
647 "/chat/completions",
648 body=maybe_transform(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in post(self, path, cast_to, body, options, files, stream, stream_cls)
1264 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1265 )
-> 1266 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
1267
1268 def patch(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in request(self, cast_to, options, remaining_retries, stream, stream_cls)
940 stream_cls: type[_StreamT] | None = None,
941 ) -> ResponseT | _StreamT:
--> 942 return self._request(
943 cast_to=cast_to,
944 options=options,
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in _request(self, cast_to, options, remaining_retries, stream, stream_cls)
1044
1045 log.debug("Re-raising status error")
-> 1046 raise self._make_status_error_from_response(err.response) from None
1047
1048 return self._process_response(
BadRequestError: Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}"
}
```
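The BadRequestError above means Azure received a chatCompletion call for a deployment that does not support that operation; this typically happens when the model name is passed where Azure expects the deployment name, or when the deployment serves a completions-only model. A minimal sketch of pointing the query engine at a chat-capable deployment, assuming llama-index >= 0.10 (the deployment and endpoint values are hypothetical placeholders):

```python
from llama_index.core import Settings
from llama_index.llms.azure_openai import AzureOpenAI

# `engine` must name an Azure deployment backed by a chat-capable model;
# the values below are placeholders, not the asker's actual configuration.
Settings.llm = AzureOpenAI(
    engine="my-gpt4-deployment",
    model="gpt-4",
    api_key="your_api_key",
    azure_endpoint="https://your-resource.openai.azure.com/",
    api_version="2023-07-01-preview",
)
```

With Settings.llm configured this way, get_response_synthesizer and RetrieverQueryEngine above should pick up the Azure LLM automatically.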