gpt4 #15245
To resolve the issue, configure the Azure OpenAI settings and build the service context as follows:

```python
import openai
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceInstructEmbeddings
from llama_index import LangchainEmbedding, LLMPredictor, PromptHelper, ServiceContext

openai.api_type = "azure"
openai.api_version = "2022-12-01"
openai.api_base = "your_api_base"
openai.api_key = "your_api_key"

deployment_name = "gpt-35-turbo"
llm = ChatOpenAI(model_name=deployment_name)
llm_predictor = LLMPredictor(llm=llm)

embedding_model = LangchainEmbedding(HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
    model_kwargs={"device": "cuda:1"},
))

# Define the prompt helper (max_input_size, num_output, and CHUNK_OVERLAP_LLM
# must be defined earlier in your script)
prompt_helper = PromptHelper(
    max_input_size=max_input_size,
    num_output=num_output,
    max_chunk_overlap=CHUNK_OVERLAP_LLM,
    chunk_size_limit=max_input_size,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    embed_model=embedding_model,
)
```

This setup should resolve the issue. If you encounter a similar issue with the GPT-4 model, make sure you provide the 'engine' or 'deployment_id' parameter, as shown in the example below:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import AzureOpenAI

llm = AzureOpenAI(deployment_name="gpt-4", model_name="gpt-4", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
    "temperature": 0,
    "max_tokens": num_outputs,
})
llm_predictor = LLMPredictor(llm=llm)
embedding_llm = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))

# max_input_size, num_outputs, max_chunk_overlap, and chunk_size_limit
# must be defined earlier in your script
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap,
                             chunk_size_limit=chunk_size_limit)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
    prompt_helper=prompt_helper,
    chunk_size_limit=chunk_size_limit,
)
```

This should ensure compatibility and resolve the error [1][2].
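For newer versions of LlamaIndex, there is also a native Azure wrapper that takes the deployment name directly instead of going through langchain. A minimal sketch, assuming llama-index >= 0.10 with the llama-index-llms-azure-openai package installed (the deployment and endpoint values below are placeholders):

```python
from llama_index.llms.azure_openai import AzureOpenAI

# `engine` is the Azure *deployment* name, which may differ from the model name.
llm = AzureOpenAI(
    engine="my-gpt4-deployment",  # placeholder: your Azure deployment name
    model="gpt-4",
    api_key="your_api_key",
    azure_endpoint="https://your-resource.openai.azure.com/",  # placeholder endpoint
    api_version="2023-07-01-preview",
)
```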
@dosu
```python
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.vector_stores.types import VectorStoreQueryMode

default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID,
    similarity_top_k=3,
)
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    streaming=True,
    structured_answer_filtering=True,
)
query_engine = RetrieverQueryEngine(
    retriever=default_retriever,
    response_synthesizer=response_synthesizer,
)
response = query_engine.query("hello")
print(response)
print(response.source_nodes)
```
```
{
"name": "BadRequestError",
"message": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}",
"stack": "---------------------------------------------------------------------------
BadRequestError Traceback (most recent call last)
<ipython-input-...> in <module>
1 response = query_engine.query("hello",)
----> 2 print(response)
3 print(response.source_nodes)
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\base\response\schema.py in __str__(self)
122 if self.response_txt is None and self.response_gen is not None:
123 response_txt = ""
--> 124 for text in self.response_gen:
125 response_txt += text
126 self.response_txt = response_txt
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\llms\llm.py in gen()
114
115 def gen() -> TokenGen:
--> 116 for response in chat_response_gen:
117 yield response.delta or ""
118
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\core\llms\callbacks.py in wrapped_gen()
183 last_response = None
184 try:
--> 185 for x in f_return_val:
186 dispatcher.event(
187 LLMChatInProgressEvent(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\llama_index\llms\openai\base.py in gen()
479
480 is_function = False
--> 481 for response in client.chat.completions.create(
482 messages=message_dicts,
483 stream=True,
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_utils\_utils.py in wrapper(*args, **kwargs)
275 msg = f"Missing required argument: {quote(missing[0])}"
276 raise TypeError(msg)
--> 277 return func(*args, **kwargs)
278
279 return wrapper # type: ignore
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\resources\chat\completions.py in create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, parallel_tool_calls, presence_penalty, response_format, seed, service_tier, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
644 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
645 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 646 return self._post(
647 "/chat/completions",
648 body=maybe_transform(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in post(self, path, cast_to, body, options, files, stream, stream_cls)
1264 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1265 )
-> 1266 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
1267
1268 def patch(
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in request(self, cast_to, options, remaining_retries, stream, stream_cls)
940 stream_cls: type[_StreamT] | None = None,
941 ) -> ResponseT | _StreamT:
--> 942 return self._request(
943 cast_to=cast_to,
944 options=options,
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\openai\_base_client.py in _request(self, cast_to, options, remaining_retries, stream, stream_cls)
1044
1045 log.debug("Re-raising status error")
-> 1046 raise self._make_status_error_from_response(err.response) from None
1047
1048 return self._process_response(
BadRequestError: Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}"
}
```
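The BadRequestError above means Azure received a chatCompletion call for a deployment that does not support that operation; this typically happens when the model name is passed where Azure expects the deployment name, or when the deployment serves a completions-only model. A minimal sketch of pointing the query engine at a chat-capable deployment, assuming llama-index >= 0.10 (the deployment and endpoint values are hypothetical placeholders):

```python
from llama_index.core import Settings
from llama_index.llms.azure_openai import AzureOpenAI

# `engine` must name an Azure deployment backed by a chat-capable model;
# the values below are placeholders, not the asker's actual configuration.
Settings.llm = AzureOpenAI(
    engine="my-gpt4-deployment",
    model="gpt-4",
    api_key="your_api_key",
    azure_endpoint="https://your-resource.openai.azure.com/",
    api_version="2023-07-01-preview",
)
```

With Settings.llm configured this way, get_response_synthesizer and RetrieverQueryEngine above should pick up the Azure LLM automatically.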