-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Closed
Labels
Description
- [x] bug report -> please search issues before submitting
- [ ] feature request
- [ ] documentation issue or request
- [ ] regression (a behavior that used to work and stopped in a new release)
OS and Version?
macOS 14
azd version?
1.5.0
Versions
Commit: 144698a
Date: Thu Nov 16 2023 14:58:20 GMT-0500 (Eastern Standard Time)
I am indexing a large folder of about 10k files, and the run keeps failing with different tracebacks after processing around 50–100 files (I am including the three most recent ones below). It would be great if the script could catch these exceptions and skip the failing file instead of aborting, or retry more times before giving up.
Traceback 1:
Traceback (most recent call last):
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/transport/_aiohttp.py", line 280, in send
result = await self.session.request( # type: ignore
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/client.py", line 586, in _request
await resp.start(conn)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/client_reqrep.py", line 905, in start
message, payload = await protocol.read() # type: ignore[union-attr]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/streams.py", line 616, in read
await self._waiter
aiohttp.client_exceptions.ClientOSError: [Errno 32] Broken pipe
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 256, in <module>
loop.run_until_complete(main(file_strategy, azd_credential, args))
File "/Users/.../opt/anaconda3/envs/.../lib/python3.9/asyncio/base_events.py", line 647, in run_until_complete
return future.result()
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 131, in main
await strategy.run(search_info)
File "/.../azure-search-openai-demo/scripts/prepdocslib/filestrategy.py", line 63, in run
await search_manager.update_content(sections)
File "/.../azure-search-openai-demo/scripts/prepdocslib/searchmanager.py", line 146, in update_content
await search_client.upload_documents(documents)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/search/documents/aio/_search_client_async.py", line 557, in upload_documents
results = await self.index_documents(batch, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/tracing/decorator_async.py", line 77, in wrapper_use_tracer
return await func(*args, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/search/documents/aio/_search_client_async.py", line 655, in index_documents
return await self._index_documents_actions(actions=batch.actions, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/search/documents/aio/_search_client_async.py", line 663, in _index_documents_actions
batch_response = await self._client.documents.index(batch=batch, error_map=error_map, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/tracing/decorator_async.py", line 77, in wrapper_use_tracer
return await func(*args, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/search/documents/_generated/aio/operations/_documents_operations.py", line 895, in index
pipeline_response: PipelineResponse = await self._client._pipeline.run( # pylint: disable=protected-access
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 221, in run
return await first_node.send(pipeline_request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
[Previous line repeated 2 more times]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_redirect_async.py", line 73, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_retry_async.py", line 205, in send
raise err
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_retry_async.py", line 179, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_authentication_async.py", line 94, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
[Previous line repeated 2 more times]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 106, in send
await self._sender.send(request.http_request, **request.context.options),
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/transport/_aiohttp.py", line 317, in send
raise ServiceRequestError(err, error=err) from err
azure.core.exceptions.ServiceRequestError: [Errno 32] Broken pipe
Traceback 2:
Extracting text from './data/xxx.pdf' using Azure Document Intelligence
Traceback (most recent call last):
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/transport/_aiohttp.py", line 280, in send
result = await self.session.request( # type: ignore
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/client.py", line 586, in _request
await resp.start(conn)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/client_reqrep.py", line 905, in start
message, payload = await protocol.read() # type: ignore[union-attr]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/aiohttp/streams.py", line 616, in read
await self._waiter
aiohttp.client_exceptions.ServerTimeoutError: Timeout on reading data from socket
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 256, in <module>
loop.run_until_complete(main(file_strategy, azd_credential, args))
File "/Users/.../opt/anaconda3/envs/.../lib/python3.9/asyncio/base_events.py", line 647, in run_until_complete
return future.result()
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 131, in main
await strategy.run(search_info)
File "/.../azure-search-openai-demo/scripts/prepdocslib/filestrategy.py", line 56, in run
pages = [page async for page in self.pdf_parser.parse(content=file.content)]
File "/.../azure-search-openai-demo/scripts/prepdocslib/filestrategy.py", line 56, in <listcomp>
pages = [page async for page in self.pdf_parser.parse(content=file.content)]
File "/.../azure-search-openai-demo/scripts/prepdocslib/pdfparser.py", line 81, in parse
poller = await form_recognizer_client.begin_analyze_document(model_id=self.model_id, document=content)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/tracing/decorator_async.py", line 77, in wrapper_use_tracer
return await func(*args, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/ai/formrecognizer/aio/_document_analysis_client_async.py", line 132, in begin_analyze_document
return await _client_op_path.begin_analyze_document( # type: ignore
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/tracing/decorator_async.py", line 77, in wrapper_use_tracer
return await func(*args, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/ai/formrecognizer/_generated/v2023_07_31/aio/operations/_document_models_operations.py", line 189, in begin_analyze_document
raw_result = await self._analyze_document_initial( # type: ignore
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/ai/formrecognizer/_generated/v2023_07_31/aio/operations/_document_models_operations.py", line 105, in _analyze_document_initial
pipeline_response = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 221, in run
return await first_node.send(pipeline_request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
[Previous line repeated 2 more times]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_redirect_async.py", line 73, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_retry_async.py", line 205, in send
raise err
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_retry_async.py", line 179, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/policies/_authentication_async.py", line 94, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 69, in send
response = await self.next.send(request)
[Previous line repeated 3 more times]
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/_base_async.py", line 106, in send
await self._sender.send(request.http_request, **request.context.options),
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/azure/core/pipeline/transport/_aiohttp.py", line 315, in send
raise ServiceResponseError(err, error=err) from err
azure.core.exceptions.ServiceResponseError: Timeout on reading data from socket
Traceback 3:
Traceback (most recent call last):
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 256, in <module>
loop.run_until_complete(main(file_strategy, azd_credential, args))
File "/.../opt/anaconda3/envs/.../lib/python3.9/asyncio/base_events.py", line 647, in run_until_complete
return future.result()
File "/.../azure-search-openai-demo/./scripts/prepdocs.py", line 131, in main
await strategy.run(search_info)
File "/.../azure-search-openai-demo/scripts/prepdocslib/filestrategy.py", line 63, in run
await search_manager.update_content(sections)
File "/.../azure-search-openai-demo/scripts/prepdocslib/searchmanager.py", line 140, in update_content
embeddings = await self.embeddings.create_embeddings(
File "/.../azure-search-openai-demo/scripts/prepdocslib/embeddings.py", line 116, in create_embeddings
return await self.create_embedding_batch(texts)
File "/.../azure-search-openai-demo/scripts/prepdocslib/embeddings.py", line 86, in create_embedding_batch
async for attempt in AsyncRetrying(
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/tenacity/_asyncio.py", line 71, in __anext__
do = self.iter(retry_state=self._retry_state)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/tenacity/__init__.py", line 314, in iter
return fut.result()
File "/.../opt/anaconda3/envs/.../lib/python3.9/concurrent/futures/_base.py", line 439, in result
return self.__get_result()
File "/.../opt/anaconda3/envs/.../lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
raise self._exception
File "/.../azure-search-openai-demo/scripts/prepdocslib/embeddings.py", line 94, in create_embedding_batch
emb_response = await openai.Embedding.acreate(**emb_args, input=batch.texts)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_resources/embedding.py", line 73, in acreate
response = await super().acreate(*args, **kwargs)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 219, in acreate
response, _, api_key = await requestor.arequest(
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_requestor.py", line 384, in arequest
resp, got_stream = await self._interpret_async_response(result, stream)
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_requestor.py", line 738, in _interpret_async_response
self._interpret_response_line(
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_requestor.py", line 775, in _interpret_response_line
raise self.handle_error_response(
File "/.../azure-search-openai-demo/scripts/.venv/lib/python3.9/site-packages/openai/api_requestor.py", line 415, in handle_error_response
raise error.APIError(
openai.error.APIError: Invalid response object from API: '{ "statusCode": 401, "message": "Unauthorized. Access token is missing, invalid, audience is incorrect (https://cognitiveservices.azure.com), or have expired." }' (HTTP response code was 401)
pamelafox