Description
Hi all, when I enabled the file upload feature together with user login, I got this error: Object of type FileStorage is not JSON serializable
Here is the call stack:

Traceback (most recent call last):
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/azure/ai/documentintelligence/_model_base.py", line 146, in default
    return super(SdkJSONEncoder, self).default(o)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/python/3.11.14/lib/python3.11/json/encoder.py", line 180, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type FileStorage is not JSON serializable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/opentelemetry/trace/__init__.py", line 589, in use_span
    yield span
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/azure/core/tracing/decorator_async.py", line 138, in wrapper_use_tracer
    return await func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/azure/ai/documentintelligence/aio/_operations/_patch.py", line 529, in begin_analyze_document
    raw_result = await self._analyze_document_initial(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/azure/ai/documentintelligence/aio/_operations/_operations.py", line 120, in _analyze_document_initial
    _content = json.dumps(analyze_request, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/python/3.11.14/lib/python3.11/json/__init__.py", line 238, in dumps
    **kw).encode(obj)
          ^^^^^^^^^^^
  File "/opt/python/3.11.14/lib/python3.11/json/encoder.py", line 200, in encode
    chunks = self.iterencode(o, _one_shot=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/python/3.11.14/lib/python3.11/json/encoder.py", line 258, in iterencode
    return _iterencode(o, 0)
           ^^^^^^^^^^^^^^^^^
  File "/tmp/8de1fc16293c5fa/antenv/lib/python3.11/site-packages/azure/ai/documentintelligence/_model_base.py", line 165, in default
    return super(SdkJSONEncoder, self).default(o)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/python/3.11.14/lib/python3.11/json/encoder.py", line 180, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type FileStorage is not JSON serializable
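If I'm reading the last frame right, the SDK only sends the payload as a raw binary body when it is a bytes or io.IOBase instance; Werkzeug's FileStorage (what the upload handler receives for an uploaded file) is neither, so it falls through to json.dumps and raises. A quick illustration of the check as I understand it (not code from the repo):

import io
from werkzeug.datastructures import FileStorage

upload = FileStorage(stream=io.BytesIO(b"%PDF-1.7 ..."), filename="example.pdf")

# FileStorage is not an io.IOBase subclass, so a bytes/stream check fails
print(isinstance(upload, (io.IOBase, bytes)))             # False -> gets JSON-encoded, raises TypeError
# Copying the upload into a BytesIO produces a payload the SDK treats as binary
print(isinstance(io.BytesIO(upload.read()), io.IOBase))   # True  -> sent as a raw binary body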
The fix that worked for me was this change in /workspaces/azure-search-openai-demo/app/backend/prepdocslib/pdfparser.py:
if media_describer is not None:
    content_bytes = content.read()
    try:
        # change for the "Object of type FileStorage is not JSON serializable" error
        # poller = await document_intelligence_client.begin_analyze_document(
        #     model_id="prebuilt-layout",
        #     analyze_request=AnalyzeDocumentRequest(bytes_source=content_bytes),
        #     output=["figures"],
        #     features=["ocrHighResolution"],
        #     output_content_format="markdown",
        # )
        poller = await document_intelligence_client.begin_analyze_document(
            model_id="prebuilt-layout",
            body=content_bytes,
            content_type="application/octet-stream",
            output=["figures"],
            features=["ocrHighResolution"],
            output_content_format="markdown",
        )
        doc_for_pymupdf = pymupdf.open(stream=io.BytesIO(content_bytes))
        file_analyzed = True
    except HttpResponseError as e:
        content.seek(0)
        if e.error and e.error.code == "InvalidArgument":
            logger.error(
                "This document type does not support media description. Proceeding with standard analysis."
            )
        else:
            logger.error(
                "Unexpected error analyzing document for media description: %s. Proceeding with standard analysis.",
                e,
            )

# change for the "Object of type FileStorage is not JSON serializable" error
# if file_analyzed is False:
#     poller = await document_intelligence_client.begin_analyze_document(
#         model_id=self.model_id, analyze_request=content, content_type="application/octet-stream"
#     )
if file_analyzed is False:
    # ensure the stream is back at the start if it was read earlier
    try:
        content.seek(0)
    except Exception:
        pass
    poller = await document_intelligence_client.begin_analyze_document(
        model_id=self.model_id,
        body=content,  # ✅ stream/bytes go in body
        content_type="application/octet-stream",
    )

analyze_result: AnalyzeResult = await poller.result()
offset = 0
This worked for me, but I'm not sure it is the best solution...
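An alternative that might avoid patching pdfparser.py at all would be to normalize the upload before it reaches the parser, so the existing analyze_request=content call receives a stream the SDK recognizes. Untested sketch; the helper name is mine, not from the repo:

import io

from werkzeug.datastructures import FileStorage


def to_seekable_stream(upload: FileStorage) -> io.BytesIO:
    """Copy an uploaded file into a plain BytesIO (a real io.IOBase),
    which the Document Intelligence SDK sends as a raw binary body."""
    buffer = io.BytesIO(upload.read())
    buffer.seek(0)
    return buffer

Doing the conversion at the upload boundary would also keep the SDK-version-specific keyword difference (analyze_request vs body) out of the parser code.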