Skip to content

Commit 8d6a451

Browse files
authored
Code fixes for how to extract block types (#654)
1 parent 6aa431b commit 8d6a451

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

snippets/how-to-api/extract_image_block_types.py.mdx

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ if __name__ == "__main__":
1919
local_input_filepath = "local-ingest-input-pdf/embedded-images-tables.pdf"
2020
local_output_filepath = "local-ingest-output/embedded-images-tables.json"
2121

22-
with open(local_filepath, "rb") as f:
22+
with open(local_input_filepath, "rb") as f:
2323
files = shared.Files(
2424
content=f.read(),
2525
file_name=local_input_filepath
@@ -28,7 +28,6 @@ if __name__ == "__main__":
2828
request = operations.PartitionRequest(
2929
shared.PartitionParameters(
3030
files=files,
31-
strategy=shared.Strategy.HI_RES,
3231
split_pdf_page=True,
3332
split_pdf_allow_failed=True,
3433
split_pdf_concurrency_level=15,
@@ -44,7 +43,7 @@ if __name__ == "__main__":
4443
)
4544

4645
try:
47-
result = await client.general.partition_async(
46+
result = client.general.partition(
4847
request=request
4948
)
5049

@@ -56,7 +55,26 @@ if __name__ == "__main__":
5655
image_data = base64.b64decode(element["metadata"]["image_base64"])
5756
image = Image.open(io.BytesIO(image_data))
5857
image.show()
58+
59+
# Optionally, prepare to print or save the elements as JSON.
60+
dict_elements = elements_from_dicts(
61+
element_dicts=result.elements
62+
)
63+
64+
# Print the elements as JSON...
65+
json_elements = elements_to_json(
66+
elements=dict_elements,
67+
indent=2
68+
)
5969

70+
print(json_elements)
71+
72+
# ...or save as JSON.
73+
elements_to_json(
74+
elements=dict_elements,
75+
indent=2,
76+
filename=local_output_filepath
77+
)
6078
except Exception as e:
6179
print(e)
6280
```

0 commit comments

Comments (0)