Skip to content

Commit c917b31

Browse files
committed
📦 NEW: Parser Pdf Example
1 parent 5588bcb commit c917b31

File tree

3 files changed

+50
-16
lines changed

3 files changed

+50
-16
lines changed

examples/parser/document.pdf

100 KB
Binary file not shown.
Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,42 @@
1+
import json
2+
import os
3+
import pathlib
4+
5+
from dotenv import load_dotenv
6+
7+
from langbase import Langbase
8+
9+
load_dotenv()
10+
11+
# Get API key from environment variable
12+
langbase_api_key = os.getenv("LANGBASE_API_KEY")
13+
14+
# Initialize the client
15+
langbase = Langbase(api_key=langbase_api_key)
16+
17+
18+
def main():
19+
try:
20+
# Get the path to the PDF document
21+
document_path = pathlib.Path(__file__).parent / "document.pdf"
22+
23+
# Read the file
24+
with open(document_path, "rb") as file:
25+
document_content = file.read()
26+
27+
# Parse the document
28+
results = langbase.parser(
29+
document=document_content,
30+
document_name="document.pdf",
31+
content_type="application/pdf",
32+
)
33+
34+
# Print the results
35+
print(json.dumps(results, indent=2))
36+
37+
except Exception as e:
38+
print(f"Error parsing document: {e}")
39+
40+
41+
if __name__ == "__main__":
42+
main()

examples/parser/parser.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,3 @@
1-
"""
2-
Example demonstrating how to parse a document using Langbase.
3-
"""
4-
51
import json
62
import os
73
import pathlib
@@ -16,33 +12,29 @@
1612
langbase_api_key = os.getenv("LANGBASE_API_KEY")
1713

1814
# Initialize the client
19-
lb = Langbase(api_key=langbase_api_key)
15+
langbase = Langbase(api_key=langbase_api_key)
2016

2117

2218
def main():
2319
"""
24-
Parses a document using Langbase.
20+
Chunks text content using Langbase.
2521
"""
2622
try:
2723
# Get the path to the document
2824
document_path = pathlib.Path(__file__).parent / "composable-ai.md"
2925

3026
# Read the file
31-
with open(document_path, "rb") as file:
27+
with open(document_path, "r", encoding="utf-8") as file:
3228
document_content = file.read()
33-
34-
# Parse the document
35-
results = lb.parser(
36-
document=document_content,
37-
document_name="composable-ai.md",
38-
content_type="text/markdown",
29+
# Chunk the content
30+
chunks = langbase.chunker(
31+
content=document_content, chunk_max_length=1024, chunk_overlap=256
3932
)
4033

41-
# Print the results
42-
print(json.dumps(results, indent=2))
34+
print(json.dumps(chunks, indent=2))
4335

4436
except Exception as e:
45-
print(f"Error parsing document: {e}")
37+
print(f"Error chunking content: {e}")
4638

4739

4840
if __name__ == "__main__":

0 commit comments

Comments
 (0)