We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 86dd608, commit 3f1fd46 (Copy full SHA for 3f1fd46)
agentic_rag/pdf_processor.py
@@ -4,6 +4,15 @@
4
import argparse
5
from docling.document_converter import DocumentConverter
6
from docling.chunking import HybridChunker
7
+from urllib.parse import urlparse
8
+
9
def is_url(string: str) -> bool:
    """Return True if *string* parses as a URL with both a scheme and a netloc.

    Only the structure is checked: the value must have a non-empty scheme
    (e.g. ``https``) and a non-empty network location (e.g. ``example.com``).
    No network access is performed.

    Args:
        string: Candidate value, typically a file path or a URL.

    Returns:
        True when ``urlparse`` yields a non-empty scheme and netloc;
        False otherwise, including when the value cannot be parsed.
    """
    try:
        parsed = urlparse(string)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed input such as an unterminated IPv6 literal.
        # TypeError/AttributeError: non-string input. Catching only these
        # (rather than a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    return bool(parsed.scheme and parsed.netloc)
16
17
class PDFProcessor:
18
def __init__(self, tokenizer: str = "BAAI/bge-small-en-v1.5"):
0 commit comments