File tree Expand file tree Collapse file tree 3 files changed +14 -11 lines changed
Original file line number Diff line number Diff line change @@ -18,3 +18,4 @@ playwright==1.43.0
1818langchain-aws == 0.1.2
1919langchain-anthropic == 0.1.11
2020yahoo-search-py == 0.3
21+ pypdf == 4.2.0
Original file line number Diff line number Diff line change @@ -56,36 +56,29 @@ def _create_graph(self) -> BaseGraph:
5656 """
5757
5858 fetch_node = FetchNode (
59- input = "pdf_dir" ,
59+ input = 'pdf' ,
6060 output = ["doc" ],
61- node_config = {
62- "headless" : self .headless ,
63- "verbose" : self .verbose
64- }
6561 )
6662 parse_node = ParseNode (
6763 input = "doc" ,
6864 output = ["parsed_doc" ],
6965 node_config = {
7066 "chunk_size" : self .model_token ,
71- "verbose" : self .verbose
7267 }
7368 )
7469 rag_node = RAGNode (
7570 input = "user_prompt & (parsed_doc | doc)" ,
7671 output = ["relevant_chunks" ],
7772 node_config = {
78- "llm " : self .llm_model ,
73+ "llm_model " : self .llm_model ,
7974 "embedder_model" : self .embedder_model ,
80- "verbose" : self .verbose
8175 }
8276 )
8377 generate_answer_node = GenerateAnswerNode (
8478 input = "user_prompt & (relevant_chunks | parsed_doc | doc)" ,
8579 output = ["answer" ],
8680 node_config = {
87- "llm" : self .llm_model ,
88- "verbose" : self .verbose
81+ "llm_model" : self .llm_model ,
8982 }
9083 )
9184
Original file line number Diff line number Diff line change 55from typing import List , Optional
66from langchain_community .document_loaders import AsyncChromiumLoader
77from langchain_core .documents import Document
8+ from langchain_community .document_loaders import PyPDFLoader
89from .base_node import BaseNode
910from ..utils .remover import remover
1011
@@ -56,7 +57,6 @@ def execute(self, state):
5657
5758 # Interpret input keys based on the provided input expression
5859 input_keys = self .get_input_keys (state )
59-
6060 # Fetching data from the state based on the input keys
6161 input_data = [state [key ] for key in input_keys ]
6262
@@ -66,6 +66,15 @@ def execute(self, state):
6666 "source" : "local_dir"
6767 })]
6868 # if it is a local directory
69+
70+ # handling for pdf
71+ elif self .input == "pdf" :
72+ loader = PyPDFLoader (source )
73+ compressed_document = loader .load ()
74+
75+ elif self .input == "pdf_dir" :
76+ pass
77+
6978 elif not source .startswith ("http" ):
7079 compressed_document = [Document (page_content = remover (source ), metadata = {
7180 "source" : "local_dir"
You can’t perform that action at this time.
0 commit comments