1818 */
1919package co .elastic .clients .rag .article ;
2020
21- import org .apache .tika .exception .TikaException ;
22- import org .apache .tika .metadata .Metadata ;
23- import org .apache .tika .parser .AutoDetectParser ;
24- import org .apache .tika .parser .ParseContext ;
25- import org .apache .tika .parser .Parser ;
26- import org .apache .tika .parser .pdf .PDFParserConfig ;
27- import org .springframework .ai .chat .messages .Message ;
28- import org .springframework .ai .chat .messages .UserMessage ;
29- import org .springframework .ai .chat .model .ChatModel ;
30- import org .springframework .ai .chat .model .ChatResponse ;
31- import org .springframework .ai .chat .prompt .Prompt ;
32- import org .springframework .ai .chat .prompt .SystemPromptTemplate ;
21+ import org .springframework .ai .chat .client .ChatClient ;
3322import org .springframework .ai .document .Document ;
23+ import org .springframework .ai .reader .pdf .PagePdfDocumentReader ;
3424import org .springframework .ai .transformer .splitter .TokenTextSplitter ;
3525import org .springframework .ai .vectorstore .ElasticsearchVectorStore ;
3626import org .springframework .ai .vectorstore .SearchRequest ;
37- import org .springframework .beans .factory .annotation .Autowired ;
3827import org .springframework .stereotype .Service ;
39- import org .xml .sax .SAXException ;
4028
41- import java .io .FileInputStream ;
42- import java .io .IOException ;
43- import java .util .ArrayList ;
44- import java .util .HashMap ;
4529import java .util .List ;
46- import java .util .Map ;
4730import java .util .stream .Collectors ;
4831
4932@ Service
5033public class RagService {
5134
35+ // Both beans autowired from default configuration
5236 private ElasticsearchVectorStore vectorStore ;
53- private ChatModel chatModel ;
37+ private ChatClient chatClient ;
5438
55- @ Autowired
56- public RagService (ElasticsearchVectorStore vectorStore , ChatModel model ) {
39+ public RagService (ElasticsearchVectorStore vectorStore , ChatClient .Builder clientBuilder ) {
5740 this .vectorStore = vectorStore ;
58- this .chatModel = model ;
41+ this .chatClient = clientBuilder . build () ;
5942 }
6043
61- public void ingestPDF (String path ) throws IOException , TikaException , SAXException {
62- // Initializing the PDF parser
63- // Keep in mind that AutoDetectParser is not thread safe
64- Parser parser = new AutoDetectParser ();
65- // Using our custom single page handler class
66- PageContentHandler handler = new PageContentHandler ();
44+ public void ingestPDF (String path ) {
6745
68- // No need for any other specific PDF configuration
69- ParseContext parseContext = new ParseContext ();
70- parseContext .set (PDFParserConfig .class , new PDFParserConfig ());
71-
72- // The metadata contain information such as creation date, creation tool used, etc... which we
73- // don't need
74- Metadata metadata = new Metadata ();
75-
76- // Reading the file
77- try (FileInputStream stream = new FileInputStream (path )) {
78- parser .parse (stream , handler , metadata , parseContext );
79- }
80-
81- // Getting the result as a list of Strings with the content of the pages
82- List <String > allPages = handler .getPages ();
83- List <Document > docbatch = new ArrayList <>();
84-
85- // Converting pages to Documents
86- for (int i = 0 ; i < allPages .size (); i ++) {
87- Map <String , Object > docMetadata = new HashMap <>();
88- // The page number will be used in the response
89- docMetadata .put ("page" , i + 1 );
90-
91- Document doc = new Document (allPages .get (i ), docMetadata );
92- docbatch .add (doc );
93- }
46+ // Spring AI utility class to read a PDF file page by page
47+ PagePdfDocumentReader pdfReader = new PagePdfDocumentReader (path );
48+ List <Document > docbatch = pdfReader .read ();
9449
9550 // Sending batch of documents to vector store
9651 // applying tokenizer
@@ -109,31 +64,31 @@ public String queryLLM(String question) {
10964 .map (Document ::getContent )
11065 .collect (Collectors .joining (System .lineSeparator ()));
11166
112- // Setting the prompt
113- String basePrompt = """
67+ // Setting the prompt with the context
68+ String prompt = """
11469 You're assisting with providing the rules of the tabletop game Runewars.
115- Use the information from the DOCUMENTS section to provide accurate answers.
70+ Use the information from the DOCUMENTS section to provide accurate answers to the
71+ question in the QUESTION section.
11672 If unsure, simply state that you don't know.
11773
11874 DOCUMENTS:
119- {documents}
120- """ ;
121-
122- // Preparing the question for the LLM
123- SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate (basePrompt );
124- Message systemMessage = systemPromptTemplate .createMessage (Map .of ("documents" , documents ));
75+ """ + documents
76+ + """
77+ QUESTION:
78+ """ + question ;
12579
126- UserMessage userMessage = new UserMessage (question );
12780
128- Prompt prompt = new Prompt (List .of (systemMessage , userMessage ));
12981 // Calling the chat model with the question
130- ChatResponse response = chatModel .call (prompt );
82+ String response = chatClient .prompt ()
83+ .user (prompt )
84+ .call ()
85+ .content ();
13186
132- return response . getResult (). getOutput (). getContent () +
87+ return response +
13388 System .lineSeparator () +
13489 "Found at page: " +
13590 // Retrieving the first ranked page number from the document metadata
136- vectorStoreResult .get (0 ).getMetadata ().get ("page" ) +
91+ vectorStoreResult .get (0 ).getMetadata ().get (PagePdfDocumentReader . METADATA_START_PAGE_NUMBER ) +
13792 " of the manual" ;
13893 }
13994}
0 commit comments