@@ -85,30 +85,33 @@ JVM start-up time is a lot of the cost of the `tabula` command, so if you're try
8585
8686A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document:
8787
88- InputStream in = this.getClass().getResourceAsStream("my.pdf");
89- try (PDDocument document = PDDocument.load(in)) {
90- SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
91- PageIterator pi = new ObjectExtractor(document).extract();
92- while (pi.hasNext()) {
93- // iterate over the pages of the document
94- Page page = pi.next();
95- List<Table> table = sea.extract(page);
96- // iterate over the tables of the page
97- for(Table tables: table) {
98- List<List<RectangularTextContainer>> rows = tables.getRows();
99- // iterate over the rows of the table
100- for (List<RectangularTextContainer> cells : rows) {
101- // print all column-cells of the row plus linefeed
102- for (RectangularTextContainer content : cells) {
103- // Note: Cell.getText() uses \r to concat text chunks
104- String text = content.getText().replace("\r", " ");
105- System.out.print(text + "|");
106- }
107- System.out.println();
108- }
88+ ``` java
89+ InputStream in = this.getClass().getResourceAsStream("my.pdf");
90+ try (PDDocument document = PDDocument.load(in)) {
91+     SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
92+     PageIterator pi = new ObjectExtractor(document).extract();
93+     while (pi.hasNext()) {
94+         // iterate over the pages of the document
95+         Page page = pi.next();
96+         List<Table> tables = sea.extract(page);
97+         // iterate over the tables of the page
98+         for (Table table : tables) {
99+             List<List<RectangularTextContainer>> rows = table.getRows();
100+             // iterate over the rows of the table
101+             for (List<RectangularTextContainer> cells : rows) {
102+                 // print every cell of the row followed by '|', then end the line
103+                 for (RectangularTextContainer content : cells) {
104+                     // Note: Cell.getText() uses \r to concat text chunks
105+                     String text = content.getText().replace("\r", " ");
106+                     System.out.print(text + "|");
109107                 }
108+                 System.out.println();
110109             }
111110         }
111+     }
112+ }
113+ ```
114+
112115
113116For more detailed information, check the Javadoc.
114117The Javadoc API documentation can be generated (see also the '_Building from Source_' section) via
0 commit comments