Skip to content

Commit 77275e1

Browse files
committed
Refactor TextLine
1 parent aae79a7 commit 77275e1

File tree

1 file changed

+25
-40
lines changed
  • document-readers/pdf-reader/src/main/java/org/springframework/ai/reader/pdf/layout

1 file changed

+25
-40
lines changed

document-readers/pdf-reader/src/main/java/org/springframework/ai/reader/pdf/layout/TextLine.java

Lines changed: 25 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -16,29 +16,34 @@
1616

1717
package org.springframework.ai.reader.pdf.layout;
1818

19+
import java.util.Arrays;
20+
1921
class TextLine {
2022

2123
private static final char SPACE_CHARACTER = ' ';
2224

2325
private int lineLength;
24-
25-
private String line;
26-
26+
private char[] line;
2727
private int lastIndex;
2828

2929
TextLine(int lineLength) {
30-
this.line = "";
30+
if (lineLength < 0) {
31+
throw new IllegalArgumentException("Line length cannot be negative");
32+
}
3133
this.lineLength = lineLength / ForkPDFLayoutTextStripper.OUTPUT_SPACE_CHARACTER_WIDTH_IN_PT;
32-
this.completeLineWithSpaces();
34+
this.line = new char[this.lineLength];
35+
Arrays.fill(this.line, SPACE_CHARACTER);
3336
}
3437

3538
public void writeCharacterAtIndex(final Character character) {
39+
if (character == null) {
40+
throw new IllegalArgumentException("Character cannot be null");
41+
}
3642
character.setIndex(this.computeIndexForCharacter(character));
3743
int index = character.getIndex();
3844
char characterValue = character.getCharacterValue();
39-
if (this.indexIsInBounds(index) && this.line.charAt(index) == SPACE_CHARACTER) {
40-
this.line = this.line.substring(0, index) + characterValue
41-
+ this.line.substring(index + 1, this.getLineLength());
45+
if (this.indexIsInBounds(index) && this.line[index] == SPACE_CHARACTER) {
46+
this.line[index] = characterValue;
4247
}
4348
}
4449

@@ -47,7 +52,7 @@ public int getLineLength() {
4752
}
4853

4954
public String getLine() {
50-
return this.line;
55+
return new String(this.line);
5156
}
5257

5358
private int computeIndexForCharacter(final Character character) {
@@ -58,16 +63,13 @@ private int computeIndexForCharacter(final Character character) {
5863

5964
if (!this.indexIsInBounds(index)) {
6065
return -1;
61-
}
62-
else {
66+
} else {
6367
if (isCharacterPartOfPreviousWord && !isCharacterAtTheBeginningOfNewLine) {
6468
index = this.findMinimumIndexWithSpaceCharacterFromIndex(index);
65-
}
66-
else if (isCharacterCloseToPreviousWord) {
67-
if (this.line.charAt(index) != SPACE_CHARACTER) {
69+
} else if (isCharacterCloseToPreviousWord) {
70+
if (this.line[index] != SPACE_CHARACTER) {
6871
index = index + 1;
69-
}
70-
else {
72+
} else {
7173
index = this.findMinimumIndexWithSpaceCharacterFromIndex(index) + 1;
7274
}
7375
}
@@ -77,51 +79,34 @@ else if (isCharacterCloseToPreviousWord) {
7779
}
7880

7981
private boolean isSpaceCharacterAtIndex(int index) {
80-
return this.line.charAt(index) != SPACE_CHARACTER;
82+
return this.line[index] == SPACE_CHARACTER;
8183
}
8284

8385
private boolean isNewIndexGreaterThanLastIndex(int index) {
84-
int lastIndex = this.getLastIndex();
85-
return (index > lastIndex);
86+
return index > this.lastIndex;
8687
}
8788

8889
private int getNextValidIndex(int index, boolean isCharacterPartOfPreviousWord) {
8990
int nextValidIndex = index;
90-
int lastIndex = this.getLastIndex();
9191
if (!this.isNewIndexGreaterThanLastIndex(index)) {
92-
nextValidIndex = lastIndex + 1;
92+
nextValidIndex = this.lastIndex + 1;
9393
}
94-
if (!isCharacterPartOfPreviousWord && this.isSpaceCharacterAtIndex(index - 1)) {
94+
if (!isCharacterPartOfPreviousWord && index > 0 && this.isSpaceCharacterAtIndex(index - 1)) {
9595
nextValidIndex = nextValidIndex + 1;
9696
}
97-
this.setLastIndex(nextValidIndex);
97+
this.lastIndex = nextValidIndex;
9898
return nextValidIndex;
9999
}
100100

101101
private int findMinimumIndexWithSpaceCharacterFromIndex(int index) {
102102
int newIndex = index;
103-
while (newIndex >= 0 && this.line.charAt(newIndex) == SPACE_CHARACTER) {
103+
while (newIndex >= 0 && this.line[newIndex] == SPACE_CHARACTER) {
104104
newIndex = newIndex - 1;
105105
}
106106
return newIndex + 1;
107107
}
108108

109109
private boolean indexIsInBounds(int index) {
110-
return (index >= 0 && index < this.lineLength);
110+
return index >= 0 && index < this.lineLength;
111111
}
112-
113-
private void completeLineWithSpaces() {
114-
for (int i = 0; i < this.getLineLength(); ++i) {
115-
this.line += SPACE_CHARACTER;
116-
}
117-
}
118-
119-
private int getLastIndex() {
120-
return this.lastIndex;
121-
}
122-
123-
private void setLastIndex(int lastIndex) {
124-
this.lastIndex = lastIndex;
125-
}
126-
127112
}

0 commit comments

Comments
 (0)