Skip to content

Commit 70f34d2

Browse files
lukesandbergcopybara-github
authored andcommitted
Precalculate num bytes and numLines when reading source files.
These are often used for recording metrics and calculating them is pretty trivial so doing it slightly more eagerly is reasonable. lineOffsets is _not_ precalculated since it is only used for warning formatting which should be rare. These values will persist even after a call to `clearCachedSource()` since they are very cheap to store, this does introduce the possibility that they hold 'stale' values if the source file changes. Docs have been updated to address this. PiperOrigin-RevId: 507485430
1 parent 212c105 commit 70f34d2

File tree

1 file changed

+57
-34
lines changed

1 file changed

+57
-34
lines changed

src/com/google/javascript/jscomp/SourceFile.java

Lines changed: 57 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -122,25 +122,46 @@ public int getLineOffset(int lineno) {
122122
}
123123

124124
/**
125-
* @return the number of lines in the source file
125+
* Returns the number of lines in the source file.
126+
*
127+
* <p>NOTE: this may be stale if the SourceFile has been {@link #clearCachedSource cleared} and
128+
* the file has changed on disk. If being fully accurate is required call {@link #getCode} and
129+
* then call this method.
126130
*/
127131
int getNumLines() {
128132
if (numLines < 0) {
129133
// A negative value means we need to read in the code and calculate this information.
130134
// Otherwise, assume the file hasn't changed since we read it.
131-
findLineOffsets();
135+
try {
136+
var unused = getCode();
137+
} catch (IOException e) {
138+
// this is consistent with old behavior of this method.
139+
return 0;
140+
}
132141
}
133142
return numLines;
134143
}
135144

136145
/**
137-
* @return the number of bytes in the source file
146+
* Returns the number of bytes in the source file
147+
*
148+
* <p>Oddly, bytes is defined as 'length of code as a Java String' this is accurate assuming files
149+
* only use Latin1 characters but may be inaccurate outside of that.
150+
*
151+
* <p>NOTE: this may be stale if the SourceFile has been {@link #clearCachedSource cleared} and
152+
* the file has changed on disk. If being fully accurate is required call {@link #getCode} and
153+
* then call this method.
138154
*/
139155
int getNumBytes() {
140156
if (numBytes < 0) {
141157
// A negative value means we need to read in the code and calculate this information.
142158
// Otherwise, assume the file hasn't changed since we read it.
143-
findLineOffsets();
159+
try {
160+
var unused = getCode();
161+
} catch (IOException e) {
162+
// this is consistent with old behavior of this method.
163+
return 0;
164+
}
144165
}
145166
return numBytes;
146167
}
@@ -156,28 +177,22 @@ private void findLineOffsets() {
156177
localCode = this.getCode();
157178
} catch (IOException e) {
158179
localCode = "";
180+
this.lineOffsets = new int[1];
181+
return;
159182
}
160183
}
161184

162-
int[] offsets = new int[256]; // default is arbitrary. Assume most files are around 200 lines
185+
// getCode() updates numLines, so this is always in sync with the code.
186+
int[] offsets = new int[this.numLines];
163187
int index = 1; // start at 1 since the offset for line 0 is always at byte 0
164188
int offset = 0;
165189
while ((offset = localCode.indexOf('\n', offset)) != -1) {
166190
// +1 because this is the offset of the next line which is one past the newline
167191
offset++;
168192
offsets[index++] = offset;
169-
if (index == offsets.length) {
170-
offsets = Arrays.copyOf(offsets, offsets.length * 2);
171-
}
172193
}
173-
this.lineOffsets = index == offsets.length ? offsets : Arrays.copyOf(offsets, index);
174-
175-
// Update numLines and numBytes
176-
// NOTE: In some edit/refresh development workflows, we may end up re-reading the file
177-
// and getting different numbers than we got last time we read it, so we should
178-
// not have checkState() calls here to assert they have not changed.
179-
this.numLines = index;
180-
this.numBytes = localCode.length();
194+
checkState(index == offsets.length);
195+
this.lineOffsets = offsets;
181196
}
182197

183198
/** Gets all the code in this source file. */
@@ -223,15 +238,27 @@ private void setCodeAndDoBookkeeping(@Nullable String sourceCode) {
223238
this.code = null;
224239
// Force recalculation of all of these values when they are requested.
225240
this.lineOffsets = null;
226-
this.numLines = -1;
227-
this.numBytes = -1;
228241

229242
if (sourceCode != null) {
230243
if (sourceCode.startsWith(UTF8_BOM)) {
231244
sourceCode = sourceCode.substring(UTF8_BOM.length());
232245
}
233246

234247
this.code = sourceCode;
248+
// Update numLines and numBytes
249+
// NOTE: In some edit/refresh development workflows, we may end up re-reading the file
250+
// and getting different numbers than we got last time we read it, so we should
251+
// not have checkState() calls here to assert they have not changed.
252+
// Misleading variable name. This really stores the 'number of utf16' code points which is
253+
// not the same as number of bytes.
254+
this.numBytes = sourceCode.length();
255+
int numLines = 1; // there is always at least one line
256+
int index = 0;
257+
while ((index = sourceCode.indexOf('\n', index)) != -1) {
258+
index++;
259+
numLines++;
260+
}
261+
this.numLines = numLines;
235262
}
236263
}
237264

@@ -305,6 +332,12 @@ public int getColumnOfOffset(int offset) {
305332
* null} if it does not exist, or if there was an IO exception.
306333
*/
307334
public @Nullable String getLine(int lineNumber) {
335+
String js;
336+
try {
337+
js = getCode();
338+
} catch (IOException e) {
339+
return null;
340+
}
308341
findLineOffsets();
309342
if (lineNumber > lineOffsets.length) {
310343
return null;
@@ -315,15 +348,6 @@ public int getColumnOfOffset(int offset) {
315348
}
316349

317350
int pos = lineOffsets[lineNumber - 1];
318-
String js = "";
319-
try {
320-
// NOTE(nicksantos): Right now, this is optimized for few warnings.
321-
// This is probably the right trade-off, but will be slow if there
322-
// are lots of warnings in one file.
323-
js = getCode();
324-
} catch (IOException e) {
325-
return null;
326-
}
327351

328352
if (js.indexOf('\n', pos) == -1) {
329353
// If next new line cannot be found, there are two cases
@@ -350,6 +374,12 @@ public int getColumnOfOffset(int offset) {
350374
* exception.
351375
*/
352376
public @Nullable Region getLines(int lineNumber, int length) {
377+
String js;
378+
try {
379+
js = getCode();
380+
} catch (IOException e) {
381+
return null;
382+
}
353383
findLineOffsets();
354384
if (lineNumber > lineOffsets.length) {
355385
return null;
@@ -362,13 +392,6 @@ public int getColumnOfOffset(int offset) {
362392
length = 1;
363393
}
364394

365-
String js = "";
366-
try {
367-
js = getCode();
368-
} catch (IOException e) {
369-
return null;
370-
}
371-
372395
int pos = lineOffsets[lineNumber - 1];
373396
if (pos == js.length()) {
374397
return new SimpleRegion(

0 commit comments

Comments
 (0)