Skip to content

Commit 15458da

Browse files
committed
Show WARNINGs about content classified as Huge Text
1 parent 1f5d484 commit 15458da

16 files changed

+87
-75
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/StreamSource.java

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2013, 2018 Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2018, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2018, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis;
2525

@@ -52,6 +52,11 @@ public abstract class StreamSource {
5252
*/
5353
public abstract InputStream getStream() throws IOException;
5454

55+
/**
56+
* Gets a reportable identifier of the source.
57+
*/
58+
public abstract String getSourceIdentifier();
59+
5560
/**
5661
* Helper method that creates a {@code StreamSource} instance that
5762
* reads data from a file.
@@ -65,6 +70,11 @@ public static StreamSource fromFile(final File file) {
6570
public InputStream getStream() throws IOException {
6671
return new BufferedInputStream(new FileInputStream(file));
6772
}
73+
74+
@Override
75+
public String getSourceIdentifier() {
76+
return file.getAbsolutePath();
77+
}
6878
};
6979
}
7080

@@ -82,6 +92,11 @@ public static StreamSource fromString(final String str) {
8292
public InputStream getStream() throws IOException {
8393
return new ByteArrayInputStream(sbuf);
8494
}
95+
96+
@Override
97+
public String getSourceIdentifier() {
98+
return "String";
99+
}
85100
};
86101
}
87102
}

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/BZip2Analyzer.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,11 @@ public InputStream getStream() throws IOException {
112112
throw new IOException("Not BZIP2 format");
113113
}
114114
}
115+
116+
@Override
117+
public String getSourceIdentifier() {
118+
return src.getSourceIdentifier();
119+
}
115120
};
116121
}
117122
}

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/CompressedAnalyzer.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,14 @@
3333
import org.opengrok.indexer.analysis.StreamSource;
3434
import org.opengrok.indexer.analysis.data.HugeTextAnalyzerFactory;
3535
import org.opengrok.indexer.configuration.RuntimeEnvironment;
36+
import org.opengrok.indexer.logger.LoggerFactory;
3637
import org.opengrok.indexer.search.QueryBuilder;
3738

3839
import java.io.IOException;
3940
import java.io.InputStream;
4041
import java.io.Writer;
42+
import java.util.logging.Level;
43+
import java.util.logging.Logger;
4144

4245
/**
4346
* Represents a base for compressed formats (e.g. gzip or bzip2) but not for
@@ -46,6 +49,8 @@
4649
*/
4750
public abstract class CompressedAnalyzer extends FileAnalyzer {
4851

52+
private static final Logger LOGGER = LoggerFactory.getLogger(CompressedAnalyzer.class);
53+
4954
private static final int CHUNK_SIZE = 8 * 1024;
5055

5156
protected Genre g;
@@ -68,8 +73,13 @@ protected void analyzeUncompressed(
6873

6974
if (fa.getGenre() == Genre.PLAIN) {
7075
if (meetsHugeTextThreshold(compressedSrc)) {
76+
String origFileTypeName = fa.getFileTypeName();
7177
fa = HugeTextAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();
7278
g = Genre.DATA;
79+
if (LOGGER.isLoggable(Level.WARNING)) {
80+
LOGGER.log(Level.WARNING, "{0} is compressed huge text: {1}",
81+
new Object[]{origFileTypeName, compressedSrc.getSourceIdentifier()});
82+
}
7383
} else {
7484
g = Genre.XREFABLE;
7585
}

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/archive/GZIPAnalyzer.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,11 @@ public InputStream getStream() throws IOException {
105105
return new BufferedInputStream(
106106
new GZIPInputStream(src.getStream()));
107107
}
108+
109+
@Override
110+
public String getSourceIdentifier() {
111+
return src.getSourceIdentifier();
112+
}
108113
};
109114
}
110115
}

opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -712,7 +712,12 @@ private void addFile(File file, String path, Ctags ctags)
712712

713713
if (AbstractAnalyzer.Genre.PLAIN.equals(fa.getGenre()) &&
714714
file.length() >= env.getHugeTextThresholdBytes()) {
715+
String origFileTypeName = fa.getFileTypeName();
715716
fa = HugeTextAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();
717+
if (LOGGER.isLoggable(Level.WARNING)) {
718+
LOGGER.log(Level.WARNING, "{0} is huge text: {1}",
719+
new Object[]{origFileTypeName, path});
720+
}
716721
}
717722

718723
for (IndexChangedListener listener : listeners) {
@@ -1803,8 +1808,8 @@ private boolean checkSettings(File file, String path) throws IOException {
18031808
// If it is a Huge Text file, re-check constraints.
18041809
if (AnalyzerGuru.getHugeTextFileTypeName().equals(fileTypeName) &&
18051810
file.length() < env.getHugeTextThresholdBytes()) {
1806-
if (LOGGER.isLoggable(Level.FINE)) {
1807-
LOGGER.log(Level.FINE, "{0} no longer qualifies: {1}",
1811+
if (LOGGER.isLoggable(Level.WARNING)) {
1812+
LOGGER.log(Level.WARNING, "{0} no longer qualifies: {1}",
18081813
new Object[]{fileTypeName, path});
18091814
}
18101815
return false;
@@ -1814,8 +1819,8 @@ private boolean checkSettings(File file, String path) throws IOException {
18141819
// If the Genre is PLAIN, re-check Huge Text file constraints.
18151820
if (AbstractAnalyzer.Genre.PLAIN.equals(fa.getGenre()) &&
18161821
file.length() >= env.getHugeTextThresholdBytes()) {
1817-
if (LOGGER.isLoggable(Level.FINE)) {
1818-
LOGGER.log(Level.FINE, "{0} is now a huge text file: {1}",
1822+
if (LOGGER.isLoggable(Level.WARNING)) {
1823+
LOGGER.log(Level.WARNING, "{0} is now huge text: {1}",
18191824
new Object[]{fileTypeName, path});
18201825
}
18211826
return false;

opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/JFlexXrefTest.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
*/
2424

2525
package org.opengrok.indexer.analysis;
@@ -505,6 +505,11 @@ public void testJavaClassAnalyzer() throws Exception {
505505
".class";
506506
return StringWriter.class.getResourceAsStream(path);
507507
}
508+
509+
@Override
510+
public String getSourceIdentifier() {
511+
return "StringWriter.class";
512+
}
508513
};
509514
Document doc = new Document();
510515
StringWriter out = new StringWriter();

opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/TextAnalyzerTest.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis;
2525

@@ -49,6 +49,11 @@ private static StreamSource getStreamSource(final byte[] bytes) {
4949
public InputStream getStream() throws IOException {
5050
return new ByteArrayInputStream(bytes);
5151
}
52+
53+
@Override
54+
public String getSourceIdentifier() {
55+
return "byte[]";
56+
}
5257
};
5358
}
5459

opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/c/CAnalyzerFactoryTest.java

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis.c;
2525

@@ -60,15 +60,6 @@ public class CAnalyzerFactoryTest {
6060
private static TestRepository repository;
6161
private static AbstractAnalyzer analyzer;
6262

63-
private static StreamSource getStreamSource(final String fname) {
64-
return new StreamSource() {
65-
@Override
66-
public InputStream getStream() throws IOException {
67-
return new FileInputStream(fname);
68-
}
69-
};
70-
}
71-
7263
@BeforeClass
7364
public static void setUpClass() throws Exception {
7465
ctags = new Ctags();
@@ -110,7 +101,7 @@ public void testScopeAnalyzer() throws Exception {
110101
StringWriter xrefOut = new StringWriter();
111102
analyzer.setCtags(ctags);
112103
analyzer.setScopesEnabled(true);
113-
analyzer.analyze(doc, getStreamSource(path), xrefOut);
104+
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);
114105

115106
IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
116107
assertNotNull(scopesField);

opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/c/CxxAnalyzerFactoryTest.java

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis.c;
2525

@@ -60,15 +60,6 @@ public class CxxAnalyzerFactoryTest {
6060
private static TestRepository repository;
6161
private static AbstractAnalyzer analyzer;
6262

63-
private static StreamSource getStreamSource(final String fname) {
64-
return new StreamSource() {
65-
@Override
66-
public InputStream getStream() throws IOException {
67-
return new FileInputStream(fname);
68-
}
69-
};
70-
}
71-
7263
@BeforeClass
7364
public static void setUpClass() throws Exception {
7465
ctags = new Ctags();
@@ -111,7 +102,7 @@ public void testScopeAnalyzer() throws Exception {
111102
analyzer.setScopesEnabled(true);
112103
System.out.println(path);
113104

114-
analyzer.analyze(doc, getStreamSource(path), xrefOut);
105+
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);
115106

116107
IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
117108
assertNotNull(scopesField);

opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/clojure/ClojureAnalyzerFactoryTest.java

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2016, 2018 Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
*/
2424
package org.opengrok.indexer.analysis.clojure;
2525

@@ -57,15 +57,6 @@ public class ClojureAnalyzerFactoryTest {
5757
private static TestRepository repository;
5858
private static AbstractAnalyzer analyzer;
5959

60-
private static StreamSource getStreamSource(final String fname) {
61-
return new StreamSource() {
62-
@Override
63-
public InputStream getStream() throws IOException {
64-
return new FileInputStream(fname);
65-
}
66-
};
67-
}
68-
6960
@BeforeClass
7061
public static void setUpClass() throws Exception {
7162
ctags = new Ctags();
@@ -106,7 +97,7 @@ public void testScopeAnalyzer() throws Exception {
10697
string_ft_nstored_nanalyzed_norms));
10798
StringWriter xrefOut = new StringWriter();
10899
analyzer.setCtags(ctags);
109-
analyzer.analyze(doc, getStreamSource(path), xrefOut);
100+
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);
110101

111102
Definitions definitions = Definitions.deserialize(doc.getField(QueryBuilder.TAGS).binaryValue().bytes);
112103

0 commit comments

Comments
 (0)