1515 * limitations under the License.
1616 */
1717
18+ import java.nio.charset.CharacterCodingException ;
19+ import java.nio.charset.CodingErrorAction ;
20+ import java.nio.charset.StandardCharsets ;
21+
1822import org.apache.rat.Defaults
1923import org.apache.rat.document.impl.FileDocument
2024import org.apache.rat.api.MetaData
@@ -144,8 +148,8 @@ class ValidateSourcePatternsTask extends DefaultTask {
144148 (~$/ \$ $Id\b / $) : ' svn keyword' ,
145149 (~$/ \$ $Header\b / $) : ' svn keyword' ,
146150 (~$/ \$ $Source\b / $) : ' svn keyword' ,
147- (~$/ ^ \ u FEFF/ $) : ' UTF-8 byte order mark' ,
148- (~$/ import java\. lang\.\w +;/ $) : ' java.lang import is unnecessary'
151+ (~$/ [ \u 200B \ u FEFF] / $) : ' UTF-8 byte order mark or other zero-width codepoints ' ,
152+ (~$/ import java\. lang\.\w +;/ $) : ' java.lang import is unnecessary' ,
149153 ]
150154
151155 // Python and others merrily use var declarations; this is a problem _only_ in Java, at least for 8x, where we're forbidding var declarations
@@ -198,15 +202,29 @@ class ValidateSourcePatternsTask extends DefaultTask {
198202 ProgressLogger progress = progressLoggerFactory. newOperation(this . class)
199203 progress. start(this . name, this . name)
200204
205+ def validatingDecoder = StandardCharsets . UTF_8 . newDecoder()
206+ .onMalformedInput(CodingErrorAction . REPORT ). onUnmappableCharacter(CodingErrorAction . REPORT )
207+
201208 sourceFiles. each { f ->
202209 try {
203210 progress. progress(" Scanning ${ f.name} " )
204211 logger. debug(' Scanning source file: {}' , f);
205212
206- def text = f. getText(' UTF-8' );
213+ String text
214+ try {
215+ validatingDecoder. reset()
216+ text = f. withInputStream {
217+ in -> new InputStreamReader (in , validatingDecoder). getText()
218+ }
219+ } catch (CharacterCodingException e) {
220+ reportViolation(f, " incorrect UTF-8 encoding [${ e} ]" )
221+ return // we can't proceed for this file
222+ }
223+
207224 invalidPatterns. each { pattern , name ->
208- if (pattern. matcher(text). find()) {
209- reportViolation(f, name);
225+ def matcher = pattern. matcher(text);
226+ if (matcher. find()) {
227+ reportViolation(f, String . format(Locale . ROOT , ' %s [start=%d, end=%d]' , name, matcher. start(), matcher. end()));
210228 }
211229 }
212230 def javadocsMatcher = javadocsPattern. matcher(text);
@@ -230,9 +248,10 @@ class ValidateSourcePatternsTask extends DefaultTask {
230248 }
231249 checkLicenseHeaderPrecedes(f, ' package' , packagePattern, javaCommentPattern, text, ratDocument);
232250
233- invalidJavaOnlyPatterns. each { pattern ,name ->
234- if (pattern. matcher(text). find()) {
235- reportViolation(f, name);
251+ invalidJavaOnlyPatterns. each { pattern , name ->
252+ def matcher = pattern. matcher(text);
253+ if (matcher. find()) {
254+ reportViolation(f, String . format(Locale . ROOT , ' %s [start=%d, end=%d]' , name, matcher. start(), matcher. end()));
236255 }
237256 }
238257 }
0 commit comments