Skip to content
This repository was archived by the owner on Feb 1, 2023. It is now read-only.

Commit 0b30177

Browse files
authored
Merge pull request #21 from snyk/fix/ignore_files_processing
fix: parsing of .ignore file by using PathMatcher and internal caches
2 parents 4b0a661 + 6ecb023 commit 0b30177

File tree

9 files changed

+2013
-97
lines changed

9 files changed

+2013
-97
lines changed

build.gradle

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ plugins {
66

77
group = "io.snyk.code.sdk"
88
archivesBaseName = "snyk-code-client"
9-
version = "2.1.8"
9+
version = "2.1.9"
1010

1111
repositories {
1212
mavenCentral()
@@ -38,6 +38,11 @@ compileIntegTestJava {
3838
targetCompatibility = 11
3939
}
4040

41+
compileTestJava {
42+
sourceCompatibility = 11
43+
targetCompatibility = 11
44+
}
45+
4146
dependencies {
4247
implementation "com.squareup.retrofit2:retrofit:2.7.1"
4348
implementation "com.squareup.retrofit2:converter-gson:2.7.1"

src/main/java/ai/deepcode/javaclient/core/AnalysisDataBase.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,9 @@ public Set<Object> getAllCachedProject() {
101101

102102
public void removeFilesFromCache(@NotNull Collection<Object> files) {
103103
try {
104-
dcLogger.logInfo("Request to remove from cache " + files.size() + " files: " + files);
104+
final List<String> first50FilesName =
105+
files.stream().limit(50).map(pdUtils::getFileName).collect(Collectors.toList());
106+
dcLogger.logInfo("Request to remove from cache " + files.size() + " files: " + first50FilesName);
105107
// todo: do we really need mutex here?
106108
MUTEX.lock();
107109
dcLogger.logInfo("MUTEX LOCK");
@@ -189,8 +191,10 @@ public void updateCachedResultsForFiles(
189191
dcLogger.logWarn("updateCachedResultsForFiles requested for empty list of files");
190192
return;
191193
}
194+
final List<String> first50FilesName =
195+
allProjectFiles.stream().limit(50).map(pdUtils::getFileName).collect(Collectors.toList());
192196
dcLogger.logInfo(
193-
"Update requested for " + allProjectFiles.size() + " files: " + allProjectFiles.toString());
197+
"Update requested for " + allProjectFiles.size() + " files: " + first50FilesName);
194198
if (!deepCodeParams.consentGiven(project)) {
195199
dcLogger.logWarn("Consent check fail! Project: " + pdUtils.getProjectName(project));
196200
return;
Lines changed: 126 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,134 +1,186 @@
11
package ai.deepcode.javaclient.core;
22

33
import org.jetbrains.annotations.NotNull;
4+
import org.jetbrains.annotations.Nullable;
45

5-
import java.util.HashSet;
6-
import java.util.Map;
7-
import java.util.Set;
6+
import java.nio.file.FileSystems;
7+
import java.nio.file.Path;
8+
import java.nio.file.PathMatcher;
9+
import java.util.*;
810
import java.util.concurrent.ConcurrentHashMap;
11+
import java.util.regex.PatternSyntaxException;
912

1013
public abstract class DeepCodeIgnoreInfoHolderBase {
1114

1215
private final HashContentUtilsBase hashContentUtils;
16+
private final PlatformDependentUtilsBase pdUtils;
17+
private final DCLoggerBase dcLogger;
18+
19+
// .ignore file to Line in .ignore file to PathMatcher
20+
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2PathMatchers = new ConcurrentHashMap<>();
21+
22+
// .ignore file to Line in .ignore file to re-include (i.e. "!"-prefixed pattern) PathMatcher
23+
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2ReIncludePathMatchers = new ConcurrentHashMap<>();
24+
25+
private final Map<Object, Map<String, Boolean>> project2IgnoredFilePaths = new ConcurrentHashMap<>();
1326

1427
protected DeepCodeIgnoreInfoHolderBase(
15-
@NotNull HashContentUtilsBase hashContentUtils) {
28+
@NotNull HashContentUtilsBase hashContentUtils,
29+
@NotNull PlatformDependentUtilsBase pdUtils,
30+
@NotNull DCLoggerBase dcLogger) {
1631
this.hashContentUtils = hashContentUtils;
32+
this.pdUtils = pdUtils;
33+
this.dcLogger = dcLogger;
34+
}
35+
36+
public void scanAllMissedIgnoreFiles(
37+
@NotNull Collection<Object> allProjectFiles,
38+
@Nullable Object progress) {
39+
allProjectFiles.stream()
40+
.filter(this::is_ignoreFile)
41+
.filter(ignoreFile -> !map_ignore2PathMatchers.containsKey(ignoreFile))
42+
.forEach(ignoreFile -> update_ignoreFileContent(ignoreFile, progress));
1743
}
1844

19-
private static final Map<Object, Set<String>> map_dcignore2Regexps = new ConcurrentHashMap<>();
20-
private static final Map<Object, Set<String>> map_gitignore2Regexps = new ConcurrentHashMap<>();
45+
public boolean isIgnoredFile(@NotNull Object fileToCheck) {
46+
return project2IgnoredFilePaths
47+
.computeIfAbsent(pdUtils.getProject(fileToCheck), prj -> new ConcurrentHashMap<>())
48+
.computeIfAbsent(
49+
pdUtils.getFilePath(fileToCheck),
50+
filePath ->
51+
map_ignore2PathMatchers.keySet().stream()
52+
.filter(ignoreFile -> inScope(filePath, ignoreFile))
53+
.anyMatch(ignoreFile -> isIgnoredFile(filePath, ignoreFile))
54+
);
55+
}
2156

22-
public boolean isDcIgnoredFile(@NotNull Object file) {
23-
return map_dcignore2Regexps.entrySet().stream()
24-
.filter(e -> inScope(e.getKey(), file))
25-
.flatMap(e -> e.getValue().stream())
26-
.anyMatch(getFilePath(file)::matches);
57+
private boolean isIgnoredFile(@NotNull String filePath, @NotNull Object ignoreFile) {
58+
final Path path = pathOf(filePath);
59+
return map_ignore2PathMatchers.get(ignoreFile).entrySet().stream()
60+
.anyMatch(line2matcher -> {
61+
final int lineIndex = line2matcher.getKey();
62+
final PathMatcher pathMatcher = line2matcher.getValue();
63+
return pathMatcher.matches(path) &&
64+
// An optional prefix "!" which negates the pattern;
65+
// any matching file excluded by a _previous_ pattern will become included again.
66+
map_ignore2ReIncludePathMatchers.get(ignoreFile).entrySet().stream()
67+
.filter(e -> e.getKey() > lineIndex)
68+
.noneMatch(e -> e.getValue().matches(path));
69+
});
2770
}
2871

29-
public boolean isGitIgnoredFile(@NotNull Object file) {
30-
return map_gitignore2Regexps.entrySet().stream()
31-
.filter(e -> inScope(e.getKey(), file))
32-
.flatMap(e -> e.getValue().stream())
33-
.anyMatch(getFilePath(file)::matches);
72+
private void removeIgnoredFilePaths(@NotNull Object ignoreFile) {
73+
final Object project = pdUtils.getProject(ignoreFile);
74+
project2IgnoredFilePaths
75+
.getOrDefault(project, Collections.emptyMap())
76+
.keySet()
77+
.removeIf(filePath -> inScope(filePath, ignoreFile));
3478
}
3579

36-
protected abstract String getFilePath(@NotNull Object file);
80+
/** copy of {@link Path#of(java.lang.String, java.lang.String...)} due to java 8 compatibility */
81+
private static Path pathOf(String first, String... more){
82+
return FileSystems.getDefault().getPath(first, more);
83+
}
3784

38-
private boolean inScope(@NotNull Object ignoreFile, @NotNull Object fileToCheck) {
39-
return getFilePath(fileToCheck).startsWith(getDirPath(ignoreFile));
85+
private boolean inScope(@NotNull String filePathToCheck, @NotNull Object ignoreFile) {
86+
return filePathToCheck.startsWith(pdUtils.getDirPath(ignoreFile));
4087
};
4188

4289
public boolean is_ignoreFile(@NotNull Object file) {
4390
return is_dcignoreFile(file) || is_gitignoreFile(file);
4491
}
4592

46-
protected abstract String getFileName(@NotNull Object file);
47-
4893
public boolean is_dcignoreFile(@NotNull Object file) {
49-
return getFileName(file).equals(".dcignore");
94+
return pdUtils.getFileName(file).equals(".dcignore");
5095
}
5196

5297
public boolean is_gitignoreFile(@NotNull Object file) {
53-
return getFileName(file).equals(".gitignore");
98+
return pdUtils.getFileName(file).equals(".gitignore");
5499
}
55100

56-
public void remove_dcignoreFileContent(@NotNull Object file) {
57-
map_dcignore2Regexps.remove(file);
58-
}
59-
60-
public void remove_gitignoreFileContent(@NotNull Object file) {
61-
map_gitignore2Regexps.remove(file);
101+
public void remove_ignoreFileContent(@NotNull Object ignoreFile) {
102+
removeIgnoredFilePaths(ignoreFile);
103+
map_ignore2PathMatchers.remove(ignoreFile);
104+
map_ignore2ReIncludePathMatchers.remove(ignoreFile);
62105
}
63106

64107
public void removeProject(@NotNull Object project) {
65-
map_dcignore2Regexps.forEach((file, _set) -> {
66-
if (getProjectOfFile(file).equals(project)) map_dcignore2Regexps.remove(file);
108+
map_ignore2PathMatchers.keySet().forEach(file -> {
109+
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file);
67110
});
68-
map_gitignore2Regexps.forEach((file, _set) -> {
69-
if (getProjectOfFile(file).equals(project)) map_gitignore2Regexps.remove(file);
111+
map_ignore2ReIncludePathMatchers.keySet().forEach(file -> {
112+
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file);
70113
});
114+
project2IgnoredFilePaths.remove(project);
71115
}
72116

73-
protected abstract Object getProjectOfFile(@NotNull Object file);
74-
75-
public void update_dcignoreFileContent(@NotNull Object file) {
76-
map_dcignore2Regexps.put(file, parse_ignoreFile2Regexps(file));
77-
}
78-
79-
public void update_gitignoreFileContent(@NotNull Object file) {
80-
map_gitignore2Regexps.put(file, parse_ignoreFile2Regexps(file));
117+
public void update_ignoreFileContent(@NotNull Object ignoreFile, @Nullable Object progress) {
118+
dcLogger.logInfo("Scanning .ignore file: " + pdUtils.getFilePath(ignoreFile));
119+
parse_ignoreFile2Globs(ignoreFile, progress);
120+
dcLogger.logInfo("Scan FINISHED for .ignore file: " + pdUtils.getFilePath(ignoreFile));
81121
}
82122

83-
protected abstract String getDirPath(@NotNull Object file);
84-
85-
private Set<String> parse_ignoreFile2Regexps(@NotNull Object file) {
86-
Set<String> result = new HashSet<>();
87-
String basePath = getDirPath(file);
88-
String lineSeparator = "[\n\r]";
89-
final String fileText = hashContentUtils.doGetFileContent(file);
90-
for (String line : fileText.split(lineSeparator)) {
123+
private void parse_ignoreFile2Globs(@NotNull Object ignoreFile, @Nullable Object progress) {
124+
pdUtils.progressSetText(progress, "parsing file: " + pdUtils.getFilePath(ignoreFile));
125+
Map<Integer, PathMatcher> ignoreMatchers = new HashMap<>();
126+
Map<Integer, PathMatcher> reIncludedMatchers = new HashMap<>();
127+
String basePath = pdUtils.getDirPath(ignoreFile);
128+
String lineSeparator = "\r\n|[\r\n]";
129+
final String fileText = hashContentUtils.doGetFileContent(ignoreFile);
130+
final String[] lines = fileText.split(lineSeparator);
131+
for (int lineIndex = 0; lineIndex < lines.length; lineIndex++) {
132+
String line = lines[lineIndex];
91133

92134
// https://git-scm.com/docs/gitignore#_pattern_format
93-
// todo: `!` negation not implemented yet
94135
line = line.trim();
95136
if (line.isEmpty() || line.startsWith("#")) continue;
96137

138+
// An optional prefix "!" which negates the pattern;
139+
// any matching file excluded by a previous pattern will become included again.
140+
// todo??? It is not possible to re-include a file if a parent directory of that file is excluded.
141+
boolean isReIncludePattern = line.startsWith("!");
142+
if (isReIncludePattern) line = line.substring(1);
143+
97144
String prefix = basePath;
98145
// If there is a separator at the beginning or middle (or both) of the pattern, then the
99146
// pattern is relative to the directory level of the particular .gitignore file itself.
100147
// Otherwise the pattern may also match at any level below the .gitignore level.
101148
int indexBegMidSepar = line.substring(0, line.length() - 1).indexOf('/');
102-
if (indexBegMidSepar != 0) prefix += "/";
103149
if (indexBegMidSepar == -1) {
104-
prefix += ".*";
105-
} else if (line.endsWith("/*") || line.endsWith("/**")) {
106-
int indexLastSepar = line.lastIndexOf('/');
107-
if (indexBegMidSepar == indexLastSepar) prefix += ".*";
150+
prefix += "**/";
151+
} else if (indexBegMidSepar > 0) {
152+
if (line.endsWith("/*") || line.endsWith("/**")) {
153+
int indexLastSepar = line.lastIndexOf('/');
154+
if (indexBegMidSepar == indexLastSepar) prefix += "**/";
155+
} else {
156+
prefix += "/";
157+
}
108158
}
109159

110160
// If there is a separator at the end of the pattern then the pattern will only match
111161
// directories, otherwise the pattern can match both files and directories.
112162
String postfix =
113-
(line.endsWith("/"))
114-
? ".+" // should be dir
115-
: "(/.+)?"; // could be dir or file
116-
117-
String body =
118-
line.replace(".", "\\.")
119-
// An asterisk "*" matches anything except a slash.
120-
.replace("*", "[^/]*")
121-
// The character "?" matches any one character except "/".
122-
.replace("?", "[^/]?")
123-
// A slash followed by two consecutive asterisks then a slash matches zero or more
124-
// directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
125-
// A trailing "/**" matches everything inside. For example, "abc/**" matches all
126-
// files inside directory "abc", relative to the location of the .gitignore file,
127-
// with infinite depth.
128-
.replace("[^/]*[^/]*", ".*");
129-
130-
result.add(prefix + body + postfix);
163+
(line.endsWith("/"))
164+
? "?**" // should be dir
165+
: "{/?**,}"; // could be dir or file
166+
167+
// glob sanity check for validity
168+
try {
169+
PathMatcher globToMatch = FileSystems.getDefault()
170+
.getPathMatcher("glob:" + prefix + line + postfix);
171+
172+
if (isReIncludePattern) {
173+
reIncludedMatchers.put(lineIndex, globToMatch);
174+
} else {
175+
ignoreMatchers.put(lineIndex, globToMatch);
176+
}
177+
} catch (PatternSyntaxException e) {
178+
dcLogger.logWarn("Incorrect Glob syntax in .ignore file: " + e.getMessage());
179+
}
180+
pdUtils.progressSetFraction(progress, (double) lineIndex/lines.length);
181+
pdUtils.progressCheckCanceled(progress);
131182
}
132-
return result;
183+
map_ignore2ReIncludePathMatchers.put(ignoreFile, reIncludedMatchers);
184+
map_ignore2PathMatchers.put(ignoreFile, ignoreMatchers);
133185
}
134186
}

0 commit comments

Comments
 (0)