Skip to content

Commit 9be81b4

Browse files
refactor: document and reformat code
1 parent 7917f7b commit 9be81b4

File tree

10 files changed

+282
-214
lines changed

10 files changed

+282
-214
lines changed

src/main/java/org/variantsync/vevos/extraction/FastGroundTruthExtraction.java

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@
1313

1414
import static org.variantsync.vevos.extraction.ConfigProperties.*;
1515

16+
/**
17+
* A very fast ground truth extraction that only extracts the ground truths of changed files for
18+
* each commit. This extraction is very useful for studies that are only interested in the evolution
19+
* of a software family.
20+
*/
1621
public class FastGroundTruthExtraction extends GroundTruthExtraction {
1722

1823
public FastGroundTruthExtraction(Properties properties) {
1924
super(properties);
20-
Logger.info("Starting a fast ground truth extraction that only extracts a ground truth for the changed files of each commit.");
25+
Logger.info(
26+
"Starting a fast ground truth extraction that only extracts a ground truth for the changed files of each commit.");
2127
}
2228

2329
protected BiConsumer<Repository, Path> extractionRunner() {
@@ -26,33 +32,14 @@ protected BiConsumer<Repository, Path> extractionRunner() {
2632
Path resultsRoot = extractionDir.resolve(repo.getRepositoryName());
2733
boolean printEnabled = Boolean.parseBoolean(this.properties.getProperty(PRINT_ENABLED));
2834

29-
FastVariabilityAnalysis analysis = new FastVariabilityAnalysis(printEnabled, resultsRoot,
30-
Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)),
35+
FastVariabilityAnalysis analysis = new FastVariabilityAnalysis(printEnabled,
36+
resultsRoot, Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)),
3137
Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING)));
32-
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
33-
"PCAnalysis",
34-
List.of(
35-
analysis
36-
),
37-
r,
38-
out
39-
);
40-
final int availableProcessors;
41-
String numThreads = this.properties.getProperty(NUM_THREADS);
42-
if (numThreads == null || numThreads.trim().isEmpty() || numThreads.trim().equals("0")) {
43-
availableProcessors = Runtime.getRuntime().availableProcessors();
44-
} else {
45-
availableProcessors = Integer.parseInt(numThreads);
46-
}
47-
final int batchSize;
48-
String configuredSize = this.properties.getProperty(BATCH_SIZE);
49-
if (configuredSize == null || configuredSize.trim().isEmpty() || configuredSize.trim().equals("0")) {
50-
batchSize = 256;
51-
} else {
52-
batchSize = Integer.parseInt(configuredSize);
53-
}
54-
55-
Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir), batchSize, availableProcessors);
38+
final BiFunction<Repository, Path, Analysis> AnalysisFactory =
39+
(r, out) -> new Analysis("PCAnalysis", List.of(analysis), r, out);
40+
41+
Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir),
42+
diffDetectiveBatchSize(), numProcessors());
5643

5744
FastVariabilityAnalysis.numProcessed = 0;
5845
};

src/main/java/org/variantsync/vevos/extraction/FullGroundTruthExtraction.java

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,30 +25,34 @@
2525
import static org.variantsync.vevos.extraction.gt.GroundTruth.*;
2626

2727

28+
/**
29+
* A full ground truth extraction that extracts the ground truth for all code files of all commits
30+
* in the repositories. Due to the effort of extracting and saving a ground truth for all files of
31+
* each commit, this extraction may require a very long time and large amounts of free disk space.
32+
*
33+
* <p>
34+
* Essentially, the full ground truth extraction first performs a fast ground truth extraction and
35+
* then incrementally combines the ground truths of all commits.
36+
* </p>
37+
*/
2838
public class FullGroundTruthExtraction extends GroundTruthExtraction {
2939

3040
public FullGroundTruthExtraction(Properties properties) {
3141
super(properties);
32-
Logger.info("Starting full ground truth extraction that extracts a ground truth for all files of each commit.");
42+
Logger.info(
43+
"Starting full ground truth extraction that extracts a ground truth for all files of each commit.");
3344
}
3445

3546
protected BiConsumer<Repository, Path> extractionRunner() {
3647
return (repo, repoOutputDir) -> {
3748
FullVariabilityAnalysis analysis =
3849
new FullVariabilityAnalysis(Path.of(properties.getProperty(DD_OUTPUT_DIR)),
3950
Boolean.parseBoolean(properties.getProperty(IGNORE_PC_CHANGES)));
40-
final BiFunction<Repository, Path, Analysis> AnalysisFactory = (r, out) -> new Analysis(
41-
"PCAnalysis",
42-
List.of(
43-
analysis
44-
),
45-
r,
46-
out
47-
);
48-
final int availableProcessors = Runtime.getRuntime().availableProcessors();
49-
final int commitsToProcessPerThread = 256;
50-
51-
Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir), commitsToProcessPerThread, availableProcessors);
51+
final BiFunction<Repository, Path, Analysis> AnalysisFactory =
52+
(r, out) -> new Analysis("PCAnalysis", List.of(analysis), r, out);
53+
54+
Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir),
55+
diffDetectiveBatchSize(), numProcessors());
5256

5357
ArrayList<RevCommit> commits = new ArrayList<>();
5458
try (Git gitRepo = repo.getGitRepo().run()) {
@@ -61,7 +65,7 @@ protected BiConsumer<Repository, Path> extractionRunner() {
6165

6266
ExecutorService threadPool = null;
6367
try {
64-
threadPool = Executors.newFixedThreadPool(availableProcessors);
68+
threadPool = Executors.newFixedThreadPool(numProcessors());
6569
postprocess(repo, commits, threadPool);
6670
} finally {
6771
if (threadPool != null) {
@@ -75,14 +79,16 @@ protected BiConsumer<Repository, Path> extractionRunner() {
7579

7680

7781
/**
78-
* Incrementally combines the ground truths from the first to the last commit. The ground truth for unmodified files
79-
* are reused. New file ground truths are added for created files, and old ground truths are updated for modified files.
82+
* Incrementally combines the ground truths from the first to the last commit. The ground truths
83+
* for unmodified files are reused. New file ground truths are added for created files, and old
84+
* ground truths are updated for modified files.
8085
*
81-
* @param repo The repo that has been analyzed
82-
* @param commits A list of commits in the repo
86+
* @param repo The repo that has been analyzed
87+
* @param commits A list of commits in the repo
8388
* @param threadPool A thread pool for multithreading of IO operations
8489
*/
85-
private void postprocess(Repository repo, ArrayList<RevCommit> commits, ExecutorService threadPool) {
90+
private void postprocess(Repository repo, ArrayList<RevCommit> commits,
91+
ExecutorService threadPool) {
8692
boolean print = Boolean.parseBoolean(this.properties.getProperty(PRINT_ENABLED));
8793
int processedCount = 0;
8894
RevCommit lastCommit = null;
@@ -92,16 +98,19 @@ private void postprocess(Repository repo, ArrayList<RevCommit> commits, Executor
9298
if (lastCommit != null) {
9399
// Check whether the last commit is the first parent of this commit.
94100
// If this is the case, we can continue with the existing ground truth.
95-
// If this is not the case, we have to load the completed ground truth of the parent.
101+
// If this is not the case, we have to load the completed ground truth of the
102+
// parent.
96103
RevCommit firstParent = Arrays.stream(commit.getParents()).findFirst().orElse(null);
97104
if (firstParent == null) {
98105
completedGroundTruth = new GroundTruth(new HashMap<>(), new HashSet<>());
99106
} else if (!firstParent.equals(lastCommit)) {
100-
File parentGT = new File(diffDetectiveCache + "/pc/" + repo.getRepositoryName() + "/" + firstParent.getName() + ".gt");
107+
File parentGT = new File(diffDetectiveCache + "/pc/" + repo.getRepositoryName()
108+
+ "/" + firstParent.getName() + ".gt");
101109
completedGroundTruth = Serde.deserialize(parentGT);
102110
}
103111
}
104-
File currentGTFile = new File(diffDetectiveCache + "/pc/" + repo.getRepositoryName() + "/" + commit.getName() + ".gt");
112+
File currentGTFile = new File(diffDetectiveCache + "/pc/" + repo.getRepositoryName()
113+
+ "/" + commit.getName() + ".gt");
105114
if (Files.exists(currentGTFile.toPath())) {
106115
GroundTruth loadedGT = Serde.deserialize(currentGTFile);
107116
if (processedCount % 1_000 == 0) {
@@ -124,19 +133,24 @@ private void postprocess(Repository repo, ArrayList<RevCommit> commits, Executor
124133
throw new UncheckedIOException(e);
125134
}
126135
String variablesList = completedGroundTruth.variablesListAsString();
127-
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(VARIABLES_FILE), variablesList));
136+
threadPool.submit(
137+
() -> Serde.writeToFile(commitSaveDir.resolve(VARIABLES_FILE), variablesList));
128138

129139
String groundTruthAsCSV = completedGroundTruth.asPcCsvString();
130-
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV), groundTruthAsCSV));
140+
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(CODE_VARIABILITY_CSV),
141+
groundTruthAsCSV));
131142

132-
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_MESSAGE_FILE), commit.getFullMessage()));
143+
threadPool.submit(() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_MESSAGE_FILE),
144+
commit.getFullMessage()));
133145

134-
Optional<String> parentIds = Arrays.stream(commit.getParents()).map(RevCommit::getName).reduce((s, s2) -> s + " " + s2);
146+
Optional<String> parentIds = Arrays.stream(commit.getParents()).map(RevCommit::getName)
147+
.reduce((s, s2) -> s + " " + s2);
135148
threadPool.submit(() -> parentIds.ifPresentOrElse(
136149
s -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), s),
137150
() -> Serde.writeToFile(commitSaveDir.resolve(COMMIT_PARENTS_FILE), "")));
138151

139-
threadPool.submit(() -> Serde.appendText(resultsRoot.resolve(SUCCESS_COMMIT_FILE), commit.getName() + "\n"));
152+
threadPool.submit(() -> Serde.appendText(resultsRoot.resolve(SUCCESS_COMMIT_FILE),
153+
commit.getName() + "\n"));
140154

141155
if (Boolean.parseBoolean(properties.getProperty(EXTRACT_CODE_MATCHING))) {
142156
String matchingAsCSV = completedGroundTruth.asMatchingCsvString();
@@ -146,7 +160,8 @@ private void postprocess(Repository repo, ArrayList<RevCommit> commits, Executor
146160
}
147161

148162
if (processedCount % 1_000 == 0) {
149-
Logger.info("Saved ground truth for commit {} of {}", processedCount + 1, commits.size());
163+
Logger.info("Saved ground truth for commit {} of {}", processedCount + 1,
164+
commits.size());
150165
}
151166
lastCommit = commit;
152167
processedCount++;

src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java

Lines changed: 71 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,42 @@
1919

2020
import static org.variantsync.vevos.extraction.ConfigProperties.*;
2121

22+
/**
23+
* Base class for ground truth extractions. This class offers basic utilities for any ground truth
24+
* extraction and expects the implementation of an extraction runner that is provided in form of a
25+
* supplier method.
26+
*
27+
* Each GroundTruthExtraction must be initialized with a set of properties that configure the
28+
* extraction.
29+
*/
2230
public abstract class GroundTruthExtraction {
2331
protected final Properties properties;
2432

33+
/**
34+
* Initialize the basic GroundTruth extraction with a set of extraction properties.
35+
*/
2536
protected GroundTruthExtraction(Properties properties) {
2637
this.properties = properties;
2738
}
2839

2940
/**
30-
* Main method to start the extraction.
41+
* Main method to start the extraction. The method first loads the properties from the specified
42+
* file and then initializes the specified extraction class with those properties. Lastly, it
43+
* starts the ground truth extraction with the configuration specified in the properties.
3144
*
32-
* @param args Command-line options.
33-
* @throws IOException When copying the log file fails.
45+
* @param args Two arguments are expected: First, a path to a properties file in which the
46+
* extraction is configured, and second, the full specifier of a GroundTruthExtraction
47+
* subclass. The subclass' constructor must match the constructor of
48+
* GroundTruthExtraction.
49+
* @throws IOException When loading the properties fails.
3450
*/
3551
public static void main(String[] args) throws IOException {
3652
checkOS();
3753

3854
// Load the configuration
3955
Properties properties = getProperties(getPropertiesFile(args));
40-
// TODO: load dynamically
56+
57+
// Determine the extraction class from the program arguments
4158
Class<?> extractionClass;
4259
try {
4360
extractionClass = determineExtractionClass(args);
@@ -49,10 +66,11 @@ public static void main(String[] args) throws IOException {
4966
try {
5067
extraction = initializeExtraction(extractionClass, properties);
5168
} catch (NoSuchMethodException e) {
52-
throw new RuntimeException("The required constructor does not exist for the specified class " + args[1]);
69+
throw new RuntimeException(
70+
"The required constructor does not exist for the specified class " + args[1]);
5371
} catch (InvocationTargetException | InstantiationException | IllegalAccessException e) {
54-
Logger.error("Was not able to instantiate extraction class with the propterties " + args[0]
55-
+ " and the class name " + args[1]);
72+
Logger.error("Was not able to instantiate extraction class with the propterties "
73+
+ args[0] + " and the class name " + args[1]);
5674
throw new RuntimeException(e);
5775
}
5876

@@ -65,13 +83,16 @@ private static Class<?> determineExtractionClass(String... args) throws ClassNot
6583
if (args.length > 1) {
6684
return Class.forName(args[1]);
6785
} else {
68-
Logger.error("The second program argument must specify a valid GroundTruthExtraction class.");
69-
throw new IllegalArgumentException("The second program argument must specify a valid GroundTruthExtraction class.");
86+
Logger.error(
87+
"The second program argument must specify a valid GroundTruthExtraction class.");
88+
throw new IllegalArgumentException(
89+
"The second program argument must specify a valid GroundTruthExtraction class.");
7090
}
7191
}
7292

73-
private static GroundTruthExtraction initializeExtraction(Class<?> extractionClass, Properties properties)
74-
throws NoSuchMethodException, InvocationTargetException, InstantiationException, IllegalAccessException {
93+
private static GroundTruthExtraction initializeExtraction(Class<?> extractionClass,
94+
Properties properties) throws NoSuchMethodException, InvocationTargetException,
95+
InstantiationException, IllegalAccessException {
7596
Constructor<?> constructor = extractionClass.getDeclaredConstructor(Properties.class);
7697
constructor.setAccessible(true); // If the constructor is not public
7798
return (GroundTruthExtraction) constructor.newInstance(properties);
@@ -103,40 +124,30 @@ public static Properties getProperties(File propertiesFile) {
103124
*/
104125
public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) {
105126

106-
return new AnalysisRunner.Options(
107-
Path.of(properties.getProperty(REPO_SAVE_DIR)),
127+
return new AnalysisRunner.Options(Path.of(properties.getProperty(REPO_SAVE_DIR)),
108128
Path.of(properties.getProperty(DD_OUTPUT_DIR)),
109-
Path.of(properties.getProperty(DATASET_FILE)),
110-
repo -> {
129+
Path.of(properties.getProperty(DATASET_FILE)), repo -> {
111130
final PatchDiffParseOptions repoDefault = repo.getParseOptions();
112131
return new PatchDiffParseOptions(
113132
PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER,
114133
new VariationDiffParseOptions(
115134
repoDefault.variationDiffParseOptions().annotationParser(),
116-
false,
117-
false
118-
)
119-
);
120-
},
121-
repo -> new DiffFilter.Builder()
122-
.allowMerge(true)
135+
false, false));
136+
}, repo -> new DiffFilter.Builder().allowMerge(true)
123137
// TODO: make configurable
124-
.allowedFileExtensions("h", "hpp", "c", "cpp")
125-
.build(),
126-
true,
127-
false
128-
);
138+
.allowedFileExtensions("h", "hpp", "c", "cpp").build(),
139+
true, false);
129140
}
130141

131142
/**
132143
* Throws an error if the host OS is Windows.
133144
*/
134145
public static void checkOS() {
135-
boolean isWindows = System.getProperty("os.name")
136-
.toLowerCase().startsWith("windows");
146+
boolean isWindows = System.getProperty("os.name").toLowerCase().startsWith("windows");
137147
if (isWindows) {
138-
Logger.error("Running the analysis under Windows is not supported as the Linux/BusyBox sources are not" +
139-
"checked out correctly.");
148+
Logger.error(
149+
"Running the analysis under Windows is not supported as the Linux/BusyBox sources are not"
150+
+ "checked out correctly.");
140151
quitOnError();
141152
}
142153
}
@@ -173,7 +184,7 @@ public static File getPropertiesFile(String[] args) {
173184
* Prints the given ground truth to console.
174185
*
175186
* @param groundTruth GT to print
176-
* @param commitName The id of the commit for which the GT has been calculated
187+
* @param commitName The id of the commit for which the GT has been calculated
177188
*/
178189
public static void print(GroundTruth groundTruth, String commitName) {
179190
System.out.println();
@@ -194,5 +205,33 @@ public void run(AnalysisRunner.Options options) throws IOException {
194205
AnalysisRunner.run(options, extractionRunner());
195206
}
196207

208+
protected int numProcessors() {
209+
final int availableProcessors;
210+
String numThreads = this.properties.getProperty(NUM_THREADS);
211+
if (numThreads == null || numThreads.trim().isEmpty() || numThreads.trim().equals("0")) {
212+
availableProcessors = Runtime.getRuntime().availableProcessors();
213+
} else {
214+
availableProcessors = Integer.parseInt(numThreads);
215+
}
216+
return availableProcessors;
217+
}
218+
219+
protected int diffDetectiveBatchSize() {
220+
final int batchSize;
221+
String configuredSize = this.properties.getProperty(BATCH_SIZE);
222+
if (configuredSize == null || configuredSize.trim().isEmpty()
223+
|| configuredSize.trim().equals("0")) {
224+
batchSize = 256;
225+
} else {
226+
batchSize = Integer.parseInt(configuredSize);
227+
}
228+
return batchSize;
229+
}
230+
231+
/**
232+
* Return a runner for the ground truth extraction. The runner receives pairs of repositories
233+
* and paths to result output directories and then starts a DiffDetective analysis. See
234+
* {@link FastGroundTruthExtraction} and {@link FullGroundTruthExtraction} for examples.
235+
*/
197236
protected abstract BiConsumer<Repository, Path> extractionRunner();
198237
}

0 commit comments

Comments
 (0)