Skip to content

Commit 02c1eb3

Browse files
Merge pull request #25 from commoncrawl/format-spotless
Integrate Spotless code formatter
2 parents 963c4f7 + a33d9de commit 02c1eb3

File tree

9 files changed

+528
-92
lines changed

9 files changed

+528
-92
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ The assembly jar file includes also the [WebGraph](https://webgraph.di.unimi.it/
1919
The Javadocs are created by `mvn javadoc:javadoc`. Then open the file `target/site/apidocs/index.html` in a browser.
2020

2121

22+
### Source Code Formatting
23+
24+
Run `mvn spotless:check` to verify the formatting and `mvn spotless:apply` to fix it; see the [Spotless Maven guide](https://github.com/diffplug/spotless/blob/main/plugin-maven/README.md) for details. Java formatting rules are defined in [eclipse-formatter.xml](eclipse-formatter.xml).
25+
26+
2227
## Memory and Disk Requirements
2328

2429
Note that the webgraphs are usually multiple Gigabytes in size and require for processing

eclipse-formatter.xml

Lines changed: 404 additions & 0 deletions
Large diffs are not rendered by default.

pom.xml

Lines changed: 90 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2-
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project
3+
xmlns="http://maven.apache.org/POM/4.0.0"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
36
<modelVersion>4.0.0</modelVersion>
47

58
<groupId>org.commoncrawl</groupId>
@@ -25,68 +28,6 @@
2528
<junit.version>5.13.2</junit.version>
2629
</properties>
2730

28-
<build>
29-
<resources>
30-
<resource>
31-
<directory>src/main/resources</directory>
32-
</resource>
33-
</resources>
34-
<plugins>
35-
<plugin>
36-
<artifactId>maven-compiler-plugin</artifactId>
37-
<version>3.14.0</version>
38-
<configuration>
39-
<source>${java.version}</source>
40-
<target>${java.version}</target>
41-
</configuration>
42-
</plugin>
43-
<plugin>
44-
<artifactId>maven-assembly-plugin</artifactId>
45-
<version>3.7.1</version>
46-
<configuration>
47-
<descriptorRefs>
48-
<descriptorRef>jar-with-dependencies</descriptorRef>
49-
</descriptorRefs>
50-
<finalName>cc-webgraph-${project.version}</finalName>
51-
</configuration>
52-
<executions>
53-
<execution>
54-
<phase>package</phase>
55-
<goals>
56-
<goal>single</goal>
57-
</goals>
58-
</execution>
59-
</executions>
60-
</plugin>
61-
<plugin>
62-
<artifactId>maven-surefire-plugin</artifactId>
63-
<version>3.5.2</version>
64-
</plugin>
65-
<plugin>
66-
<groupId>org.apache.maven.plugins</groupId>
67-
<artifactId>maven-enforcer-plugin</artifactId>
68-
<version>3.5.0</version>
69-
<executions>
70-
<execution>
71-
<id>enforce-maven</id>
72-
<goals>
73-
<goal>enforce</goal>
74-
</goals>
75-
<configuration>
76-
<rules>
77-
<requireMavenVersion>
78-
<version>3.6.3</version>
79-
</requireMavenVersion>
80-
</rules>
81-
</configuration>
82-
</execution>
83-
</executions>
84-
</plugin>
85-
</plugins>
86-
</build>
87-
88-
89-
9031
<dependencyManagement>
9132
<dependencies>
9233
<dependency>
@@ -248,4 +189,89 @@
248189
</dependency>
249190

250191
</dependencies>
192+
193+
<build>
194+
<resources>
195+
<resource>
196+
<directory>src/main/resources</directory>
197+
</resource>
198+
</resources>
199+
<plugins>
200+
<plugin>
201+
<artifactId>maven-compiler-plugin</artifactId>
202+
<version>3.14.0</version>
203+
<configuration>
204+
<source>${java.version}</source>
205+
<target>${java.version}</target>
206+
</configuration>
207+
</plugin>
208+
<plugin>
209+
<artifactId>maven-assembly-plugin</artifactId>
210+
<version>3.7.1</version>
211+
<configuration>
212+
<descriptorRefs>
213+
<descriptorRef>jar-with-dependencies</descriptorRef>
214+
</descriptorRefs>
215+
<finalName>cc-webgraph-${project.version}</finalName>
216+
</configuration>
217+
<executions>
218+
<execution>
219+
<goals>
220+
<goal>single</goal>
221+
</goals>
222+
<phase>package</phase>
223+
</execution>
224+
</executions>
225+
</plugin>
226+
<plugin>
227+
<artifactId>maven-surefire-plugin</artifactId>
228+
<version>3.5.2</version>
229+
</plugin>
230+
<plugin>
231+
<groupId>org.apache.maven.plugins</groupId>
232+
<artifactId>maven-enforcer-plugin</artifactId>
233+
<version>3.5.0</version>
234+
<executions>
235+
<execution>
236+
<id>enforce-maven</id>
237+
<goals>
238+
<goal>enforce</goal>
239+
</goals>
240+
<configuration>
241+
<rules>
242+
<requireMavenVersion>
243+
<version>3.6.3</version>
244+
</requireMavenVersion>
245+
</rules>
246+
</configuration>
247+
</execution>
248+
</executions>
249+
</plugin>
250+
<plugin>
251+
<groupId>com.diffplug.spotless</groupId>
252+
<artifactId>spotless-maven-plugin</artifactId>
253+
<version>2.46.1</version>
254+
<configuration>
255+
<pom>
256+
<!-- These are the defaults; override them if needed. -->
257+
<includes>
258+
<include>pom.xml</include>
259+
</includes>
260+
<sortPom>
261+
<indentAttribute>all</indentAttribute>
262+
<keepBlankLines>true</keepBlankLines>
263+
<expandEmptyElements>false</expandEmptyElements>
264+
<nrOfIndentSpace>-1</nrOfIndentSpace>
265+
<predefinedSortOrder>recommended_2008_06</predefinedSortOrder>
266+
</sortPom>
267+
</pom>
268+
<java>
269+
<eclipse>
270+
<file>${project.basedir}/eclipse-formatter.xml</file>
271+
</eclipse>
272+
</java>
273+
</configuration>
274+
</plugin>
275+
</plugins>
276+
</build>
251277
</project>

src/main/java/org/commoncrawl/webgraph/CreatePreferenceVector.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ public class CreatePreferenceVector {
3535
long recordsProcessed;
3636
long preferenceNamesFound;
3737

38-
3938
public CreatePreferenceVector(double defVal) {
4039
defaultPreferenceValue = defVal;
4140
}

src/main/java/org/commoncrawl/webgraph/HostToDomainGraph.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,10 @@ public String convertNode(String line) {
337337
return null;
338338
}
339339
if (lastDomain != null && domain.equals(lastDomain.name)) {
340-
// short cut for the common case of many subsequent subdomains of the same domain
340+
/*
341+
* short cut for the common case of many subsequent subdomains of the same
342+
* domain
343+
*/
341344
lastDomain.add(id);
342345
return null;
343346
}

src/main/java/org/commoncrawl/webgraph/JoinSortRanks.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ private void assignRank(int[] ranks, IntComparator comp) {
9797
indirectSortPerm[i] = i;
9898
}
9999
Arrays.parallelQuickSort(0, length, comp, this::swapIndirect);
100-
for (int i = 0; i < length; ) {
100+
for (int i = 0; i < length;) {
101101
ranks[indirectSortPerm[i]] = ++i;
102102
}
103103
indirectSortPerm = null;
@@ -139,7 +139,7 @@ public String addRanks(String line) {
139139
long id = Long.parseLong(line.substring(0, sep));
140140
// check whether new line is already contained
141141
int end = line.lastIndexOf('\n');
142-
String revHost = line.substring(sep+1);
142+
String revHost = line.substring(sep + 1);
143143
float hcv = getHarmonicCentralityValue(id);
144144
long hcr = getHarmonicCentralityRank(id);
145145
double prv = getPageRankValue(id);
@@ -160,7 +160,6 @@ public String addRanks(String line) {
160160
return sb.toString();
161161
}
162162

163-
164163
/**
165164
* Implementation of {@link JoinSortRanks} for lists exceeding
166165
* {@link Arrays#MAX_ARRAY_SIZE}.
@@ -229,7 +228,7 @@ private void assignRank(long[][] ranks, LongComparator comp) {
229228
BigArrays.set(indirectSortPerm, i, i);
230229
}
231230
BigArrays.quickSort(0, length, comp, this::swapIndirect);
232-
for (long i = 0; i < length; ) {
231+
for (long i = 0; i < length;) {
233232
BigArrays.set(ranks, BigArrays.get(indirectSortPerm, i), ++i);
234233
}
235234
indirectSortPerm = null;

src/main/java/org/commoncrawl/webgraph/explore/Graph.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,6 @@ public static String getTopLevelDomain(String reversedDomainName) {
384384
return reversedDomainName;
385385
}
386386

387-
388387
/** Intersection of two sorted lists */
389388
public static long[] intersect(long[] a, long[] b) {
390389
int m = a.length;
@@ -523,7 +522,9 @@ public void subgraphMetrics(long[] nodes) {
523522
LOG.info("\toutlinks = {} (links from the subgraph to outer nodes)", clusterOutlinks);
524523
LOG.info("\ttotal inlinks = {} (all inlinks)", totalInlinks);
525524
LOG.info("\ttotal outlinks = {} (all outlinks)", totalOutlinks);
526-
LOG.info("\tnodes linked = {} (outer nodes linked from subgraph)", sharedSuccessors(nodes, 1, nodes.length).length);
527-
LOG.info("\tnodes linking = {} (outer nodes linking to subgraph)", sharedPredecessors(nodes, 1, nodes.length).length);
525+
LOG.info("\tnodes linked = {} (outer nodes linked from subgraph)",
526+
sharedSuccessors(nodes, 1, nodes.length).length);
527+
LOG.info("\tnodes linking = {} (outer nodes linking to subgraph)",
528+
sharedPredecessors(nodes, 1, nodes.length).length);
528529
}
529530
}

src/main/java/org/commoncrawl/webgraph/explore/GraphExplorer.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,14 @@ public void sl(String vertexLabel) {
189189
public long[] loadVerticesFromFile(String fileName) {
190190
AtomicLong lines = new AtomicLong();
191191
try (Stream<String> in = Files.lines(Paths.get(fileName), StandardCharsets.UTF_8)) {
192-
long[] res = in.mapToLong(
193-
label -> {
194-
lines.incrementAndGet();
195-
long id = g.vertexLabelToId(label);
196-
if (id == -1) {
197-
LOG.debug("Vertex `{}` not found in graph.", label);
198-
}
199-
return id;
200-
}).filter(id -> id > -1).toArray();
192+
long[] res = in.mapToLong(label -> {
193+
lines.incrementAndGet();
194+
long id = g.vertexLabelToId(label);
195+
if (id == -1) {
196+
LOG.debug("Vertex `{}` not found in graph.", label);
197+
}
198+
return id;
199+
}).filter(id -> id > -1).toArray();
201200
LOG.info("Loaded {} vertices of {} lines in {}.", res.length, lines, fileName);
202201
return res;
203202
} catch (IOException e) {

src/test/java/org/commoncrawl/webgraph/TestCountingMergedIntIterator.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,16 @@ void testSimple() {
2626
CountingMergedIntIterator iter = new CountingMergedIntIterator(LazyIntIterators.EMPTY_ITERATOR);
2727
assertFalse(iter.hasNext());
2828

29-
int[][][] testArrays = { //
30-
{{0, 1}}, //
31-
{{0}, {1}}, //
32-
{{1}, {0}}, //
33-
{{1}, {0}, {}}, //
34-
{{1}, {0}, {}, {0}, {0}}, //
35-
{{1}, {0}, {}, {0}, {0, 1}}, //
29+
int[][][] testArrays = { //
30+
{ { 0, 1 } }, //
31+
{ { 0 }, { 1 } }, //
32+
{ { 1 }, { 0 } }, //
33+
{ { 1 }, { 0 }, {} }, //
34+
{ { 1 }, { 0 }, {}, { 0 }, { 0 } }, //
35+
{ { 1 }, { 0 }, {}, { 0 }, { 0, 1 } }, //
3636
// tests for input arrays with repeating numbers
37-
{{1, 1}, {0, 0}, {}, {0, 0}, {0, 0}}, //
38-
{{1, 1}, {0, 0}, {}, {0}, {0, 1}} //
37+
{ { 1, 1 }, { 0, 0 }, {}, { 0, 0 }, { 0, 0 } }, //
38+
{ { 1, 1 }, { 0, 0 }, {}, { 0 }, { 0, 1 } } //
3939
};
4040

4141
for (int[][] tArrays : testArrays) {
@@ -48,7 +48,7 @@ void testSimple() {
4848
int totalCount = 0;
4949
iter = new CountingMergedIntIterator(tIters);
5050
assertTrue(iter.hasNext());
51-
51+
5252
assertEquals(0, iter.nextInt());
5353
assertTrue(iter.getCount() > 0);
5454
totalCount += iter.getCount();

0 commit comments

Comments
 (0)