
Commit bae0a94

Merge branch 'release/0.1.0'
2 parents f42a8de + edbef73

File tree (4 files changed: +23 -185 lines)

pom.xml
sansa-datalake-spark/pom.xml
sansa-datalake-spark/src/main/scala/net/sansa_stack/datalake/spark/Mapper.scala
sansa-datalake-spark/src/main/scala/net/sansa_stack/datalake/spark/Run.scala


pom.xml

Lines changed: 1 addition & 10 deletions
@@ -4,7 +4,7 @@
 
   <groupId>net.sansa-stack</groupId>
   <artifactId>sansa-datalake-parent_2.11</artifactId>
-  <version>0.4.1-SNAPSHOT</version>
+  <version>0.1.0</version>
   <packaging>pom</packaging>
 
   <name>SANSA Stack - DataLake Layer - Parent</name>
@@ -25,19 +25,10 @@
 
     <scala.version>2.11.11</scala.version>
     <scala.binary.version>2.11</scala.binary.version>
-    <scala.classifier>${scala.binary.version}</scala.classifier>
     <scala.version.suffix>_${scala.binary.version}</scala.version.suffix>
 
-    <flink.version>1.5.0</flink.version>
     <spark.version>2.4.0</spark.version>
     <jena.version>3.9.0</jena.version>
-    <jsa.subversion>3</jsa.subversion>
-    <hadoop.version>2.8.3</hadoop.version>
-
-    <jsa.version>${jena.version}-${jsa.subversion}</jsa.version>
-
-    <jetty.version>9.4.8.v20171121</jetty.version>
-    <mortbay.jetty.version>6.1.26</mortbay.jetty.version>
 
     <scalastyle.config.path>${project.basedir}/scalastyle-config.xml</scalastyle.config.path>
   </properties>

sansa-datalake-spark/pom.xml

Lines changed: 1 addition & 168 deletions
@@ -7,7 +7,7 @@
   <parent>
     <groupId>net.sansa-stack</groupId>
     <artifactId>sansa-datalake-parent_2.11</artifactId>
-    <version>0.4.1-SNAPSHOT</version>
+    <version>0.1.0</version>
   </parent>
 
   <dependencies>
@@ -155,171 +155,4 @@
     </plugins>
   </build>
 
-  <profiles>
-    <profile>
-      <id>dist</id>
-      <activation>
-        <property>
-          <name>dist</name>
-        </property>
-      </activation>
-      <!-- This profile uses the assembly plugin to create a special "dist"
-        package for BigTop that contains Spark but not the Hadoop JARs it depends
-        on. -->
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-shade-plugin</artifactId>
-            <!--<version>3.1.1</version> -->
-            <executions>
-              <execution>
-                <phase>package</phase>
-                <goals>
-                  <goal>shade</goal>
-                </goals>
-                <configuration>
-                  <minimizeJar>true</minimizeJar>
-
-                  <artifactSet>
-                    <excludes>
-                      <exclude>org.apache.spark:spark-core_${scala.binary.version}</exclude>
-                      <exclude>org.apache.spark:spark-sql_${scala.binary.version}</exclude>
-                      <exclude>org.apache.spark:spark-graphx_${scala.binary.version}</exclude>
-                      <exclude>org.apache.spark:*</exclude>
-                      <exclude>org.eclipse.jetty:jetty-server</exclude>
-                      <exclude>org.eclipse.jetty:jetty-continuation</exclude>
-                      <exclude>org.eclipse.jetty:jetty-http</exclude>
-                      <exclude>org.eclipse.jetty:jetty-io</exclude>
-                      <exclude>org.eclipse.jetty:jetty-util</exclude>
-                      <exclude>org.eclipse.jetty:jetty-security</exclude>
-                      <exclude>org.eclipse.jetty:jetty-servlet</exclude>
-                      <exclude>org.eclipse.jetty:*</exclude>
-                      <exclude>org.eclipse.*:*</exclude>
-                      <exclude>org.glassfish.*:*</exclude>
-                      <exclude>org.netbeans.api:*</exclude>
-                      <exclude>org.netbeans:*</exclude>
-                      <exclude>org.scala-lang:scala-library</exclude>
-                      <exclude>org.scala-lang:scala-compiler</exclude>
-                      <exclude>org.scala-lang:scala-reflect</exclude>
-                      <exclude>commons-cli:commons-cli</exclude>
-                      <exclude>commons-codec:commons-codec</exclude>
-                      <exclude>commons-collections:commons-collections</exclude>
-                      <exclude>commons-configuration:commons-configuration</exclude>
-                      <exclude>commons-digester:commons-digester</exclude>
-                      <exclude>commons-httpclient:commons-httpclient</exclude>
-                      <exclude>commons-io:commons-io</exclude>
-                      <exclude>commons-lang:commons-lang</exclude>
-                      <exclude>commons-logging:commons-logging</exclude>
-                      <exclude>commons-net:commons-net</exclude>
-                      <exclude>io.dropwizard.metrics:metrics*</exclude>
-                      <exclude>io.netty:netty*</exclude>
-                      <exclude>javax.activation:activation</exclude>
-                      <exclude>javax.annotation:javax.annotation-api</exclude>
-                      <exclude>javax.servlet:javax.servlet-api</exclude>
-                      <exclude>javax.servlet.jsp:jsp-api</exclude>
-                      <exclude>javax.servlet:servlet-api</exclude>
-                      <exclude>javax.validation:validation-api</exclude>
-                      <exclude>javax.ws.rs:javax.ws.rs-api</exclude>
-                      <exclude>javax.xml.bind:jaxb-api</exclude>
-                      <exclude>javax.xml.stream:stax-api</exclude>
-                      <exclude>jdk.tools:jdk.tools</exclude>
-                      <exclude>net.java.dev.jets3t:jets3t</exclude>
-                      <exclude>net.jpountz.lz4:lz4</exclude>
-                      <exclude>net.razorvine:pyrolite</exclude>
-                      <exclude>net.sf.py4j:py4j</exclude>
-                      <exclude>org.antlr:antlr4-runtime</exclude>
-                      <exclude>org.apache.avro:avro*</exclude>
-                      <exclude>org.apache.commons:commons-lang3</exclude>
-                      <exclude>org.apache.commons:commons-math3</exclude>
-                      <exclude>org.apache.commons:commons-compress</exclude>
-                      <exclude>org.apache.curator:curator*</exclude>
-                      <exclude>org.apache.directory.api:*</exclude>
-                      <exclude>org.apache.directory.server:*</exclude>
-                      <exclude>org.apache.hadoop:*</exclude>
-                      <exclude>org.apache.htrace:htrace-core</exclude>
-                      <exclude>org.apache.httpcomponents:*</exclude>
-                      <exclude>org.apache.ivy:ivy</exclude>
-                      <exclude>org.apache.mesos:mesos</exclude>
-                      <exclude>org.apache.parquet:parquet*</exclude>
-                      <exclude>org.apache.xbean:xbean-asm5-shaded</exclude>
-                      <exclude>org.apache.zookeeper:zookeeper</exclude>
-                      <exclude>org.codehaus.jackson:jackson-*</exclude>
-                      <exclude>org.codehaus.janino:*</exclude>
-                      <exclude>org.codehaus.jettison:jettison</exclude>
-                      <exclude>org.fusesource.leveldbjni:leveldbjni-all</exclude>
-                      <exclude>org.glassfish.hk2*</exclude>
-                      <exclude>org.glassfish.jersey*</exclude>
-                      <exclude>org.javassist:javassist</exclude>
-                      <exclude>org.json4s:json4s*</exclude>
-                      <exclude>org.mortbay.jetty:jetty*</exclude>
-                      <exclude>org.objenesis:objenesis</exclude>
-                      <exclude>org.roaringbitmap:RoaringBitmap</exclude>
-                      <exclude>org.scala-lang:*</exclude>
-                      <exclude>org.slf4j:jul-to-slf4j</exclude>
-                      <exclude>org.slf4j:jcl-over-slf4j</exclude>
-                      <exclude>org.spark-project.spark:unused</exclude>
-                      <exclude>org.xerial.snappy:snappy-java</exclude>
-                      <exclude>oro:oro</exclude>
-                      <exclude>xmlenc:xmlenc</exclude>
-                      <exclude>org.gephi:*</exclude>
-                      <exclude>org.jfree:*</exclude>
-                      <exclude>com.itextpdf:*</exclude>
-                      <exclude>org.apache.poi:*</exclude>
-                      <exclude>org.apache.batik:*</exclude>
-                      <exclude>com.ibm.sparktc:*</exclude>
-                    </excludes>
-
-                  </artifactSet>
-                  <filters>
-                    <filter>
-                      <artifact>*:*</artifact>
-                      <excludes>
-                        <!-- Avoid a Spark error: Invalid signature file digest for Manifest
-                          main attributes -->
-                        <exclude>META-INF/*.SF</exclude>
-                        <exclude>META-INF/*.DSA</exclude>
-                        <exclude>META-INF/*.RSA</exclude>
-                        <exclude>META-INF/maven/**</exclude>
-                      </excludes>
-                    </filter>
-                    <filter>
-                      <artifact>org.apache.jena:*</artifact>
-                      <includes>
-                        <include>**</include>
-                      </includes>
-                    </filter>
-                    <filter>
-                      <artifact>net.sansa-stack:*</artifact>
-                      <includes>
-                        <include>**</include>
-                      </includes>
-                    </filter>
-                    <!-- This has to be done because the service transformer finds
-                      InitJenaSparqlApiSparqlExtensions and adds it to the Jena subsystem lifecycle,
-                      but the class itself is removed during minimization, so the Jena init
-                      procedure fails -->
-                    <filter>
-                      <artifact>org.aksw.jena-sparql-api:jena-sparql-api-sparql-ext</artifact>
-                      <includes>
-                        <include>org/aksw/jena_sparql_api/sparql/ext/init/*</include>
-                      </includes>
-                    </filter>
-                  </filters>
-                  <finalName>dist-${project.artifactId}-${project.version}</finalName>
-                  <transformers>
-                    <transformer
-                      implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
-                  </transformers>
-                  <createDependencyReducedPom>false</createDependencyReducedPom>
-
-                </configuration>
-              </execution>
-            </executions>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
-  </profiles>
-
 </project>

sansa-datalake-spark/src/main/scala/net/sansa_stack/datalake/spark/Mapper.scala

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ class Mapper (mappingsFile: String) {
 
     val in = FileManager.get().open(mappingsFile)
     if (in == null) {
-      throw new IllegalArgumentException("File: " + queryString + " not found")
+      throw new IllegalArgumentException("ERROR: File: " + queryString + " not found")
    }
 
    val model = ModelFactory.createDefaultModel()
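For context, this hunk sits in Mapper's mappings-loading routine; note that the message interpolates queryString even though it is mappingsFile that failed to open, which looks like a pre-existing copy-paste slip that this commit leaves in place. A minimal, self-contained sketch of the load pattern visible in the hunk (the read language and the helper name are assumptions, not taken from the diff):

    import org.apache.jena.rdf.model.{ Model, ModelFactory }
    import org.apache.jena.util.FileManager

    class MappingsLoaderSketch(mappingsFile: String) {
      // Sketch of the load pattern shown in the hunk above; the real
      // Mapper class does considerably more than this.
      def loadMappings(): Model = {
        val in = FileManager.get().open(mappingsFile) // null if not found
        if (in == null) {
          // The committed code interpolates queryString here instead of
          // mappingsFile (pre-existing slip, untouched by this commit).
          throw new IllegalArgumentException("ERROR: File: " + mappingsFile + " not found")
        }
        val model = ModelFactory.createDefaultModel()
        model.read(in, null, "TURTLE") // assumption: mappings serialized as Turtle
        model
      }
    }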

sansa-datalake-spark/src/main/scala/net/sansa_stack/datalake/spark/Run.scala

Lines changed: 20 additions & 6 deletions
@@ -3,7 +3,7 @@ package net.sansa_stack.datalake.spark
 import java.io.FileNotFoundException
 
 import org.apache.commons.lang.time.StopWatch
-import org.apache.log4j.{Level, Logger}
+import org.apache.log4j.{ Level, Logger }
 import net.sansa_stack.datalake.spark.utils.Helpers._
 
 import scala.collection.JavaConversions._
@@ -16,6 +16,7 @@ import org.apache.spark.sql.DataFrame
 class Run[A](executor: QueryExecutor[A]) {
 
   private var finalDataSet: A = _
+  val logger = Logger.getLogger(this.getClass.getName.stripSuffix("$"))
 
   def application(queryFile: String, mappingsFile: String, configFile: String): DataFrame = {
 
@@ -36,7 +37,6 @@
     val queryString = scala.io.Source.fromFile(queryFile)
     var query = try queryString.mkString finally queryString.close()
 
-
     // Transformations
     var transformExist = false
     var trans = ""
@@ -259,14 +259,28 @@
 
     } catch {
       case ex : FileNotFoundException =>
-        println("One of input files ins't found")
+        println("ERROR: One of input files ins't found.")
         null
 
-      case ex : IndexOutOfBoundsException =>
-        println("IO Exception")
+      case ex : org.apache.jena.riot.RiotException =>
+        println("ERROR: invalid Mappings, check syntax.")
         null
 
-    }
+      case ex : org.apache.spark.SparkException =>
+        println("ERROR: invalid Spark Master.")
+        null
+
+      case ex : com.fasterxml.jackson.core.JsonParseException =>
+        println("ERROR: invalid JSON content in config file.")
+        null
 
+      case ex : java.lang.IllegalArgumentException =>
+        println("ERROR: invalid mappings.")
+        null
+
+      case ex : org.apache.jena.query.QueryParseException =>
+        println("ERROR: invalid query.")
+        null
+    }
   }
 }
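Taken together, the last hunk replaces the single generic IndexOutOfBoundsException arm with one catch arm per anticipated failure (missing input file, unparsable mappings, bad Spark master, malformed config JSON, invalid query), each printing a labelled message and yielding null so the caller can test the result; the newly added logger field is available but these arms still report via println. A condensed, self-contained sketch of the resulting shape, using only JDK exceptions so it runs stand-alone (the body of run and its result value are stand-ins, not the real query pipeline):

    import java.io.FileNotFoundException

    // Sketch of the error-handling shape this commit moves to: one catch
    // arm per anticipated failure, each reporting and yielding null. The
    // real method also has arms for RiotException, SparkException,
    // JsonParseException and QueryParseException, which need their
    // libraries on the classpath.
    object ErrorHandlingSketch {
      def run(queryFile: String): AnyRef =
        try {
          if (queryFile.isEmpty) throw new FileNotFoundException(queryFile)
          "finalDataSet" // stand-in for the DataFrame the real code returns
        } catch {
          case _: FileNotFoundException =>
            println("ERROR: One of input files ins't found.") // message as committed
            null
          case _: IllegalArgumentException =>
            println("ERROR: invalid mappings.")
            null
        }

      def main(args: Array[String]): Unit =
        println(run("")) // hits the FileNotFoundException arm, then prints null
    }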
