This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit a3241bc

Update DataLake example by removing implicit calls
1 parent 3df78b1 commit a3241bc

File tree

3 files changed (+35, -32 lines)

sansa-examples-spark/src/main/resources/datalake/config

Lines changed: 21 additions & 20 deletions

@@ -11,43 +11,44 @@
     "entity": "Person"
   }
   , {
-    "type": "csv",
+    "type": "parquet",
     "options": {
-      "header": "true",
-      "delimiter": ",",
-      "mode": "DROPMALFORMED"
+      "spark_sql_parquet_filterPushdown": "true"
     },
-    "source": "src/main/resources/Data/review.csv",
+    "source": "src/main/resources/Data/review.parquet",
     "entity": "Review"
   }
   , {
-    "type": "csv",
+    "type": "mongodb",
     "options": {
-      "header": "true",
-      "delimiter": ",",
-      "mode": "DROPMALFORMED"
+      "url": "127.0.0.1",
+      "database": "bsbm",
+      "collection": "offer",
+      "options": ""
     },
-    "source": "src/main/resources/Data/offer.csv",
+    "source": "//Offer",
     "entity": "Offer"
   }
   , {
-    "type": "csv",
+    "type": "cassandra",
     "options": {
-      "header": "true",
-      "delimiter": ",",
-      "mode": "DROPMALFORMED"
+      "keyspace": "db",
+      "table": "product"
     },
-    "source": "src/main/resources/Data/product.csv",
+    "source": "//Product",
     "entity": "Product"
   }
   , {
-    "type": "csv",
+    "type": "jdbc",
     "options": {
-      "header": "true",
-      "delimiter": ",",
-      "mode": "DROPMALFORMED"
+      "url": "jdbc:mysql://localhost:3306/benchmark?useUnicode=true&useJDBCCompliantTimezoneShift=true&useLegacyDatetimeCode=false&serverTimezone=UTC&autoReconnect=true&useSSL=false",
+
+      "driver": "com.mysql.cj.jdbc.Driver",
+      "dbtable": "producer",
+      "user": "root",
+      "password": "root"
     },
-    "source": "src/main/resources/Data/producer.csv",
+    "source": "//Producer",
     "entity": "Producer"
   }
 ],
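
For orientation, the per-source "options" above correspond to ordinary Spark DataFrame reader settings. Below is a minimal sketch of the equivalent direct reads for the Parquet and JDBC entries, assuming a local SparkSession named spark; the underscored key "spark_sql_parquet_filterPushdown" presumably stands in for the Spark conf "spark.sql.parquet.filterPushdown", and the engine's internal wiring may differ.

import org.apache.spark.sql.SparkSession

object DirectReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    // Parquet entry: enable filter pushdown, then read the file directly.
    spark.conf.set("spark.sql.parquet.filterPushdown", "true")
    val review = spark.read.parquet("src/main/resources/Data/review.parquet")

    // JDBC entry: each config option maps onto a DataFrameReader option.
    val producer = spark.read.format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306/benchmark")
      .option("driver", "com.mysql.cj.jdbc.Driver")
      .option("dbtable", "producer")
      .option("user", "root")
      .option("password", "root")
      .load()

    review.printSchema()
    producer.printSchema()
    spark.stop()
  }
}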

sansa-examples-spark/src/main/resources/datalake/mappings.ttl

Lines changed: 9 additions & 9 deletions

@@ -15,8 +15,8 @@

 <#ProducerMapping>
   rml:logicalSource [
-    rml:source "src/main/resources/Data/producer.csv";
-    nosql:store nosql:csv
+    rml:source "//Producer";
+    nosql:store nosql:jdbc
   ];
   rr:subjectMap [
     rr:template "http://example.com/{nr}";
@@ -55,8 +55,8 @@

 <#ReviewMapping>
   rml:logicalSource [
-    rml:source "src/main/resources/Data/review.csv";
-    nosql:store nosql:csv
+    rml:source "src/main/resources/Data/review.parquet";
+    nosql:store nosql:parquet
   ];
   rr:subjectMap [
     rr:template "http://example.com/{nr}";
@@ -165,8 +165,8 @@

 <#OfferMapping>
   rml:logicalSource [
-    rml:source "src/main/resources/Data/offer.csv";
-    nosql:store nosql:csv
+    rml:source "//Offer";
+    nosql:store nosql:mongodb
   ];
   rr:subjectMap [
     rr:template "http://example.com/{_id}";
@@ -225,8 +225,8 @@

 <#ProductMapping>
   rml:logicalSource [
-    rml:source "src/main/resources/Data/product.csv";
-    nosql:store nosql:csv
+    rml:source "//Product";
+    nosql:store nosql:cassandra
  ];
   rr:subjectMap [
     rr:template "http://example.com/{nr}";
@@ -316,4 +316,4 @@
   rr:predicateObjectMap [
     rr:predicate bsbm:productPropertyTextual2;
     rr:objectMap [rml:reference "propertyTex2"]
-  ].
+  ].
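
Each mapping's rml:source / nosql:store pair must line up with a "source" / "type" entry in the config above. A hypothetical sanity check in Scala using Apache Jena (already a dependency of the example); the rml and nosql namespace URIs are assumptions here, since the prefix declarations fall outside these hunks:

import org.apache.jena.riot.RDFDataMgr

object MappingStoreCheck {
  def main(args: Array[String]): Unit = {
    // Assumed namespaces; the actual prefixes are declared at the top of mappings.ttl.
    val model = RDFDataMgr.loadModel("src/main/resources/datalake/mappings.ttl")
    val store = model.createProperty("http://purl.org/db/nosql#", "store")
    val source = model.createProperty("http://semweb.mmlab.be/ns/rml#", "source")

    // Print each logical source together with its declared store,
    // so the pairs can be compared against the config entries.
    val it = model.listSubjectsWithProperty(store)
    while (it.hasNext) {
      val subj = it.nextResource()
      val src = subj.getProperty(source)
      println(s"${if (src != null) src.getObject else "?"} -> ${subj.getProperty(store).getObject}")
    }
  }
}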

sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/query/DataLake.scala

Lines changed: 5 additions & 3 deletions

@@ -2,6 +2,7 @@ package net.sansa_stack.examples.spark.query

 import scala.collection.mutable

+import net.sansa_stack.query.spark.datalake.DataLakeEngine
 import net.sansa_stack.query.spark.query._
 import org.apache.jena.riot.Lang
 import org.apache.spark.sql.SparkSession
@@ -31,7 +32,8 @@ object DataLake {
       .master("local[*]")
       .getOrCreate()

-    val result = spark.sparqlDL(queryFile, mappingsFile, configFile)
+    // val result = spark.sparqlDL(queryFile, mappingsFile, configFile)
+    val result = DataLakeEngine.run(queryFile, mappingsFile, configFile, spark)
     result.show()

     spark.stop
@@ -40,8 +42,8 @@ object DataLake {

   case class Config(
     queryFile: String = getClass.getResource("/datalake/queries/Q1.sparql").getPath,
-    mappingsFile: String = getClass.getResource("/datalake/config_csv-only").getPath,
-    configFile: String = getClass.getResource("/datalake/mappings_csv-only.ttl").getPath)
+    mappingsFile: String = getClass.getResource("/datalake/config").getPath,
+    configFile: String = getClass.getResource("/datalake/mappings.ttl").getPath)

   val parser = new scopt.OptionParser[Config]("Sparqlify example") {
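
The change above replaces the implicit spark.sparqlDL extension method with an explicit call to DataLakeEngine.run. A self-contained sketch of the same call outside the scopt scaffolding; the paths are illustrative, and note that the Config defaults above appear to assign the config path to mappingsFile and the mappings file to configFile, so the argument order should be double-checked against the engine's signature:

import net.sansa_stack.query.spark.datalake.DataLakeEngine
import org.apache.spark.sql.SparkSession

object DataLakeDirect {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("DataLake example (explicit engine call)")
      .master("local[*]")
      .getOrCreate()

    // Explicit entry point used by this commit in place of the implicit call.
    val result = DataLakeEngine.run(
      "src/main/resources/datalake/queries/Q1.sparql", // SPARQL query
      "src/main/resources/datalake/mappings.ttl",      // RML mappings
      "src/main/resources/datalake/config",            // data source config
      spark)

    result.show()
    spark.stop()
  }
}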
