Skip to content
This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 5c9dbb6

Browse files
committed
Add DataLake example for Spark
1 parent 18615d2 commit 5c9dbb6

24 files changed

+748
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
{
2+
"sources": [
3+
{
4+
"type": "csv",
5+
"options": {
6+
"header": "true",
7+
"delimiter": ",",
8+
"mode": "DROPMALFORMED"
9+
},
10+
"source": "src/main/resources/Data/person.csv",
11+
"entity": "Person"
12+
}
13+
, {
14+
"type": "csv",
15+
"options": {
16+
"header": "true",
17+
"delimiter": ",",
18+
"mode": "DROPMALFORMED"
19+
},
20+
"source": "src/main/resources/Data/review.csv",
21+
"entity": "Review"
22+
}
23+
, {
24+
"type": "csv",
25+
"options": {
26+
"header": "true",
27+
"delimiter": ",",
28+
"mode": "DROPMALFORMED"
29+
},
30+
"source": "src/main/resources/Data/offer.csv",
31+
"entity": "Offer"
32+
}
33+
, {
34+
"type": "csv",
35+
"options": {
36+
"header": "true",
37+
"delimiter": ",",
38+
"mode": "DROPMALFORMED"
39+
},
40+
"source": "src/main/resources/Data/product.csv",
41+
"entity": "Product"
42+
}
43+
, {
44+
"type": "csv",
45+
"options": {
46+
"header": "true",
47+
"delimiter": ",",
48+
"mode": "DROPMALFORMED"
49+
},
50+
"source": "src/main/resources/Data/producer.csv",
51+
"entity": "Producer"
52+
}
53+
],
54+
"weights": [
55+
{
56+
"datasource": "cassandra",
57+
"weight": 1
58+
},
59+
{
60+
"datasource": "mongodb",
61+
"weight": 1
62+
},
63+
{
64+
"datasource": "parquet",
65+
"weight": 1
66+
},
67+
{
68+
"datasource": "csv",
69+
"weight": 1
70+
},
71+
{
72+
"datasource": "jdbc",
73+
"weight": 1
74+
}
75+
]
76+
}
Binary file not shown.

sansa-examples-spark/src/main/resources/datalake/data/offer.csv/part-00000-86272586-5266-423f-b936-829019bf5d8c.csv

Lines changed: 201 additions & 0 deletions
Large diffs are not rendered by default.

sansa-examples-spark/src/main/resources/datalake/data/offer.csv/part-00001-86272586-5266-423f-b936-829019bf5d8c.csv

Whitespace-only changes.
Binary file not shown.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
nr,name,mbox_sha1sum,country,publisher,publishDate
2+
1,'Ruggiero-Delane','fb3efd92e3c7a8d775a895ba476e11a3e8f3fac','US',1,2008-09-05
3+
2,'Eyana-Aurelianus','df1cf8e68d49e5b65f1507dbecec6b61e9dc98','JP',1,2008-08-07
4+
3,'Danijela-Adalbrand','9b9d4b8dcf7ada3c181b4bed1fa3c53d29caf65','US',1,2008-07-21
5+
4,'Allegra-Walburga','619b2f69a01a7d86c0eca3f5e910c5b559ff3a','RU',1,2008-06-23
6+
5,'Przemek-Berte','c3b1c82511908f706153319688a7a5599b8ad8c0','ES',1,2008-08-19
7+
6,'Caryn','d6deee088e99af0f7c65fb7cca9bdfbbe3d7343','CN',1,2008-06-29

0 commit comments

Comments
 (0)