Skip to content

Commit 81baf10

Browse files
committed
Create a DataFrame, manipulate it, and load it into a local MySQL database
1 parent 2f07ae0 commit 81baf10

File tree

5 files changed: 123 additions and 61 deletions

5 files changed: 123 additions and 61 deletions

.classpath

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,10 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<classpath>
3-
<classpathentry kind="src" output="target/classes" path="src/main/java">
4-
<attributes>
5-
<attribute name="optional" value="true"/>
6-
<attribute name="maven.pomderived" value="true"/>
7-
</attributes>
8-
</classpathentry>
93
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
104
<attributes>
115
<attribute name="maven.pomderived" value="true"/>
126
</attributes>
137
</classpathentry>
14-
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15-
<attributes>
16-
<attribute name="optional" value="true"/>
17-
<attribute name="maven.pomderived" value="true"/>
18-
<attribute name="test" value="true"/>
19-
</attributes>
20-
</classpathentry>
218
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
229
<attributes>
2310
<attribute name="maven.pomderived" value="true"/>
@@ -42,6 +29,19 @@
4229
<attribute name="m2e-apt" value="true"/>
4330
</attributes>
4431
</classpathentry>
32+
<classpathentry kind="src" output="target/classes" path="src/main/java">
33+
<attributes>
34+
<attribute name="optional" value="true"/>
35+
<attribute name="maven.pomderived" value="true"/>
36+
</attributes>
37+
</classpathentry>
38+
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
39+
<attributes>
40+
<attribute name="optional" value="true"/>
41+
<attribute name="maven.pomderived" value="true"/>
42+
<attribute name="test" value="true"/>
43+
</attributes>
44+
</classpathentry>
4545
<classpathentry kind="src" output="target/test-classes" path="target/generated-test-sources/test-annotations">
4646
<attributes>
4747
<attribute name="optional" value="true"/>

.gitignore

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
HELP.md
2+
target/
3+
!.mvn/wrapper/maven-wrapper.jar
4+
!**/src/main/**/target/
5+
!**/src/test/**/target/
6+
7+
### STS ###
8+
.apt_generated
9+
.classpath
10+
.factorypath
11+
.project
12+
.settings
13+
.springBeans
14+
.sts4-cache
15+
16+
### IntelliJ IDEA ###
17+
.idea
18+
*.iws
19+
*.iml
20+
*.ipr
21+
22+
### NetBeans ###
23+
/nbproject/private/
24+
/nbbuild/
25+
/dist/
26+
/nbdist/
27+
/.nb-gradle/
28+
build/
29+
!**/src/main/**/build/
30+
!**/src/test/**/build/
31+
32+
### VS Code ###
33+
.vscode/

pom.xml

Lines changed: 50 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -13,58 +13,62 @@
1313
<maven.compiler.source>11</maven.compiler.source>
1414
<maven.compiler.target>11</maven.compiler.target>
1515
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16-
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
17-
<scala.version>2.12</scala.version>
16+
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
17+
<scala.version>2.11</scala.version>
1818
<spark.version>2.3.1</spark.version>
1919
</properties>
2020

21-
<dependencies>
22-
<!-- Spark -->
23-
<dependency>
24-
<groupId>org.apache.spark</groupId>
25-
<artifactId>spark-core_${scala.version}</artifactId>
26-
<version>${spark.version}</version>
27-
</dependency>
28-
<dependency>
29-
<groupId>org.apache.spark</groupId>
30-
<artifactId>spark-sql_${scala.version}</artifactId>
31-
<version>${spark.version}</version>
32-
<exclusions>
33-
<exclusion>
34-
<groupId>org.slf4j</groupId>
35-
<artifactId>slf4j-simple</artifactId>
36-
</exclusion>
37-
</exclusions>
38-
</dependency>
39-
<dependency>
40-
<groupId>org.apache.spark</groupId>
41-
<artifactId>spark-mllib_${scala.version}</artifactId>
42-
<version>${spark.version}</version>
43-
<exclusions>
44-
<exclusion>
45-
<groupId>org.slf4j</groupId>
46-
<artifactId>slf4j-log4j12</artifactId>
47-
</exclusion>
48-
<exclusion>
49-
<groupId>org.slf4j</groupId>
50-
<artifactId>slf4j-simple</artifactId>
51-
</exclusion>
52-
</exclusions>
53-
</dependency>
54-
55-
<dependency>
56-
<groupId>junit</groupId>
57-
<artifactId>junit</artifactId>
58-
<version>4.11</version>
59-
<scope>test</scope>
60-
</dependency>
61-
21+
<dependencies>
22+
<!-- Spark -->
23+
<dependency>
24+
<groupId>org.apache.spark</groupId>
25+
<artifactId>spark-core_${scala.version}</artifactId>
26+
<version>${spark.version}</version>
27+
</dependency>
28+
<dependency>
29+
<groupId>org.apache.spark</groupId>
30+
<artifactId>spark-sql_${scala.version}</artifactId>
31+
<version>${spark.version}</version>
32+
<exclusions>
33+
<exclusion>
34+
<groupId>org.slf4j</groupId>
35+
<artifactId>slf4j-simple</artifactId>
36+
</exclusion>
37+
</exclusions>
38+
</dependency>
39+
<dependency>
40+
<groupId>org.apache.spark</groupId>
41+
<artifactId>spark-mllib_${scala.version}</artifactId>
42+
<version>${spark.version}</version>
43+
<exclusions>
44+
<exclusion>
45+
<groupId>org.slf4j</groupId>
46+
<artifactId>slf4j-log4j12</artifactId>
47+
</exclusion>
48+
<exclusion>
49+
<groupId>org.slf4j</groupId>
50+
<artifactId>slf4j-simple</artifactId>
51+
</exclusion>
52+
</exclusions>
53+
</dependency>
54+
<dependency>
55+
<groupId>mysql</groupId>
56+
<artifactId>mysql-connector-java</artifactId>
57+
<version>8.0.22</version>
58+
</dependency>
59+
<dependency>
60+
<groupId>junit</groupId>
61+
<artifactId>junit</artifactId>
62+
<version>4.11</version>
63+
<scope>test</scope>
64+
</dependency>
65+
6266
</dependencies>
6367

6468
<build>
6569

6670
<plugins>
67-
<plugin>
71+
<!-- <plugin>
6872
<groupId>org.apache.maven.plugins</groupId>
6973
<artifactId>maven-dependency-plugin</artifactId>
7074
<executions>
@@ -81,7 +85,7 @@
8185
</configuration>
8286
</execution>
8387
</executions>
84-
</plugin>
88+
</plugin> -->
8589

8690
<plugin>
8791
<groupId>org.springframework.boot</groupId>
@@ -92,15 +96,13 @@
9296
<goal>repackage</goal>
9397
</goals>
9498
<configuration>
95-
9699
<mainClass>com.selimhorri.pack.Main</mainClass>
97-
98100
</configuration>
99101
</execution>
100102
</executions>
101103
</plugin>
102-
103104
</plugins>
105+
<finalName>spark-app</finalName>
104106
</build>
105107

106108
</project>

src/main/java/com/selimhorri/pack/Main.java

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,32 @@
11
package com.selimhorri.pack;
22

3+
import org.apache.spark.sql.Dataset;
4+
import org.apache.spark.sql.Row;
5+
import org.apache.spark.sql.SaveMode;
6+
import org.apache.spark.sql.SparkSession;
7+
import static org.apache.spark.sql.functions.*;
8+
9+
import java.util.Properties;
10+
311
public class Main {
412

513
public static void main(String[] args) {
614

15+
final SparkSession sparkSession = new SparkSession.Builder().appName("CSV to DB").master("local").getOrCreate();
16+
17+
Dataset<Row> df = sparkSession.read().format("csv").option("header", true).load("src/main/resources/name_job.txt");
18+
19+
df = df.withColumn("fullName", concat(lit("FIRST_NAME => "), df.col("first_name"), lit(" || "), lit("LAST_NAME => "), df.col("last_name")) );
20+
df.show();
21+
22+
final String dbUrl = "jdbc:mysql://localhost:3306/spark_db";
23+
final Properties properties = new Properties();
24+
// properties.setProperty("driver", "");
25+
properties.setProperty("user", "root");
26+
properties.setProperty("password", "");
727

28+
df.write().mode(SaveMode.Overwrite).jdbc(dbUrl, "persons", properties);
29+
System.out.println("====>> Loaded in the Database <<====");
830

931
}
1032

src/main/resources/name_job.txt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
first_name,last_name,job
2+
Selim,Horri,Software Engineer
3+
Amine,Ajimi,Student
4+
Omar,Derouiche,Agent
5+
Yesmine,Derouiche,Student

0 commit comments

Comments
 (0)