import java.text.SimpleDateFormat;
/** BulkExportToJdbc shows how simple it is to use Data Movement SDK to move
 * massive data sets from a source MarkLogic Server to a JDBC target server.
 * In this example all employees are exported using a query matching all docs
 * in directory /employees/. Using the
 * [Shortcut Method](https://docs.marklogic.com/javadoc/client/overview-summary.html#ShortcutMethods)
 * `getContentAs` and the Employee POJO class (pre-registered with the handle
 * registry by DatabaseClientSingleton), we can easily serialize each document
 * to an Employee object. From there it's straightforward to use Spring's
 * JdbcTemplate to write the employees, their salaries, and their titles via
 * JDBC. Of course, Spring's JdbcTemplate is not required--you could choose
 * your favorite JDBC libraries to use with Data Movement SDK. And of course
 * you don't need to deserialize to pojos--you could use any of the Java Client
 * API handles to deserialize the matching documents.
 */
5670public class BulkExportToJdbc {
5771 private static Logger logger = LoggerFactory .getLogger (BulkExportToJdbc .class );
72+ // this is the date format required by our relational database tables
5873 public static final SimpleDateFormat dateFormat = new SimpleDateFormat ("yyyy-MM-dd" );
5974
75+ // we're using a small thread count and batch size because the example
76+ // dataset is small, but with a larger dataset you'd use more threads and
77+ // larger batches
6078 private static int threadCount = 3 ;
6179 private static int batchSize = 3 ;
6280
@@ -68,25 +86,49 @@ public static void main(String[] args) throws IOException, SQLException {
6886 }
6987
7088 public void run () throws IOException , SQLException {
89+ // connect to JDBC and initialize JdbcTemplate
7190 JdbcTemplate jdbcTemplate = new JdbcTemplate (getDataSource ());
72- final boolean isMySQLDB ;
73- SimpleDateFormat dateFormat = new SimpleDateFormat ("yyyy-MM-dd" );
91+ // query for all employees in directory /employees/
7492 StructuredQueryDefinition query = new StructuredQueryBuilder ().directory (true , "/employees/" );
93+ // run the query on each forest in the cluster and asynchronously paginate
94+ // through matches, sending them to the onUrisReady listener ExportListener
7595 QueryBatcher qb = moveMgr .newQueryBatcher (query )
7696 .withBatchSize (batchSize )
7797 .withThreadCount (threadCount )
98+
99+ // use withConsistentSnapshot so the set of matches doesn't change while this job
100+ // runs even though updates are still occurring concurrently in MarkLogic Server.
101+ // Requires a [merge timestamp](https://docs.marklogic.com/guide/app-dev/point_in_time#id_32468)
102+ // to be set on MarkLogic Server.
103+ .withConsistentSnapshot ()
104+
78105 .onUrisReady (
106+ // Since ExportListener meets our needs we'll use it instead of a
107+ // custom listener
79108 new ExportListener ()
109+
110+ // since the ExportListener uses a separate request from the QueryBatcher
111+ // we must also use withConsistentSnapshot on the ExportListener
80112 .withConsistentSnapshot ()
113+
114+ // this is our custom onDocumentReady listener
81115 .onDocumentReady (record -> {
116+
117+ // Employee class is registered by DatabaseClientSingleton with the
118+ // handle registry so we can use the getContentAs shortcut method
82119 Employee employee = record .getContentAs (Employee .class );
120+
121+ // using jdbcTemplate (which simplifies using jdbc) we can easily
122+ // write the employee to the target relational database server
83123 jdbcTemplate .update (
84124 "INSERT INTO employees_export (emp_no, hire_date, first_name, last_name, gender, birth_date) " +
85125 "VALUES (?, ?, ?, ?, ?, ?) " ,
86126 employee .getEmployeeId (), dateFormat .format (employee .getHireDate ().getTime ()), employee .getFirstName (),
87127 employee .getLastName (), employee .getGender () == Gender .MALE ? "M" : "F" ,
88128 dateFormat .format (employee .getBirthDate ().getTime ()));
89129 if ( employee .getSalaries () != null ) {
130+ // each employee could have many salaries, and we need to write
131+ // each of those to its own row
90132 for ( Salary salary : employee .getSalaries () ) {
91133 jdbcTemplate .update (
92134 "INSERT INTO salaries_export (emp_no, salary, from_date, to_date) " +
@@ -96,6 +138,8 @@ public void run() throws IOException, SQLException {
96138 }
97139 }
98140 if ( employee .getTitles () != null ) {
141+ // each employee could have many titles, and we need to write
142+ // each of those to its own row
99143 for ( Title title : employee .getTitles () ) {
100144 jdbcTemplate .update (
101145 "INSERT INTO titles_export (emp_no, title, from_date, to_date) " +
@@ -105,23 +149,40 @@ public void run() throws IOException, SQLException {
105149 }
106150 }
107151 })
152+
153+ // in a production application we could have more elaborate error
154+ // handling here
108155 .onBatchFailure ((failedBatch ,exception ) -> exception .printStackTrace ())
109156 )
157+
158+ // another onUrisReady listener, this one custom, and just for logging
110159 .onUrisReady (batch ->
111160 logger .debug ("Batch exported {}, so far {}" ,
112161 batch .getJobBatchNumber (), batch .getJobResultsSoFar ())
113162 )
163+
164+ // in a production application we could have more elaborate error
165+ // handling here
114166 .onQueryFailure (exception -> exception .printStackTrace ());
167+
168+ // now that the job is configured, kick it off
115169 JobTicket ticket = moveMgr .startJob (qb );
170+
171+ // wait for the job to fully complete all pagination and all listeners
116172 qb .awaitCompletion ();
173+
174+ // free up resources by stopping the job
117175 moveMgr .stopJob (qb );
176+
177+ // double check that we didn't have any failed batches
118178 JobReport report = moveMgr .getJobReport (ticket );
119179 if ( report .getFailureBatchesCount () > 0 ) {
120180 throw new IllegalStateException ("Encountered " +
121181 report .getFailureBatchesCount () + " failed batches" );
122182 }
123183 }
124184
185+ // get the jdbcUrl property from Example.properties with our jdbc connection info
125186 private DataSource getDataSource () throws IOException {
126187 ExampleProperties properties = Util .loadProperties ();
127188 return new DriverManagerDataSource (properties .jdbcUrl , properties .jdbcUser , properties .jdbcPassword );