Skip to content

Commit 19db4b9

Browse files
committed
fix #562 - add documentation
1 parent f3d41e8 commit 19db4b9

File tree

1 file changed

+63
-2
lines changed

1 file changed

+63
-2
lines changed

src/main/java/com/marklogic/client/example/cookbook/datamovement/BulkExportToJdbc.java

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,28 @@
5353

5454
import java.text.SimpleDateFormat;
5555

56+
/** BulkExportToJdbc shows how simple it is to use Data Movement SDK to move
57+
* massive data sets from a source MarkLogic Server to a JDBC target server.
58+
* In this example all employees are exported using a query matching all docs
59+
* in directory /employees/. Using the
60+
* [Shortcut Method](https://docs.marklogic.com/javadoc/client/overview-summary.html#ShortcutMethods)
61+
* `getContentAs` and the Employee POJO class (pre-registered with the handle
62+
* registry by DatabaseClientSingleton), we can easily deserialize each document
63+
* to an Employee object. From there it's straightforward to use Spring's
64+
* JdbcTemplate to write the employees, their salaries, and their titles via
65+
* JDBC. Of course, Spring's JdbcTemplate is not required--you could choose
66+
* your favorite JDBC libraries to use with Data Movement SDK. And of course
67+
* you don't need to deserialize to POJOs--you could use any of the Java Client
68+
* API handles to deserialize the matching documents.
69+
*/
5670
public class BulkExportToJdbc {
5771
private static Logger logger = LoggerFactory.getLogger(BulkExportToJdbc.class);
72+
// this is the date format required by our relational database tables
5873
public static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
5974

75+
// we're using a small thread count and batch size because the example
76+
// dataset is small, but with a larger dataset you'd use more threads and
77+
// larger batches
6078
private static int threadCount = 3;
6179
private static int batchSize = 3;
6280

@@ -68,25 +86,49 @@ public static void main(String[] args) throws IOException, SQLException {
6886
}
6987

7088
public void run() throws IOException, SQLException {
89+
// connect to JDBC and initialize JdbcTemplate
7190
JdbcTemplate jdbcTemplate = new JdbcTemplate(getDataSource());
72-
final boolean isMySQLDB;
73-
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
91+
// query for all employees in directory /employees/
7492
StructuredQueryDefinition query = new StructuredQueryBuilder().directory(true, "/employees/");
93+
// run the query on each forest in the cluster and asynchronously paginate
94+
// through matches, sending them to the onUrisReady listener ExportListener
7595
QueryBatcher qb = moveMgr.newQueryBatcher(query)
7696
.withBatchSize(batchSize)
7797
.withThreadCount(threadCount)
98+
99+
// use withConsistentSnapshot so the set of matches doesn't change while this job
100+
// runs even though updates are still occurring concurrently in MarkLogic Server.
101+
// Requires a [merge timestamp](https://docs.marklogic.com/guide/app-dev/point_in_time#id_32468)
102+
// to be set on MarkLogic Server.
103+
.withConsistentSnapshot()
104+
78105
.onUrisReady(
106+
// Since ExportListener meets our needs we'll use it instead of a
107+
// custom listener
79108
new ExportListener()
109+
110+
// since the ExportListener uses a separate request from the QueryBatcher
111+
// we must also use withConsistentSnapshot on the ExportListener
80112
.withConsistentSnapshot()
113+
114+
// this is our custom onDocumentReady listener
81115
.onDocumentReady(record -> {
116+
117+
// Employee class is registered by DatabaseClientSingleton with the
118+
// handle registry so we can use the getContentAs shortcut method
82119
Employee employee = record.getContentAs(Employee.class);
120+
121+
// using jdbcTemplate (which simplifies using jdbc) we can easily
122+
// write the employee to the target relational database server
83123
jdbcTemplate.update(
84124
"INSERT INTO employees_export (emp_no, hire_date, first_name, last_name, gender, birth_date) " +
85125
"VALUES (?, ?, ?, ?, ?, ?) ",
86126
employee.getEmployeeId(), dateFormat.format(employee.getHireDate().getTime()), employee.getFirstName(),
87127
employee.getLastName(), employee.getGender() == Gender.MALE ? "M" : "F",
88128
dateFormat.format(employee.getBirthDate().getTime()));
89129
if ( employee.getSalaries() != null ) {
130+
// each employee could have many salaries, and we need to write
131+
// each of those to its own row
90132
for ( Salary salary : employee.getSalaries() ) {
91133
jdbcTemplate.update(
92134
"INSERT INTO salaries_export (emp_no, salary, from_date, to_date) " +
@@ -96,6 +138,8 @@ public void run() throws IOException, SQLException {
96138
}
97139
}
98140
if ( employee.getTitles() != null ) {
141+
// each employee could have many titles, and we need to write
142+
// each of those to its own row
99143
for ( Title title : employee.getTitles() ) {
100144
jdbcTemplate.update(
101145
"INSERT INTO titles_export (emp_no, title, from_date, to_date) " +
@@ -105,23 +149,40 @@ public void run() throws IOException, SQLException {
105149
}
106150
}
107151
})
152+
153+
// in a production application we could have more elaborate error
154+
// handling here
108155
.onBatchFailure((failedBatch,exception) -> exception.printStackTrace())
109156
)
157+
158+
// another onUrisReady listener, this one custom, and just for logging
110159
.onUrisReady(batch ->
111160
logger.debug("Batch exported {}, so far {}",
112161
batch.getJobBatchNumber(), batch.getJobResultsSoFar())
113162
)
163+
164+
// in a production application we could have more elaborate error
165+
// handling here
114166
.onQueryFailure(exception -> exception.printStackTrace());
167+
168+
// now that the job is configured, kick it off
115169
JobTicket ticket = moveMgr.startJob(qb);
170+
171+
// wait for the job to fully complete all pagination and all listeners
116172
qb.awaitCompletion();
173+
174+
// free up resources by stopping the job
117175
moveMgr.stopJob(qb);
176+
177+
// double check that we didn't have any failed batches
118178
JobReport report = moveMgr.getJobReport(ticket);
119179
if ( report.getFailureBatchesCount() > 0 ) {
120180
throw new IllegalStateException("Encountered " +
121181
report.getFailureBatchesCount() + " failed batches");
122182
}
123183
}
124184

185+
// get the jdbcUrl, jdbcUser, and jdbcPassword properties from Example.properties with our jdbc connection info
125186
private DataSource getDataSource() throws IOException {
126187
ExampleProperties properties = Util.loadProperties();
127188
return new DriverManagerDataSource(properties.jdbcUrl, properties.jdbcUser, properties.jdbcPassword);

0 commit comments

Comments
 (0)