
name=marklogic-sink
connector.class=com.marklogic.kafka.connect.sink.MarkLogicSinkConnector
-
-# Should only need one task, since the connector uses a WriteBatcher, which is multi-threaded
tasks.max=1
-
# Topics to consume from [comma-separated list for multiple topics]
topics=marklogic

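If more than one topic should be consumed, the comma-separated form mentioned above can be used; the extra topic names below are purely illustrative:

    # Hypothetical example of consuming from multiple topics
    # topics=marklogic,orders,customers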
|
12 | | - |
13 | 9 | # MarkLogic connector-specific properties |
| 10 | +# See ./config/marklogic-sink.properties for information on each of these |
14 | 11 |
|
15 | | -# A MarkLogic host to connect to. The connector uses the Data Movement SDK, and thus it will connect to each of the |
16 | | -# hosts in a cluster. |
17 | 12 | ml.connection.host=172.31.48.57 |
18 | | - |
19 | | -# The port of a REST API server to connect to. |
20 | 13 | ml.connection.port=8003 |
21 | | - |
-# Optional - the name of a database to connect to. If the content database associated with your REST API server is the
-# one that you want to write documents to, you do not need to set this.
ml.connection.database=Kafka
-
-# Optional - set to "gateway" when using a load balancer, else leave blank.
-# See https://docs.marklogic.com/guide/java/data-movement#id_26583 for more information.
-ml.connection.type=
-
-# Either DIGEST, BASIC, CERTIFICATE, KERBEROS, or NONE
ml.connection.securityContextType=DIGEST
-
-# Set these based on the security context type defined above
ml.connection.username=admin
ml.connection.password=admin
-ml.connection.certFile=
-ml.connection.certPassword=
-ml.connection.externalName=
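A rough sketch of how the properties above might be set for the non-DIGEST security context types; the file paths and names are hypothetical, and the exact pairing of properties to types should be checked against the connector documentation:

    # CERTIFICATE authentication (hypothetical values)
    # ml.connection.securityContextType=CERTIFICATE
    # ml.connection.certFile=/path/to/client-cert.p12
    # ml.connection.certPassword=changeit
    # KERBEROS authentication (hypothetical external name)
    # ml.connection.securityContextType=KERBEROS
    # ml.connection.externalName=kafka-connect-service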
-
-# Set "ml.connection.simpleSsl" to "true" for a "simple" SSL strategy that uses the JVM's default SslContext and
-# X509TrustManager and a "trust everything" HostnameVerifier. Further customization of an SSL connection via properties
-# is not supported. If you need to do so, consider using the source code for this connector as a starting point.
-ml.connection.simpleSsl=false
-# You must also ensure that the server cert or the signing CA cert is imported into the JVM's cacerts file.
-# These commands may be used to get the server cert and to import it into your cacerts file.
-# Don't forget to customize the commands for your particular case.
-# openssl x509 -in <(openssl s_client -connect <server>:8004 -prexit 2>/dev/null) -out ~/example.crt
-# sudo keytool -importcert -file ~/example.crt -alias <server> -keystore /path/to/java/lib/security/cacerts -storepass <storepass-password>
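Filled-in versions of the two commands above, assuming a hypothetical host named marklogic.example.com, a JVM whose cacerts file lives under $JAVA_HOME/lib/security, and the default cacerts password:

    # openssl x509 -in <(openssl s_client -connect marklogic.example.com:8004 -prexit 2>/dev/null) -out ~/marklogic.crt
    # sudo keytool -importcert -file ~/marklogic.crt -alias marklogic.example.com -keystore "$JAVA_HOME/lib/security/cacerts" -storepass changeit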
-
-# Sets the number of documents to be written in a batch to MarkLogic. This may not have any impact depending on how the
-# connector receives data from Kafka, as the connector calls flushAsync on the DMSDK WriteBatcher after processing every
-# collection of records. Thus, if the connector never receives more records at one time than the value of this property,
-# then this property will have no impact.
-ml.dmsdk.batchSize=100
-
-# Sets the number of threads used by the Data Movement SDK for parallelizing writes to MarkLogic. Similar to the batch
-# size property above, this may never come into play depending on how many records the connector receives at once.
-ml.dmsdk.threadCount=8
-
-# Optional - a comma-separated list of collections that each document should be written to
ml.document.collections=kafka-data
-
-# Optional - set this to true to add the name of the topic that the connector reads from as a collection on each document it inserts
-ml.document.addTopicToCollections=false
-
-# Optional - specify the format of each document; either JSON, XML, BINARY, TEXT, or UNKNOWN
ml.document.format=JSON
-
-# Optional - specify a mime type for each document; typically the format property above will be used instead of this
-ml.document.mimeType=
-
-# Optional - a comma-separated list of roles and capabilities that define the permissions for each document written to MarkLogic
ml.document.permissions=rest-reader,read,rest-writer,update
-
-# Optional - a prefix to prepend to each URI; the URI itself is a UUID
ml.document.uriPrefix=/kafka-data/
-
-# Optional - a suffix to append to each URI
ml.document.uriSuffix=.json
-
-# Optional - name of a REST transform to use when writing documents
-# For Data Hub, can use mlRunIngest
-ml.dmsdk.transform=
-
-# Optional - delimited set of transform parameter names and values
-# Data Hub example = flow-name,ingestion_mapping_mastering-flow,step,1
-ml.dmsdk.transformParams=
-
-# Optional - delimiter for transform parameter names and values
-ml.dmsdk.transformParamsDelimiter=,
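Putting the three transform properties together, a sketch of a Data Hub ingest configuration built from the examples in the comments above (the flow name and step number are illustrative):

    # ml.dmsdk.transform=mlRunIngest
    # ml.dmsdk.transformParams=flow-name,ingestion_mapping_mastering-flow,step,1
    # ml.dmsdk.transformParamsDelimiter=,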
-
-# Properties for running a Data Hub flow
-# Using examples/dh-5-example in the DH project, could use the following config:
-# ml.datahub.flow.name=ingestion_mapping_mastering-flow
-# ml.datahub.flow.steps=2,3,4
-ml.datahub.flow.name=
-ml.datahub.flow.steps=
-# Whether or not the response data from running a flow should be logged at the info level
-ml.datahub.flow.logResponse=true
-
-ml.id.strategy=
-ml.id.strategy.paths=
-ml.connection.enableCustomSsl=false
-ml.connection.customSsl.tlsVersion=
-ml.connection.customSsl.hostNameVerifier=
-ml.connection.customSsl.mutualAuth=false
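The custom SSL properties above carry no inline documentation; as a rough sketch, a custom (non-"simple") SSL configuration might look like the following, where the TLS version and hostname-verifier value are assumptions to verify against the connector documentation:

    # ml.connection.enableCustomSsl=true
    # ml.connection.customSsl.tlsVersion=TLSv1.2
    # ml.connection.customSsl.hostNameVerifier=ANY
    # ml.connection.customSsl.mutualAuth=false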